diff options
Diffstat (limited to 'src/VBox/NetworkServices')
75 files changed, 28071 insertions, 0 deletions
diff --git a/src/VBox/NetworkServices/.scm-settings b/src/VBox/NetworkServices/.scm-settings new file mode 100644 index 00000000..93fd4b73 --- /dev/null +++ b/src/VBox/NetworkServices/.scm-settings @@ -0,0 +1,29 @@ +# $Id: .scm-settings $ +## @file +# Source code massager settings for the network services. +# + +# +# Copyright (C) 2019-2023 Oracle and/or its affiliates. +# +# This file is part of VirtualBox base platform packages, as +# available from https://www.virtualbox.org. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, in version 3 of the +# License. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses>. +# +# SPDX-License-Identifier: GPL-3.0-only +# + +/*.h: --guard-relative-to-dir . + diff --git a/src/VBox/NetworkServices/Dhcpd/ClientId.cpp b/src/VBox/NetworkServices/Dhcpd/ClientId.cpp new file mode 100644 index 00000000..a0846b7b --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/ClientId.cpp @@ -0,0 +1,126 @@ +/* $Id: ClientId.cpp $ */ +/** @file + * DHCP server - client identifier + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include <algorithm> +#include "ClientId.h" + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** Indiciates wherther ClientId::rtStrFormat was already registered. */ +bool ClientId::g_fFormatRegistered = false; + + +/** + * Registers the ClientId format type callback ("%R[id]"). + */ +void ClientId::registerFormat() RT_NOEXCEPT +{ + if (!g_fFormatRegistered) + { + int rc = RTStrFormatTypeRegister("id", rtStrFormat, NULL); + AssertRC(rc); + g_fFormatRegistered = RT_SUCCESS(rc); + } +} + + +/** + * @callback_method_impl{FNRTSTRFORMATTYPE, Formats ClientId via "%R[id]". 
} + */ +DECLCALLBACK(size_t) +ClientId::rtStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + RT_NOREF(pszType, cchWidth, cchPrecision, fFlags, pvUser); + Assert(strcmp(pszType, "id") == 0); + + const ClientId *pThis = static_cast<const ClientId *>(pvValue); + if (pThis == NULL) + return pfnOutput(pvArgOutput, RT_STR_TUPLE("<NULL>")); + + size_t cb = 0; + if (pThis->m_id.present()) + { + cb += pfnOutput(pvArgOutput, RT_STR_TUPLE("[")); + + const OptClientId::value_t &idopt = pThis->m_id.value(); + for (size_t i = 0; i < idopt.size(); ++i) + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "%s%02x", (i == 0 ? "" : ":"), idopt[i]); + + cb += pfnOutput(pvArgOutput, RT_STR_TUPLE("] (")); + } + + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "%RTmac", &pThis->m_mac); + + if (pThis->m_id.present()) + cb += pfnOutput(pvArgOutput, RT_STR_TUPLE(")")); + + return cb; +} + + +bool operator==(const ClientId &l, const ClientId &r) RT_NOEXCEPT +{ + if (l.m_id.present()) + { + if (r.m_id.present()) + return l.m_id.value() == r.m_id.value(); + } + else + { + if (!r.m_id.present()) + return l.m_mac == r.m_mac; + } + + return false; +} + + +bool operator<(const ClientId &l, const ClientId &r) RT_NOEXCEPT +{ + if (l.m_id.present()) + { + if (r.m_id.present()) + return l.m_id.value() < r.m_id.value(); + return false; /* the one with id comes last */ + } + else + { + if (r.m_id.present()) + return true; /* the one with id comes last */ + return l.m_mac < r.m_mac; + } +} + diff --git a/src/VBox/NetworkServices/Dhcpd/ClientId.h b/src/VBox/NetworkServices/Dhcpd/ClientId.h new file mode 100644 index 00000000..3d5bcd16 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/ClientId.h @@ -0,0 +1,93 @@ +/* $Id: ClientId.h $ */ +/** @file + * DHCP server - client identifier + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. 
+ * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Dhcpd_ClientId_h +#define VBOX_INCLUDED_SRC_Dhcpd_ClientId_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include "DhcpdInternal.h" +#include <iprt/net.h> +#include "DhcpOptions.h" + +/** + * A client is identified by either the Client ID option it sends or its chaddr, + * i.e. MAC address. + */ +class ClientId +{ + /** The mac address of the client. */ + RTMAC m_mac; + /** The client ID. */ + OptClientId m_id; + +public: + ClientId() + : m_mac(), m_id() + {} + /** @throws std::bad_alloc */ + ClientId(const RTMAC &a_mac, const OptClientId &a_id) + : m_mac(a_mac), m_id(a_id) + {} + /** @throws std::bad_alloc */ + ClientId(const ClientId &a_rThat) + : m_mac(a_rThat.m_mac), m_id(a_rThat.m_id) + {} + /** @throws std::bad_alloc */ + ClientId &operator=(const ClientId &a_rThat) + { + m_mac = a_rThat.m_mac; + m_id = a_rThat.m_id; + return *this; + } + + const RTMAC &mac() const RT_NOEXCEPT { return m_mac; } + const OptClientId &id() const RT_NOEXCEPT { return m_id; } + + /** @name String formatting of %R[id]. 
+ * @{ */ + static void registerFormat() RT_NOEXCEPT; +private: + static DECLCALLBACK(size_t) rtStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, const char *pszType, + void const *pvValue, int cchWidth, int cchPrecision, unsigned fFlags, void *pvUser); + static bool g_fFormatRegistered; + /** @} */ + + friend bool operator==(const ClientId &l, const ClientId &r) RT_NOEXCEPT; + friend bool operator<(const ClientId &l, const ClientId &r) RT_NOEXCEPT; +}; + +bool operator==(const ClientId &l, const ClientId &r) RT_NOEXCEPT; +bool operator<(const ClientId &l, const ClientId &r) RT_NOEXCEPT; + +inline bool operator!=(const ClientId &l, const ClientId &r) RT_NOEXCEPT +{ + return !(l == r); +} + +#endif /* !VBOX_INCLUDED_SRC_Dhcpd_ClientId_h */ diff --git a/src/VBox/NetworkServices/Dhcpd/Config.cpp b/src/VBox/NetworkServices/Dhcpd/Config.cpp new file mode 100644 index 00000000..bbfc9356 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/Config.cpp @@ -0,0 +1,1395 @@ +/* $Id: Config.cpp $ */ +/** @file + * DHCP server - server configuration + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "DhcpdInternal.h" + +#include <iprt/ctype.h> +#include <iprt/net.h> /* NB: must come before getopt.h */ +#include <iprt/getopt.h> +#include <iprt/path.h> +#include <iprt/message.h> +#include <iprt/string.h> +#include <iprt/uuid.h> +#include <iprt/cpp/path.h> + +#include <VBox/com/utils.h> /* For log initialization. */ + +#include "Config.h" + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/*static*/ bool Config::g_fInitializedLog = false; +/*static*/ uint32_t GroupConfig::s_uGroupNo = 0; + + +/** + * Configuration file exception. + */ +class ConfigFileError + : public RTCError +{ +public: +#if 0 /* This just confuses the compiler. */ + ConfigFileError(const char *a_pszMessage) + : RTCError(a_pszMessage) + {} +#endif + + explicit ConfigFileError(xml::Node const *pNode, const char *a_pszMsgFmt, ...) + : RTCError((char *)NULL) + { + + i_buildPath(pNode); + m_strMsg.append(": "); + + va_list va; + va_start(va, a_pszMsgFmt); + m_strMsg.appendPrintfV(a_pszMsgFmt, va); + va_end(va); + } + + + ConfigFileError(const char *a_pszMsgFmt, ...) 
+ : RTCError((char *)NULL) + { + va_list va; + va_start(va, a_pszMsgFmt); + m_strMsg.printfV(a_pszMsgFmt, va); + va_end(va); + } + + ConfigFileError(const RTCString &a_rstrMessage) + : RTCError(a_rstrMessage) + {} + +private: + void i_buildPath(xml::Node const *pNode) + { + if (pNode) + { + i_buildPath(pNode->getParent()); + m_strMsg.append('/'); + m_strMsg.append(pNode->getName()); + if (pNode->isElement() && pNode->getParent()) + { + xml::ElementNode const *pElm = (xml::ElementNode const *)pNode; + for (xml::Node const *pAttrib = pElm->getFirstAttribute(); pAttrib != NULL; + pAttrib = pAttrib->getNextSibiling()) + if (pAttrib->isAttribute()) + { + m_strMsg.append("[@"); + m_strMsg.append(pAttrib->getName()); + m_strMsg.append('='); + m_strMsg.append(pAttrib->getValue()); + m_strMsg.append(']'); + } + } + } + } + +}; + + +/** + * Private default constructor, external users use factor methods. + */ +Config::Config() + : m_strHome() + , m_strNetwork() + , m_strTrunk() + , m_enmTrunkType(kIntNetTrunkType_Invalid) + , m_MacAddress() + , m_IPv4Address() + , m_IPv4Netmask() + , m_IPv4PoolFirst() + , m_IPv4PoolLast() + , m_GlobalConfig() + , m_GroupConfigs() + , m_HostConfigs() +{ +} + + +/** + * Initializes the object. + * + * @returns IPRT status code. + */ +int Config::i_init() RT_NOEXCEPT +{ + return i_homeInit(); +} + + +/** + * Initializes the m_strHome member with the path to ~/.VirtualBox or equivalent. + * + * @returns IPRT status code. + * @todo Too many init functions? + */ +int Config::i_homeInit() RT_NOEXCEPT +{ + char szHome[RTPATH_MAX]; + int rc = com::GetVBoxUserHomeDirectory(szHome, sizeof(szHome), false); + if (RT_SUCCESS(rc)) + rc = m_strHome.assignNoThrow(szHome); + else + DHCP_LOG_MSG_ERROR(("unable to locate the VirtualBox home directory: %Rrc\n", rc)); + return rc; +} + + +/** + * Internal worker for the public factory methods that creates an instance and + * calls i_init() on it. + * + * @returns Config instance on success, NULL on failure. 
+ */ +/*static*/ Config *Config::i_createInstanceAndCallInit() RT_NOEXCEPT +{ + Config *pConfig; + try + { + pConfig = new Config(); + } + catch (std::bad_alloc &) + { + return NULL; + } + + int rc = pConfig->i_init(); + if (RT_SUCCESS(rc)) + return pConfig; + delete pConfig; + return NULL; +} + + +/** + * Worker for i_complete() that initializes the release log of the process. + * + * Requires network name to be known as the log file name depends on + * it. Alternatively, consider passing the log file name via the + * command line? + * + * @note This is only used when no --log parameter was given. + */ +int Config::i_logInit() RT_NOEXCEPT +{ + if (g_fInitializedLog) + return VINF_SUCCESS; + + if (m_strHome.isEmpty() || m_strNetwork.isEmpty()) + return VERR_PATH_ZERO_LENGTH; + + /* default log file name */ + char szLogFile[RTPATH_MAX]; + ssize_t cch = RTStrPrintf2(szLogFile, sizeof(szLogFile), + "%s%c%s-Dhcpd.log", + m_strHome.c_str(), RTPATH_DELIMITER, m_strNetwork.c_str()); + if (cch > 0) + { + RTPathPurgeFilename(RTPathFilename(szLogFile), RTPATH_STR_F_STYLE_HOST); + return i_logInitWithFilename(szLogFile); + } + return VERR_BUFFER_OVERFLOW; +} + + +/** + * Worker for i_logInit and for handling --log on the command line. + * + * @returns IPRT status code. + * @param pszFilename The log filename. 
+ */ +/*static*/ int Config::i_logInitWithFilename(const char *pszFilename) RT_NOEXCEPT +{ + AssertReturn(!g_fInitializedLog, VERR_WRONG_ORDER); + + int rc = com::VBoxLogRelCreate("DHCP Server", + pszFilename, + RTLOGFLAGS_PREFIX_TIME_PROG, + "all net_dhcpd.e.l.f.l3.l4.l5.l6", + "VBOXDHCP_RELEASE_LOG", + RTLOGDEST_FILE +#ifdef DEBUG + | RTLOGDEST_STDERR +#endif + , + 32768 /* cMaxEntriesPerGroup */, + 5 /* cHistory */, + RT_SEC_1DAY /* uHistoryFileTime */, + _32M /* uHistoryFileSize */, + NULL /* pErrInfo */); + if (RT_SUCCESS(rc)) + g_fInitializedLog = true; + else + RTMsgError("Log initialization failed: %Rrc, log file '%s'", rc, pszFilename); + return rc; + +} + + +/** + * Post process and validate the configuration after it has been loaded. + */ +int Config::i_complete() RT_NOEXCEPT +{ + if (m_strNetwork.isEmpty()) + { + LogRel(("network name is not specified\n")); + return false; + } + + i_logInit(); + + /** @todo the MAC address is always generated, no XML config option for it ... */ + bool fMACGenerated = false; + if ( m_MacAddress.au16[0] == 0 + && m_MacAddress.au16[1] == 0 + && m_MacAddress.au16[2] == 0) + { + RTUUID Uuid; + int rc = RTUuidCreate(&Uuid); + AssertRCReturn(rc, rc); + + m_MacAddress.au8[0] = 0x08; + m_MacAddress.au8[1] = 0x00; + m_MacAddress.au8[2] = 0x27; + m_MacAddress.au8[3] = Uuid.Gen.au8Node[3]; + m_MacAddress.au8[4] = Uuid.Gen.au8Node[4]; + m_MacAddress.au8[5] = Uuid.Gen.au8Node[5]; + + LogRel(("MAC address is not specified: will use generated MAC %RTmac\n", &m_MacAddress)); + fMACGenerated = true; + } + + /* unicast MAC address */ + if (m_MacAddress.au8[0] & 0x01) + { + LogRel(("MAC address is not unicast: %RTmac\n", &m_MacAddress)); + return VERR_GENERAL_FAILURE; + } + + if (!fMACGenerated) + LogRel(("MAC address %RTmac\n", &m_MacAddress)); + + return VINF_SUCCESS; +} + + +/** + * Parses the command line and loads the configuration. + * + * @returns The configuration, NULL if we ran into some fatal problem. 
+ * @param argc The argc from main(). + * @param argv The argv from main(). + */ +Config *Config::create(int argc, char **argv) RT_NOEXCEPT +{ + /* + * Parse the command line. + */ + static const RTGETOPTDEF s_aOptions[] = + { + { "--comment", '#', RTGETOPT_REQ_STRING }, + { "--config", 'c', RTGETOPT_REQ_STRING }, + { "--log", 'l', RTGETOPT_REQ_STRING }, + { "--log-destinations", 'd', RTGETOPT_REQ_STRING }, + { "--log-flags", 'f', RTGETOPT_REQ_STRING }, + { "--log-group-settings", 'g', RTGETOPT_REQ_STRING }, + { "--relaxed", 'r', RTGETOPT_REQ_NOTHING }, + { "--strict", 's', RTGETOPT_REQ_NOTHING }, + }; + + RTGETOPTSTATE State; + int rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, RTGETOPTINIT_FLAGS_NO_STD_OPTS); + AssertRCReturn(rc, NULL); + + const char *pszLogFile = NULL; + const char *pszLogGroupSettings = NULL; + const char *pszLogDestinations = NULL; + const char *pszLogFlags = NULL; + const char *pszConfig = NULL; + const char *pszComment = NULL; + bool fStrict = true; + + for (;;) + { + RTGETOPTUNION ValueUnion; + rc = RTGetOpt(&State, &ValueUnion); + if (rc == 0) /* done */ + break; + + switch (rc) + { + case 'c': /* --config */ + pszConfig = ValueUnion.psz; + break; + + case 'l': + pszLogFile = ValueUnion.psz; + break; + + case 'd': + pszLogDestinations = ValueUnion.psz; + break; + + case 'f': + pszLogFlags = ValueUnion.psz; + break; + + case 'g': + pszLogGroupSettings = ValueUnion.psz; + break; + + case 'r': + fStrict = false; + break; + + case 's': + fStrict = true; + break; + + case '#': /* --comment */ + /* The sole purpose of this option is to allow identification of DHCP + * server instances in the process list. We ignore the required string + * argument of this option. 
*/ + pszComment = ValueUnion.psz; + break; + + default: + RTGetOptPrintError(rc, &ValueUnion); + return NULL; + } + } + + if (!pszConfig) + { + RTMsgError("No configuration file specified (--config file)!\n"); + return NULL; + } + + /* + * Init the log if a log file was specified. + */ + if (pszLogFile) + { + rc = i_logInitWithFilename(pszLogFile); + if (RT_FAILURE(rc)) + RTMsgError("Failed to initialize log file '%s': %Rrc", pszLogFile, rc); + + if (pszLogDestinations) + RTLogDestinations(RTLogRelGetDefaultInstance(), pszLogDestinations); + if (pszLogFlags) + RTLogFlags(RTLogRelGetDefaultInstance(), pszLogFlags); + if (pszLogGroupSettings) + RTLogGroupSettings(RTLogRelGetDefaultInstance(), pszLogGroupSettings); + + LogRel(("--config: %s\n", pszComment)); + if (pszComment) + LogRel(("--comment: %s\n", pszComment)); + } + + /* + * Read the config file. + */ + RTMsgInfo("reading config from '%s'...\n", pszConfig); + std::unique_ptr<Config> ptrConfig; + ptrConfig.reset(Config::i_read(pszConfig, fStrict)); + if (ptrConfig.get() != NULL) + { + rc = ptrConfig->i_complete(); + if (RT_SUCCESS(rc)) + return ptrConfig.release(); + } + return NULL; +} + + +/** + * + * @note The release log is not operational when this method is called. + */ +Config *Config::i_read(const char *pszFileName, bool fStrict) RT_NOEXCEPT +{ + if (pszFileName == NULL || pszFileName[0] == '\0') + { + DHCP_LOG_MSG_ERROR(("Config::i_read: Empty configuration filename\n")); + return NULL; + } + + xml::Document doc; + try + { + xml::XmlFileParser parser; + parser.read(pszFileName, doc); + } + catch (const xml::EIPRTFailure &e) + { + DHCP_LOG_MSG_ERROR(("Config::i_read: %s\n", e.what())); + return NULL; + } + catch (const RTCError &e) + { + DHCP_LOG_MSG_ERROR(("Config::i_read: %s\n", e.what())); + return NULL; + } + catch (...) 
+ { + DHCP_LOG_MSG_ERROR(("Config::i_read: Unknown exception while reading and parsing '%s'\n", pszFileName)); + return NULL; + } + + std::unique_ptr<Config> config(i_createInstanceAndCallInit()); + AssertReturn(config.get() != NULL, NULL); + + try + { + config->i_parseConfig(doc.getRootElement(), fStrict); + } + catch (const RTCError &e) + { + DHCP_LOG_MSG_ERROR(("Config::i_read: %s\n", e.what())); + return NULL; + } + catch (std::bad_alloc &) + { + DHCP_LOG_MSG_ERROR(("Config::i_read: std::bad_alloc\n")); + return NULL; + } + catch (...) + { + DHCP_LOG_MSG_ERROR(("Config::i_read: Unexpected exception\n")); + return NULL; + } + + return config.release(); +} + + +/** + * Helper for retrieving a IPv4 attribute. + * + * @param pElm The element to get the attribute from. + * @param pszAttrName The name of the attribute + * @param pAddr Where to return the address. + * @throws ConfigFileError + */ +static void getIPv4AddrAttribute(const xml::ElementNode *pElm, const char *pszAttrName, PRTNETADDRIPV4 pAddr) +{ + const char *pszAttrValue; + if (pElm->getAttributeValue(pszAttrName, &pszAttrValue)) + { + int rc = RTNetStrToIPv4Addr(pszAttrValue, pAddr); + if (RT_SUCCESS(rc)) + return; + throw ConfigFileError(pElm, "Attribute %s is not a valid IPv4 address: '%s' -> %Rrc", pszAttrName, pszAttrValue, rc); + } + throw ConfigFileError(pElm, "Required %s attribute missing", pszAttrName); +} + + +/** + * Helper for retrieving a MAC address attribute. + * + * @param pElm The element to get the attribute from. + * @param pszAttrName The name of the attribute + * @param pMacAddr Where to return the MAC address. 
+ * @throws ConfigFileError + */ +static void getMacAddressAttribute(const xml::ElementNode *pElm, const char *pszAttrName, PRTMAC pMacAddr) +{ + const char *pszAttrValue; + if (pElm->getAttributeValue(pszAttrName, &pszAttrValue)) + { + int rc = RTNetStrToMacAddr(pszAttrValue, pMacAddr); + if (RT_SUCCESS(rc) && rc != VWRN_TRAILING_CHARS) + return; + throw ConfigFileError(pElm, "attribute %s is not a valid MAC address: '%s' -> %Rrc", pszAttrName, pszAttrValue, rc); + } + throw ConfigFileError(pElm, "Required %s attribute missing", pszAttrName); +} + + +/** + * Internal worker for i_read() that parses the root element and everything + * below it. + * + * @param pElmRoot The root element. + * @param fStrict Set if we're in strict mode, clear if we just + * want to get on with it if we can. + * @throws std::bad_alloc, ConfigFileError + */ +void Config::i_parseConfig(const xml::ElementNode *pElmRoot, bool fStrict) +{ + /* + * Check the root element and call i_parseServer to do real work. + */ + if (pElmRoot == NULL) + throw ConfigFileError("Empty config file"); + + /** @todo XXX: NAMESPACE API IS COMPLETELY BROKEN, SO IGNORE IT FOR NOW */ + + if (!pElmRoot->nameEquals("DHCPServer")) + throw ConfigFileError("Unexpected root element '%s'", pElmRoot->getName()); + + i_parseServer(pElmRoot, fStrict); + +#if 0 /** @todo convert to LogRel2 stuff */ + // XXX: debug + for (optmap_t::const_iterator it = m_GlobalOptions.begin(); it != m_GlobalOptions.end(); ++it) { + std::shared_ptr<DhcpOption> opt(it->second); + + octets_t data; + opt->encode(data); + + bool space = false; + for (octets_t::const_iterator itData = data.begin(); itData != data.end(); ++itData) { + uint8_t c = *itData; + if (space) + std::cout << " "; + else + space = true; + std::cout << (int)c; + } + std::cout << std::endl; + } +#endif +} + + +/** + * Internal worker for parsing the elements under /DHCPServer/. + * + * @param pElmServer The DHCPServer element. 
+ * @param fStrict Set if we're in strict mode, clear if we just + * want to get on with it if we can. + * @throws std::bad_alloc, ConfigFileError + */ +void Config::i_parseServer(const xml::ElementNode *pElmServer, bool fStrict) +{ + /* + * <DHCPServer> attributes + */ + if (!pElmServer->getAttributeValue("networkName", m_strNetwork)) + throw ConfigFileError("DHCPServer/@networkName missing"); + if (m_strNetwork.isEmpty()) + throw ConfigFileError("DHCPServer/@networkName is empty"); + + const char *pszTrunkType; + if (!pElmServer->getAttributeValue("trunkType", &pszTrunkType)) + throw ConfigFileError("DHCPServer/@trunkType missing"); + if (strcmp(pszTrunkType, "none") == 0) + m_enmTrunkType = kIntNetTrunkType_None; + else if (strcmp(pszTrunkType, "whatever") == 0) + m_enmTrunkType = kIntNetTrunkType_WhateverNone; + else if (strcmp(pszTrunkType, "netflt") == 0) + m_enmTrunkType = kIntNetTrunkType_NetFlt; + else if (strcmp(pszTrunkType, "netadp") == 0) + m_enmTrunkType = kIntNetTrunkType_NetAdp; + else + throw ConfigFileError("Invalid DHCPServer/@trunkType value: %s", pszTrunkType); + + if ( m_enmTrunkType == kIntNetTrunkType_NetFlt + || m_enmTrunkType == kIntNetTrunkType_NetAdp) + { + if (!pElmServer->getAttributeValue("trunkName", &m_strTrunk)) + throw ConfigFileError("DHCPServer/@trunkName missing"); + } + else + m_strTrunk = ""; + + m_strLeasesFilename = pElmServer->findAttributeValue("leasesFilename"); /* optional */ + if (m_strLeasesFilename.isEmpty()) + { + int rc = m_strLeasesFilename.assignNoThrow(getHome()); + if (RT_SUCCESS(rc)) + rc = RTPathAppendCxx(m_strLeasesFilename, m_strNetwork); + if (RT_SUCCESS(rc)) + rc = m_strLeasesFilename.appendNoThrow("-Dhcpd.leases"); + if (RT_FAILURE(rc)) + throw ConfigFileError("Unexpected error constructing default m_strLeasesFilename value: %Rrc", rc); + RTPathPurgeFilename(RTPathFilename(m_strLeasesFilename.mutableRaw()), RTPATH_STR_F_STYLE_HOST); + m_strLeasesFilename.jolt(); + } + + /* + * Addresses and mask. 
+ */ + ::getIPv4AddrAttribute(pElmServer, "IPAddress", &m_IPv4Address); + ::getIPv4AddrAttribute(pElmServer, "networkMask", &m_IPv4Netmask); + ::getIPv4AddrAttribute(pElmServer, "lowerIP", &m_IPv4PoolFirst); + ::getIPv4AddrAttribute(pElmServer, "upperIP", &m_IPv4PoolLast); + + /* unicast IP address */ + if ((m_IPv4Address.au8[0] & 0xe0) == 0xe0) + throw ConfigFileError("DHCP server IP address is not unicast: %RTnaipv4", m_IPv4Address.u); + + /* valid netmask */ + int cPrefixBits; + int rc = RTNetMaskToPrefixIPv4(&m_IPv4Netmask, &cPrefixBits); + if (RT_FAILURE(rc) || cPrefixBits == 0) + throw ConfigFileError("IP mask is not valid: %RTnaipv4", m_IPv4Netmask.u); + + /* first IP is from the same network */ + if ((m_IPv4PoolFirst.u & m_IPv4Netmask.u) != (m_IPv4Address.u & m_IPv4Netmask.u)) + throw ConfigFileError("first pool address is outside the network %RTnaipv4/%d: %RTnaipv4", + (m_IPv4Address.u & m_IPv4Netmask.u), cPrefixBits, m_IPv4PoolFirst.u); + + /* last IP is from the same network */ + if ((m_IPv4PoolLast.u & m_IPv4Netmask.u) != (m_IPv4Address.u & m_IPv4Netmask.u)) + throw ConfigFileError("last pool address is outside the network %RTnaipv4/%d: %RTnaipv4\n", + (m_IPv4Address.u & m_IPv4Netmask.u), cPrefixBits, m_IPv4PoolLast.u); + + /* the pool is valid */ + if (RT_N2H_U32(m_IPv4PoolLast.u) < RT_N2H_U32(m_IPv4PoolFirst.u)) + throw ConfigFileError("pool range is invalid: %RTnaipv4 - %RTnaipv4", m_IPv4PoolFirst.u, m_IPv4PoolLast.u); + LogRel(("IP address: %RTnaipv4/%d\n", m_IPv4Address.u, cPrefixBits)); + LogRel(("Address pool: %RTnaipv4 - %RTnaipv4\n", m_IPv4PoolFirst.u, m_IPv4PoolLast.u)); + + /* + * <DHCPServer> children + */ + xml::NodesLoop it(*pElmServer); + const xml::ElementNode *pElmChild; + while ((pElmChild = it.forAllNodes()) != NULL) + { + /* Global options: */ + if (pElmChild->nameEquals("Options")) + m_GlobalConfig.initFromXml(pElmChild, fStrict, this); + /* Group w/ options: */ + else if (pElmChild->nameEquals("Group")) + { + 
std::unique_ptr<GroupConfig> ptrGroup(new GroupConfig()); + ptrGroup->initFromXml(pElmChild, fStrict, this); + if (m_GroupConfigs.find(ptrGroup->getGroupName()) == m_GroupConfigs.end()) + { + m_GroupConfigs[ptrGroup->getGroupName()] = ptrGroup.get(); + ptrGroup.release(); + } + else if (!fStrict) + LogRelFunc(("Ignoring duplicate group name: %s", ptrGroup->getGroupName().c_str())); + else + throw ConfigFileError("Duplicate group name: %s", ptrGroup->getGroupName().c_str()); + } + /* + * MAC address and per VM NIC configurations: + */ + else if (pElmChild->nameEquals("Config")) + { + std::unique_ptr<HostConfig> ptrHost(new HostConfig()); + ptrHost->initFromXml(pElmChild, fStrict, this); + if (m_HostConfigs.find(ptrHost->getMACAddress()) == m_HostConfigs.end()) + { + m_HostConfigs[ptrHost->getMACAddress()] = ptrHost.get(); + ptrHost.release(); + } + else if (!fStrict) + LogRelFunc(("Ignorining duplicate MAC address (Config): %RTmac", &ptrHost->getMACAddress())); + else + throw ConfigFileError("Duplicate MAC address (Config): %RTmac", &ptrHost->getMACAddress()); + } + else if (!fStrict) + LogRel(("Ignoring unexpected DHCPServer child: %s\n", pElmChild->getName())); + else + throw ConfigFileError("Unexpected DHCPServer child <%s>'", pElmChild->getName()); + } +} + + +/** + * Internal worker for parsing \<Option\> elements found under + * /DHCPServer/Options/, /DHCPServer/Group/ and /DHCPServer/Config/. + * + * @param pElmOption An \<Option\> element. 
+ * @throws std::bad_alloc, ConfigFileError + */
void ConfigLevelBase::i_parseOption(const xml::ElementNode *pElmOption)
{
    /* The mandatory 'name' attribute, holding the option number in decimal. */
    const char *pszOptName;
    if (!pElmOption->getAttributeValue("name", &pszOptName))
        throw ConfigFileError(pElmOption, "missing option name");

    uint8_t bOption;
    int     rcConv = RTStrToUInt8Full(pszOptName, 10, &bOption);
    if (rcConv != VINF_SUCCESS) /* warnings are rejected as well */
        throw ConfigFileError(pElmOption, "Bad option name '%s': %Rrc", pszOptName, rcConv);

    /* The optional 'encoding' attribute; only 0 and 1 are accepted. */
    uint32_t    uEncoding = 0; /* XXX: DHCPOptionEncoding_Normal */
    const char *pszEncodingText;
    if (pElmOption->getAttributeValue("encoding", &pszEncodingText))
    {
        rcConv = RTStrToUInt32Full(pszEncodingText, 10, &uEncoding);
        if (rcConv != VINF_SUCCESS) /* warnings are rejected as well */
            throw ConfigFileError(pElmOption, "Bad option encoding '%s': %Rrc", pszEncodingText, rcConv);

        bool const fKnownEncoding = uEncoding == 0  /* XXX: DHCPOptionEncoding_Normal */
                                 || uEncoding == 1; /* XXX: DHCPOptionEncoding_Hex */
        if (!fKnownEncoding)
            throw ConfigFileError(pElmOption, "Unknown encoding '%s'", pszEncodingText);
    }

    /* The 'value' attribute.  May be omitted for OptNoValue options like rapid commit. */
    const char *pszOptValue;
    bool const  fHasValue = pElmOption->getAttributeValue("value", &pszOptValue);
    if (!fHasValue)
        pszOptValue = "";

    /** @todo XXX: TODO: encoding, handle hex */
    DhcpOption *pNewOption = DhcpOption::parse(bOption, uEncoding, pszOptValue);
    if (!pNewOption)
        throw ConfigFileError(pElmOption, "Bad option '%s' (encoding %u): '%s' ", pszOptName, uEncoding, pszOptValue ? pszOptValue : "");

    /* Add it to the map: */
    m_Options << pNewOption;
}


/** + * Internal worker for parsing \<ForcedOption\> and \<SupressedOption\> elements + * found under /DHCPServer/Options/, /DHCPServer/Group/ and /DHCPServer/Config/. + * + * @param pElmOption The element. + * @param fForced Whether it's a ForcedOption (true) or + * SuppressedOption element. 
+ * @throws std::bad_alloc, ConfigFileError + */ +void ConfigLevelBase::i_parseForcedOrSuppressedOption(const xml::ElementNode *pElmOption, bool fForced) +{ + /* Only a name attribute: */ + const char *pszName; + if (!pElmOption->getAttributeValue("name", &pszName)) + throw ConfigFileError(pElmOption, "missing option name"); + + uint8_t u8Opt; + int rc = RTStrToUInt8Full(pszName, 10, &u8Opt); + if (rc != VINF_SUCCESS) /* no warnings either */ + throw ConfigFileError(pElmOption, "Bad option name '%s': %Rrc", pszName, rc); + + if (fForced) + m_vecForcedOptions.push_back(u8Opt); + else + m_vecSuppressedOptions.push_back(u8Opt); +} + + +/** + * Final children parser, handling only \<Option\> and barfing at anything else. + * + * @param pElmChild The child element to handle. + * @param fStrict Set if we're in strict mode, clear if we just + * want to get on with it if we can. That said, + * the caller will catch ConfigFileError exceptions + * and ignore them if @a fStrict is @c false. + * @param pConfig The configuration object. + * @throws std::bad_alloc, ConfigFileError + */ +void ConfigLevelBase::i_parseChild(const xml::ElementNode *pElmChild, bool fStrict, Config const *pConfig) +{ + /* + * Options. + */ + if (pElmChild->nameEquals("Option")) + { + i_parseOption(pElmChild); + return; + } + + /* + * Forced and supressed options. + */ + bool const fForced = pElmChild->nameEquals("ForcedOption"); + if (fForced || pElmChild->nameEquals("SuppressedOption")) + { + i_parseForcedOrSuppressedOption(pElmChild, fForced); + return; + } + + /* + * What's this? + */ + throw ConfigFileError(pElmChild->getParent(), "Unexpected child '%s'", pElmChild->getName()); + RT_NOREF(fStrict, pConfig); +} + + +/** + * Base class initialization taking a /DHCPServer/Options, /DHCPServer/Group or + * /DHCPServer/Config element as input and handling common attributes as well as + * any \<Option\> children. + * + * @param pElmConfig The configuration element to parse. 
+ * @param fStrict Set if we're in strict mode, clear if we just + * want to get on with it if we can. + * @param pConfig The configuration object. + * @throws std::bad_alloc, ConfigFileError + */ +void ConfigLevelBase::initFromXml(const xml::ElementNode *pElmConfig, bool fStrict, Config const *pConfig) +{ + /* + * Common attributes: + */ + if (!pElmConfig->getAttributeValue("secMinLeaseTime", &m_secMinLeaseTime)) + m_secMinLeaseTime = 0; + if (!pElmConfig->getAttributeValue("secDefaultLeaseTime", &m_secDefaultLeaseTime)) + m_secDefaultLeaseTime = 0; + if (!pElmConfig->getAttributeValue("secMaxLeaseTime", &m_secMaxLeaseTime)) + m_secMaxLeaseTime = 0; + + /* Swap min and max if max is smaller: */ + if (m_secMaxLeaseTime < m_secMinLeaseTime && m_secMinLeaseTime && m_secMaxLeaseTime) + { + LogRel(("Swapping min/max lease times: %u <-> %u\n", m_secMinLeaseTime, m_secMaxLeaseTime)); + uint32_t uTmp = m_secMaxLeaseTime; + m_secMaxLeaseTime = m_secMinLeaseTime; + m_secMinLeaseTime = uTmp; + } + + /* + * Parse children. + */ + xml::NodesLoop it(*pElmConfig); + const xml::ElementNode *pElmChild; + while ((pElmChild = it.forAllNodes()) != NULL) + { + try + { + i_parseChild(pElmChild, fStrict, pConfig); + } + catch (ConfigFileError &rXcpt) + { + if (fStrict) + throw rXcpt; + LogRelFunc(("Ignoring: %s\n", rXcpt.what())); + } + } +} + + +/** + * Internal worker for parsing the elements under /DHCPServer/Options/. + * + * @param pElmOptions The \<Options\> element. + * @param fStrict Set if we're in strict mode, clear if we just + * want to get on with it if we can. + * @param pConfig The configuration object. + * @throws std::bad_alloc, ConfigFileError + */ +void GlobalConfig::initFromXml(const xml::ElementNode *pElmOptions, bool fStrict, Config const *pConfig) +{ + ConfigLevelBase::initFromXml(pElmOptions, fStrict, pConfig); + + /* + * Resolve defaults here in the global config so we don't have to do this + * in Db::allocateBinding() for every lease request. 
+ */ + if (m_secMaxLeaseTime == 0 && m_secDefaultLeaseTime == 0 && m_secMinLeaseTime == 0) + { + m_secMinLeaseTime = 300; /* 5 min */ + m_secDefaultLeaseTime = 600; /* 10 min */ + m_secMaxLeaseTime = 12 * RT_SEC_1HOUR; /* 12 hours */ + } + else + { + if (m_secDefaultLeaseTime == 0) + { + if (m_secMaxLeaseTime != 0) + m_secDefaultLeaseTime = RT_MIN(RT_MAX(m_secMinLeaseTime, 600), m_secMaxLeaseTime); + else + { + m_secDefaultLeaseTime = RT_MAX(m_secMinLeaseTime, 600); + m_secMaxLeaseTime = RT_MAX(m_secDefaultLeaseTime, 12 * RT_SEC_1HOUR); + } + } + if (m_secMaxLeaseTime == 0) + m_secMaxLeaseTime = RT_MAX(RT_MAX(m_secMinLeaseTime, m_secDefaultLeaseTime), 12 * RT_SEC_1HOUR); + if (m_secMinLeaseTime == 0) + m_secMinLeaseTime = RT_MIN(300, m_secDefaultLeaseTime); + } + +} + + +/** + * Overrides base class to handle the condition elements under \<Group\>. + * + * @param pElmChild The child element. + * @param fStrict Set if we're in strict mode, clear if we just + * want to get on with it if we can. + * @param pConfig The configuration object. 
+ * @throws std::bad_alloc, ConfigFileError + */ +void GroupConfig::i_parseChild(const xml::ElementNode *pElmChild, bool fStrict, Config const *pConfig) +{ + /* + * Match the condition + */ + std::unique_ptr<GroupCondition> ptrCondition; + if (pElmChild->nameEquals("ConditionMAC")) + ptrCondition.reset(new GroupConditionMAC()); + else if (pElmChild->nameEquals("ConditionMACWildcard")) + ptrCondition.reset(new GroupConditionMACWildcard()); + else if (pElmChild->nameEquals("ConditionVendorClassID")) + ptrCondition.reset(new GroupConditionVendorClassID()); + else if (pElmChild->nameEquals("ConditionVendorClassIDWildcard")) + ptrCondition.reset(new GroupConditionVendorClassIDWildcard()); + else if (pElmChild->nameEquals("ConditionUserClassID")) + ptrCondition.reset(new GroupConditionUserClassID()); + else if (pElmChild->nameEquals("ConditionUserClassIDWildcard")) + ptrCondition.reset(new GroupConditionUserClassIDWildcard()); + else + { + /* + * Not a condition, pass it on to the base class. + */ + ConfigLevelBase::i_parseChild(pElmChild, fStrict, pConfig); + return; + } + + /* + * Get the attributes and initialize the condition. + */ + bool fInclusive; + if (!pElmChild->getAttributeValue("inclusive", fInclusive)) + fInclusive = true; + const char *pszValue = pElmChild->findAttributeValue("value"); + if (pszValue && *pszValue) + { + int rc = ptrCondition->initCondition(pszValue, fInclusive); + if (RT_SUCCESS(rc)) + { + /* + * Add it to the appropriate vector. 
+ */ + if (fInclusive) + m_Inclusive.push_back(ptrCondition.release()); + else + m_Exclusive.push_back(ptrCondition.release()); + } + else + { + ConfigFileError Xcpt(pElmChild, "initCondition failed with %Rrc for '%s' and %RTbool", rc, pszValue, fInclusive); + if (!fStrict) + LogRelFunc(("%s, ignoring condition\n", Xcpt.what())); + else + throw ConfigFileError(Xcpt); + } + } + else + { + ConfigFileError Xcpt(pElmChild, "condition value is empty or missing (inclusive=%RTbool)", fInclusive); + if (fStrict) + throw Xcpt; + LogRelFunc(("%s, ignoring condition\n", Xcpt.what())); + } +} + + +/** + * Internal worker for parsing the elements under /DHCPServer/Group/. + * + * @param pElmGroup The \<Group\> element. + * @param fStrict Set if we're in strict mode, clear if we just + * want to get on with it if we can. + * @param pConfig The configuration object. + * @throws std::bad_alloc, ConfigFileError + */ +void GroupConfig::initFromXml(const xml::ElementNode *pElmGroup, bool fStrict, Config const *pConfig) +{ + /* + * Attributes: + */ + if (!pElmGroup->getAttributeValue("name", m_strName) || m_strName.isEmpty()) + { + if (fStrict) + throw ConfigFileError(pElmGroup, "Group as no name or the name is empty"); + m_strName.printf("Group#%u", s_uGroupNo++); + } + + /* + * Do common initialization (including children). + */ + ConfigLevelBase::initFromXml(pElmGroup, fStrict, pConfig); +} + + +/** + * Internal worker for parsing the elements under /DHCPServer/Config/. + * + * VM Config entries are generated automatically from VirtualBox.xml + * with the MAC fetched from the VM config. The client id is nowhere + * in the picture there, so VM config is indexed with plain RTMAC, not + * ClientId (also see getOptions below). + * + * @param pElmConfig The \<Config\> element. + * @param fStrict Set if we're in strict mode, clear if we just + * want to get on with it if we can. + * @param pConfig The configuration object (for netmask). 
+ * @throws std::bad_alloc, ConfigFileError + */ +void HostConfig::initFromXml(const xml::ElementNode *pElmConfig, bool fStrict, Config const *pConfig) +{ + /* + * Attributes: + */ + /* The MAC address: */ + ::getMacAddressAttribute(pElmConfig, "MACAddress", &m_MACAddress); + + /* Name - optional: */ + if (!pElmConfig->getAttributeValue("name", m_strName)) + m_strName.printf("MAC:%RTmac", &m_MACAddress); + + /* Fixed IP address assignment - optional: */ + const char *pszFixedAddress = pElmConfig->findAttributeValue("fixedAddress"); + if (!pszFixedAddress || *RTStrStripL(pszFixedAddress) == '\0') + m_fHaveFixedAddress = false; + else + { + ::getIPv4AddrAttribute(pElmConfig, "fixedAddress", &m_FixedAddress); + if (pConfig->isInIPv4Network(m_FixedAddress)) + m_fHaveFixedAddress = true; + else + { + ConfigFileError Xcpt(pElmConfig, "fixedAddress '%s' is not the DHCP network", pszFixedAddress); + if (fStrict) + throw Xcpt; + LogRelFunc(("%s - ignoring the fixed address assignment\n", Xcpt.what())); + m_fHaveFixedAddress = false; + } + } + + /* + * Do common initialization. + */ + ConfigLevelBase::initFromXml(pElmConfig, fStrict, pConfig); +} + + +/** + * Assembles a list of hosts with fixed address assignments. + * + * @returns IPRT status code. + * @param a_rRetConfigs Where to return the configurations. + */ +int Config::getFixedAddressConfigs(HostConfigVec &a_rRetConfigs) const +{ + for (HostConfigMap::const_iterator it = m_HostConfigs.begin(); it != m_HostConfigs.end(); ++it) + { + HostConfig const *pHostConfig = it->second; + if (pHostConfig->haveFixedAddress()) + try + { + a_rRetConfigs.push_back(pHostConfig); + } + catch (std::bad_alloc &) + { + return VERR_NO_MEMORY; + } + } + return VINF_SUCCESS; +} + + +/** + * Assembles a priorities vector of configurations for the client. + * + * @returns a_rRetConfigs for convenience. + * @param a_rRetConfigs Where to return the configurations. + * @param a_ridClient The client ID. 
+ * @param a_ridVendorClass The vendor class ID if present. + * @param a_ridUserClass The user class ID if present + */ +Config::ConfigVec &Config::getConfigsForClient(Config::ConfigVec &a_rRetConfigs, const ClientId &a_ridClient, + const OptVendorClassId &a_ridVendorClass, + const OptUserClassId &a_ridUserClass) const +{ + /* Host specific config first: */ + HostConfigMap::const_iterator itHost = m_HostConfigs.find(a_ridClient.mac()); + if (itHost != m_HostConfigs.end()) + a_rRetConfigs.push_back(itHost->second); + + /* Groups: */ + for (GroupConfigMap::const_iterator itGrp = m_GroupConfigs.begin(); itGrp != m_GroupConfigs.end(); ++itGrp) + if (itGrp->second->match(a_ridClient, a_ridVendorClass, a_ridUserClass)) + a_rRetConfigs.push_back(itGrp->second); + + /* Global: */ + a_rRetConfigs.push_back(&m_GlobalConfig); + + return a_rRetConfigs; +} + + +/** + * Method used by DHCPD to assemble a list of options for the client. + * + * @returns a_rRetOpts for convenience + * @param a_rRetOpts Where to put the requested options. + * @param a_rReqOpts The requested options. + * @param a_rConfigs Relevant configurations returned by + * Config::getConfigsForClient(). + * + * @throws std::bad_alloc + */ +optmap_t &Config::getOptionsForClient(optmap_t &a_rRetOpts, const OptParameterRequest &a_rReqOpts, ConfigVec &a_rConfigs) const +{ + /* + * The client typcially requests a list of options. The list is subject to + * forced and supressed lists on each configuration level in a_rConfig. To + * efficiently manage it without resorting to maps, the current code + * assembles a C-style array of options on the stack that should be returned + * to the client. 
+ */ + uint8_t abOptions[256]; + size_t cOptions = 0; + size_t iFirstForced = 255; +#define IS_OPTION_PRESENT(a_bOption) (memchr(abOptions, (a_bOption), cOptions) != NULL) +#define APPEND_NOT_PRESENT_OPTION(a_bOption) do { \ + AssertLogRelMsgBreak(cOptions < sizeof(abOptions), \ + ("a_bOption=%#x abOptions=%.*Rhxs\n", (a_bOption), sizeof(abOptions), &abOptions[0])); \ + abOptions[cOptions++] = (a_bOption); \ + } while (0) + + const OptParameterRequest::value_t &reqValue = a_rReqOpts.value(); + if (reqValue.size() != 0) + { + /* Copy the requested list and append any forced options from the configs: */ + for (octets_t::const_iterator itOptReq = reqValue.begin(); itOptReq != reqValue.end(); ++itOptReq) + if (!IS_OPTION_PRESENT(*itOptReq)) + APPEND_NOT_PRESENT_OPTION(*itOptReq); + iFirstForced = cOptions; + for (Config::ConfigVec::const_iterator itCfg = a_rConfigs.begin(); itCfg != a_rConfigs.end(); ++itCfg) + { + octets_t const &rForced = (*itCfg)->getForcedOptions(); + for (octets_t::const_iterator itOpt = rForced.begin(); itOpt != rForced.end(); ++itOpt) + if (!IS_OPTION_PRESENT(*itOpt)) + { + LogRel3((">>> Forcing option %d (%s)\n", *itOpt, DhcpOption::name(*itOpt))); + APPEND_NOT_PRESENT_OPTION(*itOpt); + } + } + } + else + { + /* No options requests, feed the client all available options: */ + for (Config::ConfigVec::const_iterator itCfg = a_rConfigs.begin(); itCfg != a_rConfigs.end(); ++itCfg) + { + optmap_t const &rOptions = (*itCfg)->getOptions(); + for (optmap_t::const_iterator itOpt = rOptions.begin(); itOpt != rOptions.end(); ++itOpt) + if (!IS_OPTION_PRESENT(itOpt->first)) + APPEND_NOT_PRESENT_OPTION(itOpt->first); + + } + } + + /* + * Always supply the subnet: + */ + a_rRetOpts << new OptSubnetMask(m_IPv4Netmask); + + /* + * Try provide the options we've decided to return. 
+ */ + for (size_t iOpt = 0; iOpt < cOptions; iOpt++) + { + uint8_t const bOptReq = abOptions[iOpt]; + if (iOpt < iFirstForced) + LogRel2((">>> requested option %d (%s)\n", bOptReq, DhcpOption::name(bOptReq))); + else + LogRel2((">>> forced option %d (%s)\n", bOptReq, DhcpOption::name(bOptReq))); + + if (bOptReq != OptSubnetMask::optcode) + { + bool fFound = false; + for (size_t i = 0; i < a_rConfigs.size(); i++) + { + if (!a_rConfigs[i]->isOptionSuppressed(bOptReq)) + { + optmap_t::const_iterator itFound; + if (a_rConfigs[i]->findOption(bOptReq, itFound)) /* crap interface */ + { + LogRel2(("... found in %s (type %s)\n", a_rConfigs[i]->getName(), a_rConfigs[i]->getType())); + a_rRetOpts << itFound->second; + fFound = true; + break; + } + } + else + { + LogRel2(("... suppressed by %s (type %s)\n", a_rConfigs[i]->getName(), a_rConfigs[i]->getType())); + fFound = true; + break; + } + } + if (!fFound) + LogRel3(("... not found\n")); + } + else + LogRel2(("... always supplied\n")); + } + +#undef IS_OPTION_PRESENT +#undef APPEND_NOT_PRESENT_OPTION + return a_rRetOpts; +} + + + +/********************************************************************************************************************************* +* Group Condition Matching * +*********************************************************************************************************************************/ + +bool GroupConfig::match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass, + const OptUserClassId &a_ridUserClass) const +{ + /* + * Check the inclusive ones first, only one need to match. + */ + for (GroupConditionVec::const_iterator itIncl = m_Inclusive.begin(); itIncl != m_Inclusive.end(); ++itIncl) + if ((*itIncl)->match(a_ridClient, a_ridVendorClass, a_ridUserClass)) + { + /* + * Now make sure it isn't excluded by any of the exclusion condition. 
+ */ + for (GroupConditionVec::const_iterator itExcl = m_Exclusive.begin(); itExcl != m_Exclusive.end(); ++itExcl) + if ((*itIncl)->match(a_ridClient, a_ridVendorClass, a_ridUserClass)) + return false; + return true; + } + + return false; +} + + +int GroupCondition::initCondition(const char *a_pszValue, bool a_fInclusive) +{ + m_fInclusive = a_fInclusive; + return m_strValue.assignNoThrow(a_pszValue); +} + + +bool GroupCondition::matchClassId(bool a_fPresent, const std::vector<uint8_t> &a_rBytes, bool fWildcard) const RT_NOEXCEPT +{ + if (a_fPresent) + { + size_t const cbBytes = a_rBytes.size(); + if (cbBytes > 0) + { + if (a_rBytes[cbBytes - 1] == '\0') + { + uint8_t const *pb = &a_rBytes.front(); + if (!fWildcard) + return m_strValue.equals((const char *)pb); + return RTStrSimplePatternMatch(m_strValue.c_str(), (const char *)pb); + } + + if (cbBytes <= 255) + { + char szTmp[256]; + memcpy(szTmp, &a_rBytes.front(), cbBytes); + szTmp[cbBytes] = '\0'; + if (!fWildcard) + return m_strValue.equals(szTmp); + return RTStrSimplePatternMatch(m_strValue.c_str(), szTmp); + } + } + } + return false; + +} + + +int GroupConditionMAC::initCondition(const char *a_pszValue, bool a_fInclusive) +{ + int vrc = RTNetStrToMacAddr(a_pszValue, &m_MACAddress); + if (RT_SUCCESS(vrc)) + return GroupCondition::initCondition(a_pszValue, a_fInclusive); + return vrc; +} + + +bool GroupConditionMAC::match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass, + const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT +{ + RT_NOREF(a_ridVendorClass, a_ridUserClass); + return a_ridClient.mac() == m_MACAddress; +} + + +bool GroupConditionMACWildcard::match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass, + const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT +{ + RT_NOREF(a_ridVendorClass, a_ridUserClass); + char szTmp[32]; + RTStrPrintf(szTmp, sizeof(szTmp), "%RTmac", &a_ridClient.mac()); + return RTStrSimplePatternMatch(m_strValue.c_str(), szTmp); +} + + 
+bool GroupConditionVendorClassID::match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass, + const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT +{ + RT_NOREF(a_ridClient, a_ridUserClass); + return matchClassId(a_ridVendorClass.present(), a_ridVendorClass.value()); +} + + +bool GroupConditionVendorClassIDWildcard::match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass, + const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT +{ + RT_NOREF(a_ridClient, a_ridUserClass); + return matchClassId(a_ridVendorClass.present(), a_ridVendorClass.value(), true /*fWildcard*/); +} + + +bool GroupConditionUserClassID::match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass, + const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT +{ + RT_NOREF(a_ridClient, a_ridVendorClass); + return matchClassId(a_ridVendorClass.present(), a_ridUserClass.value()); +} + + +bool GroupConditionUserClassIDWildcard::match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass, + const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT +{ + RT_NOREF(a_ridClient, a_ridVendorClass); + return matchClassId(a_ridVendorClass.present(), a_ridUserClass.value(), true /*fWildcard*/); +} + diff --git a/src/VBox/NetworkServices/Dhcpd/Config.h b/src/VBox/NetworkServices/Dhcpd/Config.h new file mode 100644 index 00000000..133dc21c --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/Config.h @@ -0,0 +1,401 @@ +/* $Id: Config.h $ */ +/** @file + * DHCP server - server configuration + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VBOX_INCLUDED_SRC_Dhcpd_Config_h
#define VBOX_INCLUDED_SRC_Dhcpd_Config_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "DhcpdInternal.h"
#include <iprt/types.h>
#include <iprt/net.h>
#include <iprt/cpp/xml.h>
#include <iprt/cpp/ministring.h>

#include <VBox/intnet.h>

#include "DhcpOptions.h"
#include "ClientId.h"


class Config;

/**
 * Base configuration
 *
 * Holds what the global, group and host configuration levels have in common:
 * the DHCP options, the lease times and the forced/suppressed option lists.
 *
 * @author bird (2019-07-15)
 */
class ConfigLevelBase
{
private:
    /** DHCP options. */
    optmap_t        m_Options;
protected:
    /** Minimum lease time, zero means try next level up. */
    uint32_t        m_secMinLeaseTime;
    /** Default lease time, zero means try next level up. */
    uint32_t        m_secDefaultLeaseTime;
    /** Maximum lease time, zero means try next level up. */
    uint32_t        m_secMaxLeaseTime;

    /** Options forced unsolicited upon the client. */
    octets_t        m_vecForcedOptions;
    /** Options (typically from higher up) that should be hidden from the client. */
    octets_t        m_vecSuppressedOptions;

public:
    /** Starts out with no options and all lease times at zero. */
    ConfigLevelBase()
        : m_Options()
        , m_secMinLeaseTime(0)
        , m_secDefaultLeaseTime(0)
        , m_secMaxLeaseTime(0)
        , m_vecForcedOptions()
        , m_vecSuppressedOptions()
    { }

    virtual ~ConfigLevelBase()
    { }

    /** Initializes this level from its XML element; may throw ConfigFileError or std::bad_alloc. */
    virtual void    initFromXml(xml::ElementNode const *pElmConfig, bool fStrict, Config const *pConfig);
    /** Returns a short string naming the kind of level (the overrides here return "global", "group" or "host"). */
    virtual const char *getType() const RT_NOEXCEPT = 0;
    /** Returns the name of this particular configuration level. */
    virtual const char *getName() const RT_NOEXCEPT = 0;

    /**
     * Tries to find DHCP option @a bOpt, returning an success indicator and
     * iterator to the result.
     */
    bool            findOption(uint8_t bOpt, optmap_t::const_iterator &a_rItRet) const RT_NOEXCEPT
    {
        a_rItRet = m_Options.find(bOpt);
        return a_rItRet != m_Options.end();
    }

    /** Checks if @a bOpt is suppressed or not. */
    bool            isOptionSuppressed(uint8_t bOpt) const RT_NOEXCEPT
    {
        /* The size check comes first so front() is never used on an empty vector. */
        return m_vecSuppressedOptions.size() > 0
            && memchr(&m_vecSuppressedOptions.front(), bOpt, m_vecSuppressedOptions.size()) != NULL;
    }

    /** @name Accessors
     * @{ */
    uint32_t        getMinLeaseTime()     const RT_NOEXCEPT { return m_secMinLeaseTime; }
    uint32_t        getDefaultLeaseTime() const RT_NOEXCEPT { return m_secDefaultLeaseTime; }
    uint32_t        getMaxLeaseTime()     const RT_NOEXCEPT { return m_secMaxLeaseTime; }
    octets_t const &getForcedOptions()    const RT_NOEXCEPT { return m_vecForcedOptions; }
    octets_t const &getSuppressedOptions() const RT_NOEXCEPT { return m_vecSuppressedOptions; }
    optmap_t const &getOptions()          const RT_NOEXCEPT { return m_Options; }
    /** @} */

protected:
    /** Parses an \<Option\> element and adds the result to m_Options. */
    void            i_parseOption(const xml::ElementNode *pElmOption);
    /** Parses a \<ForcedOption\> (@a fForced true) or \<SuppressedOption\> element. */
    void            i_parseForcedOrSuppressedOption(const xml::ElementNode *pElmOption, bool fForced);
    /** Handles one child element; subclasses override this to accept additional element kinds. */
    virtual void    i_parseChild(const xml::ElementNode *pElmChild, bool fStrict, Config const *pConfig);
};


/**
 * Global config
 */
class GlobalConfig : public ConfigLevelBase
{
public:
    GlobalConfig()
        : ConfigLevelBase()
    { }
    /** Does the base class initialization and then resolves lease time defaults. */
    void initFromXml(xml::ElementNode const *pElmOptions, bool fStrict, Config const *pConfig) RT_OVERRIDE;
    const char *getType() const RT_NOEXCEPT RT_OVERRIDE { return "global"; }
    const char *getName() const RT_NOEXCEPT RT_OVERRIDE { return "GlobalConfig"; }
};


/**
 * Group membership condition.
 */
class GroupCondition
{
protected:
    /** The value. */
    RTCString   m_strValue;
    /** Inclusive (true) or exclusive (false), latter takes precedence. */
    bool        m_fInclusive;

public:
    virtual ~GroupCondition()
    {}

    /** Stores the value and the inclusive flag; returns an IPRT status code. */
    virtual int  initCondition(const char *a_pszValue, bool a_fInclusive);
    /** Checks whether the given client details satisfy this condition. */
    virtual bool match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass,
                       const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT = 0;

    /** @name accessors
     * @{ */
    RTCString const    &getValue() const RT_NOEXCEPT     { return m_strValue; }
    bool                getInclusive() const RT_NOEXCEPT { return m_fInclusive; }
    /** @} */

protected:
    /** Helper for the class ID conditions: compares the option bytes with m_strValue, exactly or as a pattern. */
    bool matchClassId(bool a_fPresent, std::vector<uint8_t> const &a_rBytes, bool fWildcard = false) const RT_NOEXCEPT;
};

/** MAC condition. */
class GroupConditionMAC : public GroupCondition
{
private:
    /** The MAC address to match, set by initCondition(). */
    RTMAC   m_MACAddress;
public:
    int  initCondition(const char *a_pszValue, bool a_fInclusive) RT_OVERRIDE;
    bool match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass,
               const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT RT_OVERRIDE;
};

/** MAC wildcard condition. */
class GroupConditionMACWildcard : public GroupCondition
{
public:
    bool match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass,
               const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT RT_OVERRIDE;
};

/** Vendor class ID condition. */
class GroupConditionVendorClassID : public GroupCondition
{
public:
    bool match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass,
               const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT RT_OVERRIDE;
};

/** Vendor class ID wildcard condition.
 */
class GroupConditionVendorClassIDWildcard : public GroupCondition
{
public:
    bool match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass,
               const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT RT_OVERRIDE;
};

/** User class ID condition. */
class GroupConditionUserClassID : public GroupCondition
{
public:
    bool match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass,
               const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT RT_OVERRIDE;
};

/** User class ID wildcard condition. */
class GroupConditionUserClassIDWildcard : public GroupCondition
{
public:
    bool match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass,
               const OptUserClassId &a_ridUserClass) const RT_NOEXCEPT RT_OVERRIDE;
};


/**
 * Group config
 *
 * NOTE(review): the condition vectors hold raw pointers and this class
 * declares no destructor, so nothing visible here frees the conditions -
 * confirm they are meant to live as long as the process.
 */
class GroupConfig : public ConfigLevelBase
{
private:
    typedef std::vector<GroupCondition *> GroupConditionVec;

    /** The group name. */
    RTCString           m_strName;
    /** Vector containing the inclusive membership conditions (must match one). */
    GroupConditionVec   m_Inclusive;
    /** Vector containing the exclusive membership conditions (must match none). */
    GroupConditionVec   m_Exclusive;

public:
    GroupConfig()
        : ConfigLevelBase()
    {
    }

    /** Reads the group name and then does the common initialization. */
    void initFromXml(xml::ElementNode const *pElmGroup, bool fStrict, Config const *pConfig) RT_OVERRIDE;
    /** Checks whether the client described by the arguments is a member of this group. */
    bool match(const ClientId &a_ridClient, const OptVendorClassId &a_ridVendorClass, const OptUserClassId &a_ridUserClass) const;

    /** @name Accessors
     * @{ */
    const char         *getType() const RT_NOEXCEPT RT_OVERRIDE { return "group"; }
    const char         *getName() const RT_NOEXCEPT RT_OVERRIDE { return m_strName.c_str(); }
    RTCString const    &getGroupName() const RT_NOEXCEPT        { return m_strName; }
    /** @} */

protected:
    /** Handles the condition child elements and passes everything else on to the base class. */
    void                i_parseChild(const xml::ElementNode *pElmChild, bool fStrict, Config const *pConfig) RT_OVERRIDE;
    /** Used to name unnamed groups. */
    static uint32_t     s_uGroupNo;
};


/**
 * Host (MAC address) specific configuration.
 */
class HostConfig : public ConfigLevelBase
{
protected:
    /** The MAC address. */
    RTMAC           m_MACAddress;
    /** Name annotating the entry. */
    RTCString       m_strName;
    /** Fixed address assignment when m_fHaveFixedAddress is true. */
    RTNETADDRIPV4   m_FixedAddress;
    /** Set if we have a fixed address assignment. */
    bool            m_fHaveFixedAddress;

public:
    /** Starts out with zeroed addresses and no fixed address assignment. */
    HostConfig()
        : ConfigLevelBase()
        , m_fHaveFixedAddress(false)
    {
        RT_ZERO(m_MACAddress);
        RT_ZERO(m_FixedAddress);
    }

    /** Reads the MAC address, name and fixed address and then does the common initialization. */
    void initFromXml(xml::ElementNode const *pElmConfig, bool fStrict, Config const *pConfig) RT_OVERRIDE;
    const char *getType() const RT_NOEXCEPT RT_OVERRIDE { return "host"; }
    const char *getName() const RT_NOEXCEPT RT_OVERRIDE { return m_strName.c_str(); }

    /** @name Accessors
     * @{ */
    RTMAC const            &getMACAddress() const RT_NOEXCEPT       { return m_MACAddress; }
    bool                    haveFixedAddress() const RT_NOEXCEPT    { return m_fHaveFixedAddress; }
    RTNETADDRIPV4 const &   getFixedAddress() const RT_NOEXCEPT     { return m_FixedAddress; }
    /** @} */
};


/**
 * DHCP server configuration.
 */
class Config
{
    /** Group configuration map. */
    typedef std::map<RTCString, GroupConfig const * > GroupConfigMap;
    /** Host configuration map. */
    typedef std::map<RTMAC, HostConfig const * > HostConfigMap;


    RTCString       m_strHome;          /**< path of ~/.VirtualBox or equivalent, */

    RTCString       m_strNetwork;       /**< The name of the internal network the DHCP server is connected to. */
    RTCString       m_strLeasesFilename;/**< The lease DB filename. */

    RTCString       m_strTrunk;         /**< The trunk name of the internal network. */
    INTNETTRUNKTYPE m_enmTrunkType;     /**< The trunk type of the internal network. */

    RTMAC           m_MacAddress;       /**< The MAC address for the DHCP server. */

    RTNETADDRIPV4   m_IPv4Address;      /**< The IPv4 address of the DHCP server. */
    RTNETADDRIPV4   m_IPv4Netmask;      /**< The IPv4 netmask for the DHCP server. */

    RTNETADDRIPV4   m_IPv4PoolFirst;    /**< The first IPv4 address in the pool. */
    RTNETADDRIPV4   m_IPv4PoolLast;     /**< The last IPV4 address in the pool (inclusive like all other 'last' variables). */


    /** The global configuration. */
    GlobalConfig    m_GlobalConfig;
    /** The group configurations, indexed by group name. */
    GroupConfigMap  m_GroupConfigs;
    /** The host configurations, indexed by MAC address. */
    HostConfigMap   m_HostConfigs;

    /** Set if we've initialized the log already (via command line). */
    static bool     g_fInitializedLog;

private:
    /* Private constructor; instances come from the factory methods below. */
    Config();

    int                 i_init() RT_NOEXCEPT;
    int                 i_homeInit() RT_NOEXCEPT;
    static Config      *i_createInstanceAndCallInit() RT_NOEXCEPT;
    int                 i_logInit() RT_NOEXCEPT;
    static int          i_logInitWithFilename(const char *pszFilename) RT_NOEXCEPT;
    int                 i_complete() RT_NOEXCEPT;

public:
    /** @name Factory methods
     * @{ */
    static Config      *hardcoded() RT_NOEXCEPT;                    /**< For testing. */
    static Config      *create(int argc, char **argv) RT_NOEXCEPT;  /**< --config */
    static Config      *compat(int argc, char **argv);
    /** @} */

    /** @name Accessors
     * @{ */
    const RTCString    &getHome() const RT_NOEXCEPT             { return m_strHome; }

    const RTCString    &getNetwork() const RT_NOEXCEPT          { return m_strNetwork; }
    const RTCString    &getLeasesFilename() const RT_NOEXCEPT   { return m_strLeasesFilename; }

    const RTCString    &getTrunk() const RT_NOEXCEPT            { return m_strTrunk; }
    INTNETTRUNKTYPE     getTrunkType() const RT_NOEXCEPT        { return m_enmTrunkType; }

    const RTMAC        &getMacAddress() const RT_NOEXCEPT       { return m_MacAddress; }

    RTNETADDRIPV4       getIPv4Address() const RT_NOEXCEPT      { return m_IPv4Address; }
    RTNETADDRIPV4       getIPv4Netmask() const RT_NOEXCEPT      { return m_IPv4Netmask; }
    RTNETADDRIPV4       getIPv4PoolFirst() const RT_NOEXCEPT    { return m_IPv4PoolFirst; }
    RTNETADDRIPV4       getIPv4PoolLast() const RT_NOEXCEPT     { return m_IPv4PoolLast; }
    /** @} */

    /** Gets the network (IP masked by network mask). */
    RTNETADDRIPV4       getIPv4Network() const RT_NOEXCEPT
    {
        RTNETADDRIPV4 Network;
        Network.u = m_IPv4Netmask.u & m_IPv4Address.u;
        return Network;
    }
    /** Checks if the given IPv4 address is in the DHCP server network. */
    bool                isInIPv4Network(RTNETADDRIPV4 a_rAddress) const RT_NOEXCEPT
    {
        return (a_rAddress.u & getIPv4Netmask().u) == getIPv4Network().u;
    }

    /** Host configuration vector. */
    typedef std::vector<HostConfig const *> HostConfigVec;
    /** Appends all host configurations having a fixed address assignment; returns an IPRT status code. */
    int                 getFixedAddressConfigs(HostConfigVec &a_rRetConfigs) const;

    /** Configuration vector. */
    typedef std::vector<ConfigLevelBase const *> ConfigVec;
    /** Collects the configurations applying to the client: host specific first, then matching groups, then global. */
    ConfigVec          &getConfigsForClient(ConfigVec &a_rRetConfigs, const ClientId &a_ridClient,
                                            const OptVendorClassId &a_ridVendorClass,
                                            const OptUserClassId &a_ridUserClass) const;
    /** Assembles the options to return to the client from the given configurations. */
    optmap_t           &getOptionsForClient(optmap_t &a_rRetOpts, const OptParameterRequest &a_rReqOpts,
                                            ConfigVec &a_rConfigs) const;

private:
    /** @name Configuration file reading and parsing
     * @{ */
    static Config      *i_read(const char *pszFilename, bool fStrict) RT_NOEXCEPT;
    void                i_parseConfig(const xml::ElementNode *pElmRoot, bool fStrict);
    void                i_parseServer(const xml::ElementNode *pElmServer, bool fStrict);
    /** @} */
};

#endif /* !VBOX_INCLUDED_SRC_Dhcpd_Config_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/DHCPD.cpp b/src/VBox/NetworkServices/Dhcpd/DHCPD.cpp
new file mode 100644
index 00000000..d0b3f819
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/DHCPD.cpp
@@ -0,0 +1,420 @@
/* $Id: DHCPD.cpp $ */
/** @file
 * DHCP server - protocol logic
 */

/*
 * Copyright (C) 2017-2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "DhcpdInternal.h" +#include "DHCPD.h" +#include "DhcpOptions.h" + +#include <iprt/message.h> + + +DHCPD::DHCPD() + : m_pConfig(NULL), m_db() +{ +} + + +/** + * Initializes the DHCPD with the given config. + * + * @returns VBox status code. + * @param pConfig The configuration to use. + */ +int DHCPD::init(const Config *pConfig) RT_NOEXCEPT +{ + Assert(pConfig); + AssertReturn(!m_pConfig, VERR_INVALID_STATE); + m_pConfig = pConfig; + + /* Load the lease database, ignoring most issues except being out of memory: */ + int rc = m_db.init(pConfig); + if (RT_SUCCESS(rc)) + { + rc = i_loadLeases(); + if (rc != VERR_NO_MEMORY) + return VINF_SUCCESS; + + DHCP_LOG_MSG_ERROR(("Ran out of memory loading leases from '%s'. Try rename or delete the file.\n", + pConfig->getLeasesFilename().c_str())); + } + return rc; +} + + +/** + * Load leases from pConfig->getLeasesFilename(). + */ +int DHCPD::i_loadLeases() RT_NOEXCEPT +{ + return m_db.loadLeases(m_pConfig->getLeasesFilename()); +} + + +/** + * Save the current leases to pConfig->getLeasesFilename(), doing expiry first. + * + * This is called after m_db is updated during a client request, so the on disk + * database is always up-to-date. This means it doesn't matter if we're + * terminated with extreme prejudice, and it allows Main to look up IP addresses + * for VMs. + * + * @throws nothing + */ +void DHCPD::i_saveLeases() RT_NOEXCEPT +{ + m_db.expire(); + m_db.writeLeases(m_pConfig->getLeasesFilename()); +} + + +/** + * Process a DHCP client message. + * + * Called by VBoxNetDhcpd::dhcp4Recv(). + * + * @returns Pointer to DHCP reply (caller deletes this). 
NULL if no reply + * warranted or we're out of memory. + * @param req The client message. + * @throws nothing + */ +DhcpServerMessage *DHCPD::process(DhcpClientMessage &req) RT_NOEXCEPT +{ + /* + * Dump the package if release log level 3+1 are enable or if debug logging is + * enabled. We don't normally want to do this at the default log level, of course. + */ + if ((LogRelIs3Enabled() && LogRelIsEnabled()) || LogIsEnabled()) + req.dump(); + + /* + * Fend off requests that are not for us. + */ + OptServerId sid(req); + if (sid.present() && sid.value().u != m_pConfig->getIPv4Address().u) + { + if (req.broadcasted() && req.messageType() == RTNET_DHCP_MT_REQUEST) + { + LogRel2(("Message is not for us, canceling any pending offer.\n")); + m_db.cancelOffer(req); + } + else + LogRel2(("Message is not for us.\n")); + return NULL; + } + + /* + * Process it. + */ + DhcpServerMessage *reply = NULL; + + switch (req.messageType()) + { + /* + * Requests that require server's reply. + */ + case RTNET_DHCP_MT_DISCOVER: + try + { + reply = i_doDiscover(req); + } + catch (std::bad_alloc &) + { + LogRelFunc(("i_doDiscover threw bad_alloc\n")); + } + break; + + case RTNET_DHCP_MT_REQUEST: + try + { + reply = i_doRequest(req); + } + catch (std::bad_alloc &) + { + LogRelFunc(("i_doRequest threw bad_alloc\n")); + } + break; + + case RTNET_DHCP_MT_INFORM: + try + { + reply = i_doInform(req); + } + catch (std::bad_alloc &) + { + LogRelFunc(("i_doInform threw bad_alloc\n")); + } + break; + + /* + * Requests that don't have a reply. + */ + case RTNET_DHCP_MT_DECLINE: + i_doDecline(req); + break; + + case RTNET_DHCP_MT_RELEASE: + i_doRelease(req); + break; + + /* + * Unexpected or unknown message types. 
+ */ + case RTNET_DHCP_MT_OFFER: + LogRel2(("Ignoring unexpected message of type RTNET_DHCP_MT_OFFER!\n")); + break; + case RTNET_DHCP_MT_ACK: + LogRel2(("Ignoring unexpected message of type RTNET_DHCP_MT_ACK!\n")); + break; + case RTNET_DHCP_MT_NAC: + LogRel2(("Ignoring unexpected message of type RTNET_DHCP_MT_NAC!\n")); + break; + default: + LogRel2(("Ignoring unexpected message of unknown type: %d (%#x)!\n", req.messageType(), req.messageType())); + break; + } + + return reply; +} + + +/** + * Internal helper. + * + * @throws std::bad_alloc + */ +DhcpServerMessage *DHCPD::i_createMessage(int type, const DhcpClientMessage &req) +{ + return new DhcpServerMessage(req, type, m_pConfig->getIPv4Address()); +} + + +/** + * 4.3.1 DHCPDISCOVER message + * + * When a server receives a DHCPDISCOVER message from a client, the server + * chooses a network address for the requesting client. If no address is + * available, the server may choose to report the problem to the system + * administrator. If an address is available, the new address SHOULD be chosen + * as follows: + * - The client's current address as recorded in the client's current binding, + * ELSE + * - The client's previous address as recorded in the client's (now expired or + * released) binding, if that address is in the server's pool of available + * addresses and not already allocated, ELSE + * - The address requested in the 'Requested IP Address' option, if that + * address is valid and not already allocated, ELSE + * - A new address allocated from the server's pool of available addresses; + * the address is selected based on the subnet from which the message was + * received (if 'giaddr' is 0) or on the address of the relay agent that + * forwarded the message ('giaddr' when not 0). + * + * ... + * + * @throws std::bad_alloc + */ +DhcpServerMessage *DHCPD::i_doDiscover(const DhcpClientMessage &req) +{ + /** @todo + * XXX: TODO: Windows iSCSI initiator sends DHCPDISCOVER first and + * it has ciaddr filled. 
Shouldn't let it screw up the normal + * lease we already have for that client, but we should probably + * reply with a pro-forma offer. + */ + if (req.ciaddr().u != 0) + return NULL; + + Config::ConfigVec vecConfigs; + m_pConfig->getConfigsForClient(vecConfigs, req.clientId(), OptVendorClassId(req), OptUserClassId(req)); + + Binding *b = m_db.allocateBinding(req, vecConfigs); + if (b == NULL) + return NULL; + + std::unique_ptr<DhcpServerMessage> reply; + + bool fRapidCommit = OptRapidCommit(req).present(); + if (!fRapidCommit) + { + reply.reset(i_createMessage(RTNET_DHCP_MT_OFFER, req)); + + if (b->state() < Binding::OFFERED) + b->setState(Binding::OFFERED); + + /** @todo use small lease time internally to quickly free unclaimed offers? */ + } + else + { + reply.reset(i_createMessage(RTNET_DHCP_MT_ACK, req)); + reply->addOption(OptRapidCommit(true)); + + b->setState(Binding::ACKED); + if (!b->isFixed()) + i_saveLeases(); + } + + reply->setYiaddr(b->addr()); + reply->addOption(OptLeaseTime(b->leaseTime())); + + OptParameterRequest optlist(req); + optmap_t replyOptions; + reply->addOptions(m_pConfig->getOptionsForClient(replyOptions, optlist, vecConfigs)); + + // reply->maybeUnicast(req); /** @todo XXX: we reject ciaddr != 0 above */ + return reply.release(); +} + + +/** + * 4.3.2 DHCPREQUEST message + * + * A DHCPREQUEST message may come from a client responding to a DHCPOFFER + * message from a server, from a client verifying a previously allocated IP + * address or from a client extending the lease on a network address. If the + * DHCPREQUEST message contains a 'server identifier' option, the message is in + * response to a DHCPOFFER message. Otherwise, the message is a request to + * verify or extend an existing lease. If the client uses a 'client identifier' + * in a DHCPREQUEST message, it MUST use that same 'client identifier' in all + * subsequent messages. 
If the client included a list of requested parameters in
 * a DHCPDISCOVER message, it MUST include that list in all subsequent messages.
 *
 * ...
 *
 * @returns The ACK or NAK to send to the client.
 * @param   req         The REQUEST message.
 * @throws  std::bad_alloc
 */
DhcpServerMessage *DHCPD::i_doRequest(const DhcpClientMessage &req)
{
    /*
     * A non-zero ciaddr that disagrees with the requested address option is
     * answered with an explanatory NAK.
     */
    OptRequestedAddress reqAddr(req);
    if (req.ciaddr().u != 0)
    {
        if (reqAddr.present() && reqAddr.value().u != req.ciaddr().u)
        {
            std::unique_ptr<DhcpServerMessage> ptrNak(i_createMessage(RTNET_DHCP_MT_NAC, req));
            ptrNak->addOption(OptMessage("Requested address does not match ciaddr"));
            return ptrNak.release();
        }
    }

    Config::ConfigVec vecConfigs;
    m_pConfig->getConfigsForClient(vecConfigs, req.clientId(), OptVendorClassId(req), OptUserClassId(req));

    /* No binding means we cannot serve the client: plain NAK. */
    Binding *pBinding = m_db.allocateBinding(req, vecConfigs);
    if (!pBinding)
        return i_createMessage(RTNET_DHCP_MT_NAC, req);

    std::unique_ptr<DhcpServerMessage> ptrAck(i_createMessage(RTNET_DHCP_MT_ACK, req));

    /* Commit the lease; fixed assignments are not written to the lease file. */
    pBinding->setState(Binding::ACKED);
    if (!pBinding->isFixed())
        i_saveLeases();

    ptrAck->setYiaddr(pBinding->addr());
    ptrAck->addOption(OptLeaseTime(pBinding->leaseTime()));

    /* Add the options the client asked for that the configuration provides. */
    OptParameterRequest optlist(req);
    optmap_t replyOptions;
    ptrAck->addOptions(m_pConfig->getOptionsForClient(replyOptions, optlist, vecConfigs));

    ptrAck->maybeUnicast(req);
    return ptrAck.release();
}


/**
 * 4.3.5 DHCPINFORM message
 *
 * The server responds to a DHCPINFORM message by sending a DHCPACK message
 * directly to the address given in the 'ciaddr' field of the DHCPINFORM
 * message.  The server MUST NOT send a lease expiration time to the client and
 * SHOULD NOT fill in 'yiaddr'.  The server includes other parameters in the
 * DHCPACK message as defined in section 4.3.1.
+ * + * @throws std::bad_alloc + */ +DhcpServerMessage *DHCPD::i_doInform(const DhcpClientMessage &req) +{ + if (req.ciaddr().u == 0) + return NULL; + + OptParameterRequest optlist(req); + if (!optlist.present()) + return NULL; + + Config::ConfigVec vecConfigs; + optmap_t info; + m_pConfig->getOptionsForClient(info, optlist, m_pConfig->getConfigsForClient(vecConfigs, req.clientId(), + OptVendorClassId(req), OptUserClassId(req))); + if (info.empty()) + return NULL; + + std::unique_ptr<DhcpServerMessage> ack(i_createMessage(RTNET_DHCP_MT_ACK, req)); + ack->addOptions(info); + ack->maybeUnicast(req); + return ack.release(); +} + + +/** + * 4.3.3 DHCPDECLINE message + * + * If the server receives a DHCPDECLINE message, the client has discovered + * through some other means that the suggested network address is already in + * use. The server MUST mark the network address as not available and SHOULD + * notify the local system administrator of a possible configuration problem. + * + * @throws nothing + */ +DhcpServerMessage *DHCPD::i_doDecline(const DhcpClientMessage &req) RT_NOEXCEPT +{ + RT_NOREF(req); + return NULL; +} + + +/** + * 4.3.4 DHCPRELEASE message + * + * Upon receipt of a DHCPRELEASE message, the server marks the network address + * as not allocated. The server SHOULD retain a record of the client's + * initialization parameters for possible reuse in response to subsequent + * requests from the client. 
+ * + * @throws nothing + */ +DhcpServerMessage *DHCPD::i_doRelease(const DhcpClientMessage &req) RT_NOEXCEPT +{ + if (req.ciaddr().u != 0) + { + bool fReleased = m_db.releaseBinding(req); + if (fReleased) + i_saveLeases(); + } + + return NULL; +} diff --git a/src/VBox/NetworkServices/Dhcpd/DHCPD.h b/src/VBox/NetworkServices/Dhcpd/DHCPD.h new file mode 100644 index 00000000..5b938868 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/DHCPD.h @@ -0,0 +1,88 @@ +/* $Id: DHCPD.h $ */ +/** @file + * DHCP server - protocol logic + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Dhcpd_DHCPD_h +#define VBOX_INCLUDED_SRC_Dhcpd_DHCPD_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include "DhcpdInternal.h" +#include <iprt/cpp/ministring.h> +#include "Config.h" +#include "DhcpMessage.h" +#include "Db.h" + + +/** + * The core of the DHCP server. + * + * This class is feed DhcpClientMessages that VBoxNetDhcpd has picked up from + * the network. After processing a message it returns the appropriate response + * (if any) which VBoxNetDhcpd sends out. + */ +class DHCPD +{ + /** The DHCP configuration. 
*/ + const Config *m_pConfig; + /** The lease database. */ + Db m_db; + +public: + DHCPD(); + + int init(const Config *) RT_NOEXCEPT; + + DhcpServerMessage *process(const std::unique_ptr<DhcpClientMessage> &req) RT_NOEXCEPT + { + if (req.get() != NULL) + return process(*req.get()); + return NULL; + } + + DhcpServerMessage *process(DhcpClientMessage &req) RT_NOEXCEPT; + +private: + /** @name DHCP message processing methods + * @{ */ + DhcpServerMessage *i_doDiscover(const DhcpClientMessage &req); + DhcpServerMessage *i_doRequest(const DhcpClientMessage &req); + DhcpServerMessage *i_doInform(const DhcpClientMessage &req); + DhcpServerMessage *i_doDecline(const DhcpClientMessage &req) RT_NOEXCEPT; + DhcpServerMessage *i_doRelease(const DhcpClientMessage &req) RT_NOEXCEPT; + + DhcpServerMessage *i_createMessage(int type, const DhcpClientMessage &req); + /** @} */ + + /** @name Lease database handling + * @{ */ + int i_loadLeases() RT_NOEXCEPT; + void i_saveLeases() RT_NOEXCEPT; + /** @} */ +}; + +#endif /* !VBOX_INCLUDED_SRC_Dhcpd_DHCPD_h */ diff --git a/src/VBox/NetworkServices/Dhcpd/Db.cpp b/src/VBox/NetworkServices/Dhcpd/Db.cpp new file mode 100644 index 00000000..f4e9012a --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/Db.cpp @@ -0,0 +1,1060 @@ +/* $Id: Db.cpp $ */ +/** @file + * DHCP server - address database + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "DhcpdInternal.h" +#include <iprt/errcore.h> + +#include "Db.h" + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** Indicates whether Binding::registerFormat() has been called successfully yet. */ +bool Binding::g_fFormatRegistered = false; + + +/** + * Registers the Binding format type callback ("%R[binding]").
+ */ +void Binding::registerFormat() RT_NOEXCEPT +{ + if (!g_fFormatRegistered) + { + int rc = RTStrFormatTypeRegister("binding", rtStrFormat, NULL); + AssertRC(rc); + g_fFormatRegistered = true; + } +} + + +/** + * @callback_method_impl{FNRTSTRFORMATTYPE, Formats ClientId via "%R[binding]".} + */ +DECLCALLBACK(size_t) +Binding::rtStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + + AssertReturn(strcmp(pszType, "binding") == 0, 0); + RT_NOREF(pszType); + + RT_NOREF(cchWidth, cchPrecision, fFlags); + RT_NOREF(pvUser); + + const Binding *b = static_cast<const Binding *>(pvValue); + if (b == NULL) + return pfnOutput(pvArgOutput, RT_STR_TUPLE("<NULL>")); + + size_t cb = RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "%RTnaipv4", b->m_addr.u); + if (b->m_state == Binding::FREE) + cb += pfnOutput(pvArgOutput, RT_STR_TUPLE(" free")); + else if (b->m_fFixed) + cb += pfnOutput(pvArgOutput, RT_STR_TUPLE(" fixed")); + else + { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, " to %R[id], %s, valid from ", &b->m_id, b->stateName()); + + Timestamp tsIssued = b->issued(); + cb += tsIssued.strFormatHelper(pfnOutput, pvArgOutput); + + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, " for %ds until ", b->leaseTime()); + + Timestamp tsValid = b->issued(); + tsValid.addSeconds(b->leaseTime()); + cb += tsValid.strFormatHelper(pfnOutput, pvArgOutput); + } + + return cb; +} + + +/** + * Used to update the client ID of a fixed address assignment. + * + * We only have the MAC address when prepraring the binding, so the full client + * ID must be supplied when the client requests it. + * + * @param a_ridClient The client ID. + * @throws std::bad_alloc + */ +void Binding::idUpdate(const ClientId &a_ridClient) +{ + AssertReturnVoid(isFixed()); + m_id = a_ridClient; +} + + +/** + * Get the state as a string for the XML lease database. 
+ */ +const char *Binding::stateName() const RT_NOEXCEPT +{ + switch (m_state) + { + case FREE: + return "free"; + case RELEASED: + return "released"; + case EXPIRED: + return "expired"; + case OFFERED: + return "offered"; + case ACKED: + return "acked"; + default: + AssertMsgFailed(("%d\n", m_state)); + return "released"; + } +} + + +/** + * Sets the state by name (reverse of Binding::stateName()). + */ +Binding &Binding::setState(const char *pszStateName) RT_NOEXCEPT +{ + if (strcmp(pszStateName, "free") == 0) + m_state = Binding::FREE; + else if (strcmp(pszStateName, "released") == 0) + m_state = Binding::RELEASED; + else if (strcmp(pszStateName, "expired") == 0) + m_state = Binding::EXPIRED; + else if (strcmp(pszStateName, "offered") == 0) + m_state = Binding::OFFERED; + else if (strcmp(pszStateName, "acked") == 0) + m_state = Binding::ACKED; + else + { + AssertMsgFailed(("%d\n", m_state)); + m_state = Binding::RELEASED; + } + + return *this; +} + + +/** + * Expires the binding if it's past the specified deadline. + * + * @returns False if already expired, released or freed, otherwise true (i.e. + * does not indicate whether action was taken or not). + * @param tsDeadline The expiry deadline to use. + */ +bool Binding::expire(Timestamp tsDeadline) RT_NOEXCEPT +{ + if (m_state <= Binding::EXPIRED || m_fFixed) + return false; + + Timestamp tsExpire = m_issued; + tsExpire.addSeconds(m_secLease); + + if (tsExpire < tsDeadline) + { + if (m_state == Binding::OFFERED) + setState(Binding::FREE); + else + setState(Binding::EXPIRED); + } + return true; +} + + +/** + * Serializes the binding to XML for the lease database. + * + * @throw std::bad_alloc + * @note DHCPServerImpl.cpp contains a reader, keep it in sync. 
+ */ +void Binding::toXML(xml::ElementNode *pElmParent) const +{ + /* + * Lease + */ + xml::ElementNode *pElmLease = pElmParent->createChild("Lease"); + + pElmLease->setAttribute("mac", RTCStringFmt("%RTmac", &m_id.mac())); + if (m_id.id().present()) + { + /* I'd prefer RTSTRPRINTHEXBYTES_F_SEP_COLON but there's no decoder */ + size_t cbStrId = m_id.id().value().size() * 2 + 1; + char *pszId = new char[cbStrId]; + int rc = RTStrPrintHexBytes(pszId, cbStrId, + &m_id.id().value().front(), m_id.id().value().size(), + 0); + AssertRC(rc); + pElmLease->setAttribute("id", pszId); + delete[] pszId; + } + + /* unused but we need it to keep the old code happy */ + pElmLease->setAttribute("network", "0.0.0.0"); + pElmLease->setAttribute("state", stateName()); + + /* + * Lease/Address + */ + xml::ElementNode *pElmAddr = pElmLease->createChild("Address"); + pElmAddr->setAttribute("value", RTCStringFmt("%RTnaipv4", m_addr.u)); + + /* + * Lease/Time + */ + xml::ElementNode *pElmTime = pElmLease->createChild("Time"); + pElmTime->setAttribute("issued", m_issued.getAbsSeconds()); + pElmTime->setAttribute("expiration", m_secLease); +} + + +/** + * Deserializes the binding from the XML lease database. + * + * @param pElmLease The "Lease" element to serialize into. + * @return Pointer to the resulting binding, NULL on failure. + * @throw std::bad_alloc + * @note DHCPServerImpl.cpp contains a similar reader, keep it in sync. + */ +Binding *Binding::fromXML(const xml::ElementNode *pElmLease) +{ + /* Note! Lease/@network seems to always have bogus value, ignore it. */ + /* Note! We parse the mandatory attributes and elements first, then + the optional ones. This means things appear a little jumbled. */ + + /* + * Lease/@mac - mandatory. + */ + const char *pszMacAddress = pElmLease->findAttributeValue("mac"); + if (!pszMacAddress) + DHCP_LOG_RET_NULL(("Binding::fromXML: <Lease> element without 'mac' attribute! 
Skipping lease.\n")); + + RTMAC mac; + int rc = RTNetStrToMacAddr(pszMacAddress, &mac); + if (RT_FAILURE(rc)) + DHCP_LOG_RET_NULL(("Binding::fromXML: Malformed mac address attribute value '%s': %Rrc - Skipping lease.\n", + pszMacAddress, rc)); + + /* + * Lease/Address/@value - mandatory. + */ + const char *pszAddress = pElmLease->findChildElementAttributeValue("Address", "value"); + if (!pszAddress) + DHCP_LOG_RET_NULL(("Binding::fromXML: Could not find <Address> with a 'value' attribute! Skipping lease.\n")); + + RTNETADDRIPV4 addr; + rc = RTNetStrToIPv4Addr(pszAddress, &addr); + if (RT_FAILURE(rc)) + DHCP_LOG_RET_NULL(("Binding::fromXML: Malformed IPv4 address value '%s': %Rrc - Skipping lease.\n", pszAddress, rc)); + + /* + * Lease/Time - mandatory. + */ + const xml::ElementNode *pElmTime = pElmLease->findChildElement("Time"); + if (pElmTime == NULL) + DHCP_LOG_RET_NULL(("Binding::fromXML: No <Time> element under <Lease mac=%RTmac>! Skipping lease.\n", &mac)); + + /* + * Lease/Time/@issued - mandatory. + */ + int64_t secIssued; + if (!pElmTime->getAttributeValue("issued", &secIssued)) + DHCP_LOG_RET_NULL(("Binding::fromXML: <Time> element for %RTmac has no valid 'issued' attribute! Skipping lease.\n", &mac)); + + /* + * Lease/Time/@expiration - mandatory. + */ + uint32_t cSecToLive; + if (!pElmTime->getAttributeValue("expiration", &cSecToLive)) + DHCP_LOG_RET_NULL(("Binding::fromXML: <Time> element for %RTmac has no valid 'expiration' attribute! Skipping lease.\n", &mac)); + + std::unique_ptr<Binding> b(new Binding(addr)); + + /* + * Lease/@state - mandatory but not present in old leases file, so pretent + * we're loading an expired one if absent. + */ + const char *pszState = pElmLease->findAttributeValue("state"); + if (pszState) + { + b->m_issued = Timestamp::absSeconds(secIssued); + b->setState(pszState); + } + else + { /** @todo XXX: old code wrote timestamps instead of absolute time. 
*/ + /* pretend that lease has just ended */ + LogRel(("Binding::fromXML: No 'state' attribute for <Lease mac=%RTmac> (ts=%RI64 ttl=%RU32)! Assuming EXPIRED.\n", + &mac, secIssued, cSecToLive)); + b->m_issued = Timestamp::now().subSeconds(cSecToLive); + b->m_state = Binding::EXPIRED; + } + b->m_secLease = cSecToLive; + + + /* + * Lease/@id - optional, ignore if bad. + * Value format: "deadbeef..." or "de:ad:be:ef...". + */ + const char *pszClientId = pElmLease->findAttributeValue("id"); + if (pszClientId) + { + uint8_t abBytes[255]; + size_t cbActual; + rc = RTStrConvertHexBytesEx(pszClientId, abBytes, sizeof(abBytes), RTSTRCONVERTHEXBYTES_F_SEP_COLON, NULL, &cbActual); + if (RT_SUCCESS(rc)) + { + b->m_id = ClientId(mac, OptClientId(std::vector<uint8_t>(&abBytes[0], &abBytes[cbActual]))); /* throws bad_alloc */ + if (rc != VINF_BUFFER_UNDERFLOW && rc != VINF_SUCCESS) + LogRel(("Binding::fromXML: imperfect 'id' attribute: rc=%Rrc, cbActual=%u, '%s'\n", rc, cbActual, pszClientId)); + } + else + { + LogRel(("Binding::fromXML: ignoring malformed 'id' attribute: rc=%Rrc, cbActual=%u, '%s'\n", + rc, cbActual, pszClientId)); + b->m_id = ClientId(mac, OptClientId()); + } + } + else + b->m_id = ClientId(mac, OptClientId()); + + return b.release(); +} + + + +/********************************************************************************************************************************* +* Class Db Implementation * +*********************************************************************************************************************************/ + +Db::Db() + : m_pConfig(NULL) +{ +} + + +Db::~Db() +{ + /** @todo free bindings */ +} + + +int Db::init(const Config *pConfig) +{ + Binding::registerFormat(); + + m_pConfig = pConfig; + + int rc = m_pool.init(pConfig->getIPv4PoolFirst(), pConfig->getIPv4PoolLast()); + if (RT_SUCCESS(rc)) + { + /* + * If the server IP is in the dynamic range, preallocate it like a fixed assignment. 
+ */ + rc = i_enterFixedAddressAssignment(pConfig->getIPv4Address(), pConfig->getMacAddress()); + if (RT_SUCCESS(rc)) + { + /* + * Preallocate any fixed address assignments: + */ + Config::HostConfigVec vecHostConfigs; + rc = pConfig->getFixedAddressConfigs(vecHostConfigs); + for (Config::HostConfigVec::const_iterator it = vecHostConfigs.begin(); + it != vecHostConfigs.end() && RT_SUCCESS(rc); ++it) + rc = i_enterFixedAddressAssignment((*it)->getFixedAddress(), (*it)->getMACAddress()); + } + } + + return rc; +} + + +/** + * Used by Db::init() to register a fixed address assignment. + * + * @returns IPRT status code. + * @param a_rAddress The IPv4 address assignment. + * @param a_rMACAddress The MAC address. + */ +int Db::i_enterFixedAddressAssignment(RTNETADDRIPV4 const &a_rAddress, RTMAC const &a_rMACAddress) RT_NOEXCEPT +{ + LogRelFunc(("%RTmac: %RTnaipv4\n", &a_rMACAddress, a_rAddress)); + Assert(m_pConfig->isInIPv4Network(a_rAddress)); /* should've been checked elsewhere already */ + + /* + * If the address is part of the pool, we have to allocate it to + * prevent it from being used again. + */ + if (m_pool.contains(a_rAddress)) + { + if (!m_pool.allocate(a_rAddress)) + { + LogRelFunc(("%RTnaipv4 already allocated?\n", a_rAddress)); + return VERR_ADDRESS_CONFLICT; + } + } + + /* + * Create the binding. + */ + Binding *pBinding = NULL; + try + { + pBinding = new Binding(a_rAddress, a_rMACAddress, true /*fFixed*/); + m_bindings.push_front(pBinding); + } + catch (std::bad_alloc &) + { + if (pBinding) + delete pBinding; + return VERR_NO_MEMORY; + } + return VINF_SUCCESS; +} + + +/** + * Expire old binding (leases). + */ +void Db::expire() RT_NOEXCEPT +{ + const Timestamp now = Timestamp::now(); + for (bindings_t::iterator it = m_bindings.begin(); it != m_bindings.end(); ++it) + { + Binding *b = *it; + b->expire(now); + } +} + + +/** + * Internal worker that creates a binding for the given client, allocating new + * IPv4 address for it. 
+ * + * @returns Pointer to the binding. + * @param id The client ID. + */ +Binding *Db::i_createBinding(const ClientId &id) +{ + Binding *pBinding = NULL; + RTNETADDRIPV4 addr = m_pool.allocate(); + if (addr.u != 0) + { + try + { + pBinding = new Binding(addr, id); + m_bindings.push_front(pBinding); + } + catch (std::bad_alloc &) + { + if (pBinding) + delete pBinding; + /** @todo free address (no pool method for that) */ + } + } + return pBinding; +} + + +/** + * Internal worker that creates a binding to the specified IPv4 address for the + * given client. + * + * @returns Pointer to the binding. + * NULL if the address is in use or we ran out of memory. + * @param addr The IPv4 address. + * @param id The client. + */ +Binding *Db::i_createBinding(RTNETADDRIPV4 addr, const ClientId &id) +{ + bool fAvailable = m_pool.allocate(addr); + if (!fAvailable) + { + /** @todo + * XXX: this should not happen. If the address is from the + * pool, which we have verified before, then either it's in + * the free pool or there's an binding (possibly free) for it. + */ + return NULL; + } + + Binding *b = new Binding(addr, id); + m_bindings.push_front(b); + return b; +} + + +/** + * Internal worker that allocates an IPv4 address for the given client, taking + * the preferred address (@a addr) into account when possible and if non-zero. + */ +Binding *Db::i_allocateAddress(const ClientId &id, RTNETADDRIPV4 addr) +{ + Assert(addr.u == 0 || addressBelongs(addr)); + + if (addr.u != 0) + LogRel(("> allocateAddress %RTnaipv4 to client %R[id]\n", addr.u, &id)); + else + LogRel(("> allocateAddress to client %R[id]\n", &id)); + + /* + * Allocate existing address if client has one. Ignore requested + * address in that case. While here, look for free addresses and + * addresses that can be reused. 
+ */ + Binding *addrBinding = NULL; + Binding *freeBinding = NULL; + Binding *reuseBinding = NULL; + const Timestamp now = Timestamp::now(); + for (bindings_t::iterator it = m_bindings.begin(); it != m_bindings.end(); ++it) + { + Binding *b = *it; + b->expire(now); + + /* + * We've already seen this client, give it its old binding. + * + * If the client's MAC address is configured with a fixed + * address, give its preconfigured binding. Fixed bindings + * are always at the head of the m_bindings list, so we + * won't be confused by any old leases of the client. + */ + if (b->m_id == id) + { + LogRel(("> ... found existing binding %R[binding]\n", b)); + return b; + } + if (b->isFixed() && b->id().mac() == id.mac()) + { + b->idUpdate(id); + LogRel(("> ... found fixed binding %R[binding]\n", b)); + return b; + } + + if (addr.u != 0 && b->m_addr.u == addr.u) + { + Assert(addrBinding == NULL); + addrBinding = b; + LogRel(("> .... noted existing binding %R[binding]\n", addrBinding)); + } + + /* if we haven't found a free binding yet, keep looking */ + if (freeBinding == NULL) + { + if (b->m_state == Binding::FREE) + { + freeBinding = b; + LogRel(("> .... noted free binding %R[binding]\n", freeBinding)); + continue; + } + + /* still no free binding, can this one be reused? */ + if (b->m_state == Binding::RELEASED) + { + if ( reuseBinding == NULL + /* released binding is better than an expired one */ + || reuseBinding->m_state == Binding::EXPIRED) + { + reuseBinding = b; + LogRel(("> .... noted released binding %R[binding]\n", reuseBinding)); + } + } + else if (b->m_state == Binding::EXPIRED) + { + if ( reuseBinding == NULL + /* long expired binding is bettern than a recent one */ + /* || (reuseBinding->m_state == Binding::EXPIRED && b->olderThan(reuseBinding)) */) + { + reuseBinding = b; + LogRel(("> .... noted expired binding %R[binding]\n", reuseBinding)); + } + } + } + } + + /* + * Allocate requested address if we can. 
+ */ + if (addr.u != 0) + { + if (addrBinding == NULL) + { + addrBinding = i_createBinding(addr, id); + Assert(addrBinding != NULL); + LogRel(("> .... creating new binding for this address %R[binding]\n", addrBinding)); + return addrBinding; + } + + if (addrBinding->m_state <= Binding::EXPIRED) /* not in use */ + { + LogRel(("> .... reusing %s binding for this address\n", addrBinding->stateName())); + addrBinding->giveTo(id); + return addrBinding; + } + LogRel(("> .... cannot reuse %s binding for this address\n", addrBinding->stateName())); + } + + /* + * Allocate new (or reuse). + */ + Binding *idBinding = NULL; + if (freeBinding != NULL) + { + idBinding = freeBinding; + LogRel(("> .... reusing free binding\n")); + } + else + { + idBinding = i_createBinding(); + if (idBinding != NULL) + LogRel(("> .... creating new binding\n")); + else + { + idBinding = reuseBinding; + if (idBinding != NULL) + LogRel(("> .... reusing %s binding %R[binding]\n", reuseBinding->stateName(), reuseBinding)); + else + DHCP_LOG_RET_NULL(("> .... failed to allocate binding\n")); + } + } + + idBinding->giveTo(id); + LogRel(("> .... allocated %R[binding]\n", idBinding)); + + return idBinding; +} + + + +/** + * Called by DHCPD to allocate a binding for the specified request. + * + * @returns Pointer to the binding, NULL on failure. + * @param req The DHCP request being served. + * @param rConfigVec The configurations that applies to the client. + * Used for lease time calculation. + */ +Binding *Db::allocateBinding(const DhcpClientMessage &req, Config::ConfigVec const &rConfigVec) +{ + const ClientId &id(req.clientId()); + + /* + * Get and validate the requested address (if present). + * + * Fixed assignments are often outside the dynamic range, so we much detect + * those to make sure they aren't rejected based on IP range. ASSUMES fixed + * assignments are at the head of the binding list. 
+ */ + OptRequestedAddress reqAddr(req); + if (reqAddr.present() && !addressBelongs(reqAddr.value())) + { + bool fIsFixed = false; + for (bindings_t::iterator it = m_bindings.begin(); it != m_bindings.end() && (*it)->isFixed(); ++it) + if (reqAddr.value().u == (*it)->addr().u) + { + if ( (*it)->id() == id + || (*it)->id().mac() == id.mac()) + { + fIsFixed = true; + break; + } + } + if (fIsFixed) + reqAddr = OptRequestedAddress(); + else if (req.messageType() == RTNET_DHCP_MT_DISCOVER) + { + LogRel(("DISCOVER: ignoring invalid requested address\n")); + reqAddr = OptRequestedAddress(); + } + else + DHCP_LOG_RET_NULL(("rejecting invalid requested address\n")); + } + + /* + * Allocate the address. + */ + Binding *b = i_allocateAddress(id, reqAddr.value()); + if (b != NULL) + { + Assert(b->id() == id); + + /* + * Figure out the lease time. + */ + uint32_t secMin = 0; + uint32_t secDfl = 0; + uint32_t secMax = 0; + for (Config::ConfigVec::const_iterator it = rConfigVec.begin(); it != rConfigVec.end(); ++it) + { + ConfigLevelBase const *pConfig = *it; + if (secMin == 0) + secMin = pConfig->getMinLeaseTime(); + if (secDfl == 0) + secDfl = pConfig->getDefaultLeaseTime(); + if (secMax == 0) + secMax = pConfig->getMaxLeaseTime(); + } + Assert(secMin); Assert(secMax); Assert(secDfl); /* global config always have non-defaults set */ + if (secMin > secMax) + secMin = secMax; + + OptLeaseTime reqLeaseTime(req); + if (!reqLeaseTime.present()) + { + b->setLeaseTime(secDfl); + LogRel2(("Lease time %u secs (default)\n", b->leaseTime())); + } + else if (reqLeaseTime.value() < secMin) + { + b->setLeaseTime(secMin); + LogRel2(("Lease time %u secs (min)\n", b->leaseTime())); + } + else if (reqLeaseTime.value() > secMax) + { + b->setLeaseTime(secMax); + LogRel2(("Lease time %u secs (max)\n", b->leaseTime())); + } + else + { + b->setLeaseTime(reqLeaseTime.value()); + LogRel2(("Lease time %u secs (requested)\n", b->leaseTime())); + } + } + return b; +} + + +/** + * Internal worker used by 
i_loadLease(). + * + * @returns IPRT status code. + * @param pNewBinding The new binding to add. + */ +int Db::i_addBinding(Binding *pNewBinding) RT_NOEXCEPT +{ + /* + * Validate the binding against the range and existing bindings. + * Neither the address nor the client id may already be in the list. + */ + if (!addressBelongs(pNewBinding->m_addr)) + { + LogRel(("Binding for out of range address %RTnaipv4 ignored\n", pNewBinding->m_addr.u)); + return VERR_OUT_OF_RANGE; + } + + for (bindings_t::iterator it = m_bindings.begin(); it != m_bindings.end(); ++it) + { + Binding *b = *it; + + if (pNewBinding->m_addr.u == b->m_addr.u) + { + LogRel(("> ADD: %R[binding]\n", pNewBinding)); + LogRel(("> .... duplicate ip: %R[binding]\n", b)); + return VERR_DUPLICATE; + } + + if (pNewBinding->m_id == b->m_id) + { + LogRel(("> ADD: %R[binding]\n", pNewBinding)); + LogRel(("> .... duplicate id: %R[binding]\n", b)); + return VERR_DUPLICATE; + } + } + + /* + * Allocate the address and add the binding to the list. + * The list only stores the pointer; on failure the binding is not added. + */ + AssertLogRelMsgReturn(m_pool.allocate(pNewBinding->m_addr), + ("> ADD: failed to claim IP %R[binding]\n", pNewBinding), + VERR_INTERNAL_ERROR); + try + { + m_bindings.push_back(pNewBinding); + } + catch (std::bad_alloc &) + { + return VERR_NO_MEMORY; /* NOTE(review): the address claimed from m_pool above is not handed back on this path - confirm whether that matters */ + } + return VINF_SUCCESS; +} + + +/** + * Called by DHCP to cancel an offer. + * + * @param req The DHCP request. 
+ */ +void Db::cancelOffer(const DhcpClientMessage &req) RT_NOEXCEPT +{ + const OptRequestedAddress reqAddr(req); + if (!reqAddr.present()) /* nothing to cancel without a requested address */ + return; + + const RTNETADDRIPV4 addr = reqAddr.value(); + const ClientId &id(req.clientId()); + + for (bindings_t::iterator it = m_bindings.begin(); it != m_bindings.end(); ++it) + { + Binding *b = *it; + + if (b->addr().u == addr.u && b->id() == id) + { + if (b->state() == Binding::OFFERED) + { + LogRel2(("Db::cancelOffer: cancelling %R[binding]\n", b)); + if (!b->isFixed()) + { + b->setLeaseTime(0); + b->setState(Binding::RELEASED); + } + else + b->setState(Binding::ACKED); /* fixed assignments go back to ACKED instead of being released */ + } + else + LogRel2(("Db::cancelOffer: not offered state: %R[binding]\n", b)); + return; + } + } + LogRel2(("Db::cancelOffer: not found (%RTnaipv4, %R[id])\n", addr.u, &id)); +} + + +/** + * Called by DHCP to release the binding matching the client's ciaddr and + * client id. + * + * @param req The DHCP request. + * @returns true if found and released, otherwise false. Fixed assignments + * are not released: they are put (back) into the ACKED state and + * false is returned. + * @throws nothing + */ +bool Db::releaseBinding(const DhcpClientMessage &req) RT_NOEXCEPT +{ + const RTNETADDRIPV4 addr = req.ciaddr(); + const ClientId &id(req.clientId()); + + for (bindings_t::iterator it = m_bindings.begin(); it != m_bindings.end(); ++it) + { + Binding *b = *it; + + if (b->addr().u == addr.u && b->id() == id) + { + LogRel2(("Db::releaseBinding: releasing %R[binding]\n", b)); + if (!b->isFixed()) + { + b->setState(Binding::RELEASED); + return true; + } + b->setState(Binding::ACKED); /* fixed assignment: keep it */ + return false; + } + } + + LogRel2(("Db::releaseBinding: not found (%RTnaipv4, %R[id])\n", addr.u, &id)); + return false; +} + + +/** + * Called by DHCPD to write out the lease database to @a strFilename. + * + * @returns IPRT status code. + * @param strFilename The file to write it to. 
+ */ +int Db::writeLeases(const RTCString &strFilename) const RT_NOEXCEPT +{ + LogRel(("writing leases to %s\n", strFilename.c_str())); + + /** @todo This could easily be written directly to the file w/o going thru + * a xml::Document, xml::XmlFileWriter, hammering the heap and being + * required to catch a lot of different exceptions at various points. + * (RTStrmOpen, bunch of RTStrmPrintf using \%RMas and \%RMes., + * RTStrmClose closely followed by a couple of renames.) + */ + + /* + * Create the document and root element. + */ + xml::Document doc; + try + { + xml::ElementNode *pElmRoot = doc.createRootElement("Leases"); + pElmRoot->setAttribute("version", "1.0"); + + /* + * Add the leases (fixed assignments are skipped). + */ + for (bindings_t::const_iterator it = m_bindings.begin(); it != m_bindings.end(); ++it) + { + const Binding *b = *it; + if (!b->isFixed()) + b->toXML(pElmRoot); + } + } + catch (std::bad_alloc &) + { + return VERR_NO_MEMORY; + } + + /* + * Write the document to the specified file in a safe manner (written to temporary + * file, renamed to destination on success) + */ + try + { + xml::XmlFileWriter writer(doc); + writer.write(strFilename.c_str(), true /*fSafe*/); + } + catch (const xml::EIPRTFailure &e) + { + LogRel(("%s\n", e.what())); + return e.rc(); + } + catch (const RTCError &e) + { + LogRel(("%s\n", e.what())); + return VERR_GENERAL_FAILURE; + } + catch (...) /* the method is RT_NOEXCEPT, so nothing may escape */ + { + LogRel(("Unknown exception while writing '%s'\n", strFilename.c_str())); + return VERR_UNEXPECTED_EXCEPTION; + } + + return VINF_SUCCESS; +} + + +/** + * Called by DHCPD to load the lease database from @a strFilename. + * + * @note Does not clear the database state before doing the load. + * + * @returns IPRT status code. + * @param strFilename The file to load it from. + * @throws nothing + */ +int Db::loadLeases(const RTCString &strFilename) RT_NOEXCEPT +{ + LogRel(("loading leases from %s\n", strFilename.c_str())); + + /* + * Load the file into an XML document. 
+ */ + xml::Document doc; + try + { + xml::XmlFileParser parser; + parser.read(strFilename.c_str(), doc); + } + catch (const xml::EIPRTFailure &e) + { + LogRel(("%s\n", e.what())); + return e.rc(); + } + catch (const RTCError &e) + { + LogRel(("%s\n", e.what())); + return VERR_GENERAL_FAILURE; + } + catch (...) + { + LogRel(("Unknown exception while reading and parsing '%s'\n", strFilename.c_str())); + return VERR_UNEXPECTED_EXCEPTION; + } + + /* + * Check that the root element is "Leases" and process its children. + */ + xml::ElementNode *pElmRoot = doc.getRootElement(); + if (!pElmRoot) + { + LogRel(("No root element in '%s'\n", strFilename.c_str())); + return VERR_NOT_FOUND; + } + if (!pElmRoot->nameEquals("Leases")) + { + LogRel(("No root element is not 'Leases' in '%s', but '%s'\n", strFilename.c_str(), pElmRoot->getName())); + return VERR_NOT_FOUND; + } + + int rc = VINF_SUCCESS; + xml::NodesLoop it(*pElmRoot); + const xml::ElementNode *pElmLease; + while ((pElmLease = it.forAllNodes()) != NULL) + { + if (pElmLease->nameEquals("Lease")) + { + int rc2 = i_loadLease(pElmLease); + if (RT_SUCCESS(rc2)) + { /* likely */ } + else if (rc2 == VERR_NO_MEMORY) /* out of memory aborts the whole load */ + return rc2; + else + rc = -rc2; /* NOTE(review): sign flipped - presumably so a bad lease is reported as a non-failure status while loading continues; confirm */ + } + else + LogRel(("Ignoring unexpected element '%s' under 'Leases'...\n", pElmLease->getName())); + } + + return rc; +} + + +/** + * Internal worker for loadLeases() that handles one 'Lease' element. + * + * @param pElmLease The 'Lease' element to handle. + * @return IPRT status code. 
+ */ +int Db::i_loadLease(const xml::ElementNode *pElmLease) RT_NOEXCEPT +{ + Binding *pBinding = NULL; + try + { + pBinding = Binding::fromXML(pElmLease); + } + catch (std::bad_alloc &) + { + return VERR_NO_MEMORY; + } + if (pBinding) + { + bool fExpired = pBinding->expire(); /* only affects which message is logged below */ + if (!fExpired) + LogRel(("> LOAD: lease %R[binding]\n", pBinding)); + else + LogRel(("> LOAD: EXPIRED lease %R[binding]\n", pBinding)); + + int rc = i_addBinding(pBinding); + if (RT_FAILURE(rc)) + delete pBinding; /* not added to the list, so it must be freed here */ + return rc; + } + LogRel(("> LOAD: failed to load lease!\n")); + return VERR_PARSE_ERROR; +} diff --git a/src/VBox/NetworkServices/Dhcpd/Db.h b/src/VBox/NetworkServices/Dhcpd/Db.h new file mode 100644 index 00000000..5208cf3b --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/Db.h @@ -0,0 +1,221 @@ +/* $Id: Db.h $ */ +/** @file + * DHCP server - address database + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Dhcpd_Db_h +#define VBOX_INCLUDED_SRC_Dhcpd_Db_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include "DhcpdInternal.h" +#include <iprt/net.h> + +#include <iprt/cpp/ministring.h> +#include <iprt/cpp/xml.h> + +#include <list> + +#include "Timestamp.h" +#include "ClientId.h" +#include "IPv4Pool.h" +#include "Config.h" +#include "DhcpMessage.h" + + +/** + * An address binding in the lease database. + * + * This is how an allocated IPv4 address is managed. + */ +class Binding +{ + friend class Db; + +public: + enum State { FREE, RELEASED, EXPIRED, OFFERED, ACKED }; /* the order matters: Db treats states <= EXPIRED as "not in use" */ + +private: + const RTNETADDRIPV4 m_addr; + State m_state; + ClientId m_id; + Timestamp m_issued; /**< Set to Timestamp::now() whenever setLeaseTime() is called. */ + uint32_t m_secLease; /**< Lease time in seconds. */ + /** Set if this is a fixed assignment. */ + bool m_fFixed; + +public: + Binding(); + Binding(const Binding &); + + explicit Binding(RTNETADDRIPV4 a_Addr) + : m_addr(a_Addr), m_state(FREE), m_issued(), m_secLease(0), m_fFixed(false) + {} + + Binding(RTNETADDRIPV4 a_Addr, const ClientId &a_id) + : m_addr(a_Addr), m_state(FREE), m_id(a_id), m_issued(), m_secLease(0), m_fFixed(false) + {} + + Binding(RTNETADDRIPV4 a_Addr, const RTMAC &a_MACAddress, bool a_fFixed) /* starts out ACKED with a lease time of UINT32_MAX - 1 seconds */ + : m_addr(a_Addr) + , m_state(ACKED) + , m_id(ClientId(a_MACAddress, OptClientId())) + , m_issued(Timestamp::now()) + , m_secLease(UINT32_MAX - 1) + , m_fFixed(a_fFixed) + {} + + + /** @name Attribute accessors + * @{ */ + RTNETADDRIPV4 addr() const RT_NOEXCEPT { return m_addr; } + + const ClientId &id() const RT_NOEXCEPT { return m_id; } + void idUpdate(const ClientId &a_ridClient); + + uint32_t leaseTime() const RT_NOEXCEPT { return m_secLease; } + Timestamp issued() const RT_NOEXCEPT { return m_issued; } + + State state() const RT_NOEXCEPT { return m_state; } + const char *stateName() const RT_NOEXCEPT; + Binding &setState(const char *pszStateName) RT_NOEXCEPT; + Binding &setState(State stateParam) RT_NOEXCEPT + { + m_state = 
stateParam; + return *this; + } + + bool isFixed() const RT_NOEXCEPT { return m_fFixed; } + /** @} */ + + + /** Sets the lease time in seconds and restarts the lease at the current time. */ + Binding &setLeaseTime(uint32_t secLease) RT_NOEXCEPT + { + m_issued = Timestamp::now(); + m_secLease = secLease; + return *this; + } + + /** Reassigns the binding to the given client and resets the state to FREE. */ + Binding &giveTo(const ClientId &a_id) RT_NOEXCEPT + { + m_id = a_id; + m_state = FREE; + return *this; + } + + /** Clears the client id and resets the state to FREE. */ + void free() + { + m_id = ClientId(); + m_state = FREE; + } + + bool expire(Timestamp tsDeadline) RT_NOEXCEPT; + bool expire() RT_NOEXCEPT /* same as above with the current time as deadline */ + { + return expire(Timestamp::now()); + } + + /** @name Serialization + * @{ */ + static Binding *fromXML(const xml::ElementNode *pElmLease); + void toXML(xml::ElementNode *pElmParent) const; + /** @} */ + + /** @name String formatting of %R[binding]. + * @{ */ + static void registerFormat() RT_NOEXCEPT; +private: + static DECLCALLBACK(size_t) rtStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, const char *pszType, + void const *pvValue, int cchWidth, int cchPrecision, unsigned fFlags, void *pvUser); + static bool g_fFormatRegistered; + /** @} */ + + Binding &operator=(const Binding &); /**< Shuts up warning C4626 (incorrect warning?). */ +}; + + +/** + * The lease database. + * + * There is currently just one instance of this class in a running DHCP server + * residing in Dhcpd::m_db. It covers one single range of IPv4 addresses, whose + * currently unbound addresses are managed by m_pool. The allocated addresses + * are kept in the m_bindings list. Once an address has been allocated, it will + * stay in the m_bindings list even after released or expired. + */ +class Db +{ +private: + typedef std::list<Binding *> bindings_t; + + /** Configuration (set at init). + * @note Currently not used. */ + const Config *m_pConfig; + /** The lease database. + * @note Since fixed assignments are added during initialization, they will + * always be first. The allocateBinding() code depends on this. 
*/ + bindings_t m_bindings; + /** Address allocation pool. */ + IPv4Pool m_pool; + +public: + Db(); + ~Db(); + + int init(const Config *pConfig); + + /** Check if @a addr belongs to this lease database, i.e. is contained in m_pool. */ + bool addressBelongs(RTNETADDRIPV4 addr) const RT_NOEXCEPT { return m_pool.contains(addr); } + + Binding *allocateBinding(const DhcpClientMessage &req, Config::ConfigVec const &rConfigVec); + bool releaseBinding(const DhcpClientMessage &req) RT_NOEXCEPT; + + void cancelOffer(const DhcpClientMessage &req) RT_NOEXCEPT; + + void expire() RT_NOEXCEPT; + + /** @name Database serialization methods + * @{ */ + int loadLeases(const RTCString &strFilename) RT_NOEXCEPT; +private: + int i_loadLease(const xml::ElementNode *pElmLease) RT_NOEXCEPT; +public: + int writeLeases(const RTCString &strFilename) const RT_NOEXCEPT; + /** @} */ + +private: + int i_enterFixedAddressAssignment(RTNETADDRIPV4 const &a_rAddress, RTMAC const &a_rMACAddress) RT_NOEXCEPT; + Binding *i_createBinding(const ClientId &id = ClientId()); + Binding *i_createBinding(RTNETADDRIPV4 addr, const ClientId &id = ClientId()); + + Binding *i_allocateAddress(const ClientId &id, RTNETADDRIPV4 addr); + + /* Adds a binding, e.g. one loaded from the leases file. The binding is only added to m_bindings on success. */ + int i_addBinding(Binding *pNewBinding) RT_NOEXCEPT; +}; + +#endif /* !VBOX_INCLUDED_SRC_Dhcpd_Db_h */ diff --git a/src/VBox/NetworkServices/Dhcpd/DhcpMessage.cpp b/src/VBox/NetworkServices/Dhcpd/DhcpMessage.cpp new file mode 100644 index 00000000..718dcf4e --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/DhcpMessage.cpp @@ -0,0 +1,447 @@ +/* $Id: DhcpMessage.cpp $ */ +/** @file + * DHCP Message and its de/serialization. + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "DhcpdInternal.h" +#include "DhcpMessage.h" +#include "DhcpOptions.h" + +#include <iprt/ctype.h> +#include <iprt/string.h> + + +/** Default constructor: zero xid and flags, default-constructed addresses and message type (m_mac is not in the initializer list). */ +DhcpMessage::DhcpMessage() + : m_xid(0) + , m_flags(0) + , m_ciaddr() + , m_yiaddr() + , m_siaddr() + , m_giaddr() +#if 0 /* not currently used */ + , m_sname() + , m_file() +#endif + , m_optMessageType() +{ +} + + +/** + * Does common message dumping. 
+ */ +void DhcpMessage::dump() const RT_NOEXCEPT +{ + switch (m_optMessageType.value()) /* message type name first ... */ + { + case RTNET_DHCP_MT_DISCOVER: LogRel(("DISCOVER")); break; + case RTNET_DHCP_MT_OFFER: LogRel(("OFFER")); break; + case RTNET_DHCP_MT_REQUEST: LogRel(("REQUEST")); break; + case RTNET_DHCP_MT_DECLINE: LogRel(("DECLINE")); break; + case RTNET_DHCP_MT_ACK: LogRel(("ACK")); break; + case RTNET_DHCP_MT_NAC: LogRel(("NAC")); break; + case RTNET_DHCP_MT_RELEASE: LogRel(("RELEASE")); break; + case RTNET_DHCP_MT_INFORM: LogRel(("INFORM")); break; + default: + LogRel(("<Unknown Mesage Type %d>", m_optMessageType.value())); + break; + } + + LogRel((" xid 0x%08x", m_xid)); /* ... then the header fields; zero yiaddr/siaddr/giaddr are left out */ + LogRel((" chaddr %RTmac\n", &m_mac)); + LogRel((" ciaddr %RTnaipv4", m_ciaddr.u)); + if (m_yiaddr.u != 0) + LogRel((" yiaddr %RTnaipv4", m_yiaddr.u)); + if (m_siaddr.u != 0) + LogRel((" siaddr %RTnaipv4", m_siaddr.u)); + if (m_giaddr.u != 0) + LogRel((" giaddr %RTnaipv4", m_giaddr.u)); + if (broadcast()) + LogRel((" broadcast\n")); + else + LogRel(("\n")); +} + + +/********************************************************************************************************************************* +* DhcpClientMessage Implementation * +*********************************************************************************************************************************/ +/** Parses a raw datagram into a new DhcpClientMessage that the caller owns; returns NULL if the datagram is rejected. */ +/* static */ +DhcpClientMessage *DhcpClientMessage::parse(bool broadcasted, const void *buf, size_t buflen) +{ + /* + * Validate the request. 
+ */ + if (buflen < RT_OFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts)) /* must at least hold the fixed header and the cookie */ + DHCP_LOG2_RET_NULL(("DhcpClientMessage::parse: %zu bytes datagram is too short\n", buflen)); + + PCRTNETBOOTP bp = (PCRTNETBOOTP)buf; + + if (bp->bp_op != RTNETBOOTP_OP_REQUEST) + DHCP_LOG2_RET_NULL(("DhcpClientMessage::parse: bad opcode: %d\n", bp->bp_op)); + + if (bp->bp_htype != RTNET_ARP_ETHER) + DHCP_LOG2_RET_NULL(("DhcpClientMessage::parse: unsupported htype %d\n", bp->bp_htype)); + + if (bp->bp_hlen != sizeof(RTMAC)) + DHCP_LOG2_RET_NULL(("DhcpClientMessage::parse: unexpected hlen %d\n", bp->bp_hlen)); + + if ( (bp->bp_chaddr.Mac.au8[0] & 0x01) != 0 + && (bp->bp_flags & RTNET_DHCP_FLAG_BROADCAST) == 0) + LogRel2(("DhcpClientMessage::parse: multicast chaddr %RTmac without broadcast flag\n", &bp->bp_chaddr.Mac)); /* only logged, the message is not rejected */ + + /* we don't want to deal with forwarding */ + if (bp->bp_giaddr.u != 0) + DHCP_LOG2_RET_NULL(("DhcpClientMessage::parse: giaddr %RTnaipv4\n", bp->bp_giaddr.u)); + + if (bp->bp_hops != 0) + DHCP_LOG2_RET_NULL(("DhcpClientMessage::parse: non-zero hops %d\n", bp->bp_hops)); + + if (bp->bp_vend.Dhcp.dhcp_cookie != RT_H2N_U32_C(RTNET_DHCP_COOKIE)) + DHCP_LOG2_RET_NULL(("DhcpClientMessage::parse: bad cookie %#RX32\n", bp->bp_vend.Dhcp.dhcp_cookie)); + + /* + * Convert it into a DhcpClientMessage instance. + * + * The header fields are copied as found in the datagram (no byte + * swapping is done here). The unique_ptr frees the instance on every + * early return below. + */ + std::unique_ptr<DhcpClientMessage> msg(new DhcpClientMessage()); + + msg->m_broadcasted = broadcasted; + msg->m_xid = bp->bp_xid; + msg->m_flags = bp->bp_flags; + msg->m_mac = bp->bp_chaddr.Mac; + msg->m_ciaddr = bp->bp_ciaddr; + msg->m_yiaddr = bp->bp_yiaddr; + msg->m_siaddr = bp->bp_siaddr; + msg->m_giaddr = bp->bp_giaddr; + + int fOptOverload = msg->i_parseOptions(&bp->bp_vend.Dhcp.dhcp_opts[0], + buflen - RT_OFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts)); + if (fOptOverload < 0) /* malformed options */ + return NULL; + + /* "The 'file' field MUST be interpreted next ..." 
*/ + if (fOptOverload & RTNET_DHCP_OPTION_OVERLOAD_FILE) + { + int status = msg->i_parseOptions(bp->bp_file, sizeof(bp->bp_file)); + if (status != 0) /* NB: this includes "nested" Option Overload */ + return NULL; + } +#if 0 /* not currently used */ + else if (bp->bp_file[0] != '\0') + { + /* must be zero terminated, ignore if not */ + const char *pszFile = (const char *)bp->bp_file; + size_t len = RTStrNLen(pszFile, sizeof(bp->bp_file)); + if (len < sizeof(bp->bp_file)) + msg->m_file.assign(pszFile, len); + } +#endif + + /* "... followed by the 'sname' field." */ + if (fOptOverload & RTNET_DHCP_OPTION_OVERLOAD_SNAME) + { + int status = msg->i_parseOptions(bp->bp_sname, sizeof(bp->bp_sname)); + if (status != 0) /* NB: this includes "nested" Option Overload */ + return NULL; + } +#if 0 /* not currently used */ + else if (bp->bp_sname[0] != '\0') + { + /* must be zero terminated, ignore if not */ + const char *pszSName = (const char *)bp->bp_sname; + size_t len = RTStrNLen(pszSName, sizeof(bp->bp_sname)); + if (len < sizeof(bp->bp_sname)) + msg->m_sname.assign(pszSName, len); + } +#endif + + msg->m_optMessageType = OptMessageType(*msg); + if (!msg->m_optMessageType.present()) /* a message type option is mandatory */ + return NULL; + + msg->m_id = ClientId(msg->m_mac, OptClientId(*msg)); + + return msg.release(); /* ownership passes to the caller */ +} + +/** Parses a raw option buffer into m_rawopts; returns the Option Overload value found (0 if none), negative if the buffer is malformed. */ +int DhcpClientMessage::i_parseOptions(const uint8_t *pbBuf, size_t cbBuf) RT_NOEXCEPT +{ + int fOptOverload = 0; + while (cbBuf > 0) + { + uint8_t const bOpt = *pbBuf++; + --cbBuf; + + if (bOpt == RTNET_DHCP_OPT_PAD) /* PAD and END are single bytes without a length field */ + continue; + + if (bOpt == RTNET_DHCP_OPT_END) + break; + + if (cbBuf == 0) + DHCP_LOG_RET(-1, ("option %d has no length field\n", bOpt)); + + uint8_t const cbOpt = *pbBuf++; + --cbBuf; + + if (cbOpt > cbBuf) + DHCP_LOG_RET(-1, ("option %d truncated (length %d, but only %zu bytes left)\n", bOpt, cbOpt, cbBuf)); + +#if 0 + rawopts_t::const_iterator it(m_optmap.find(bOpt)); + if (it != m_optmap.cend()) + return -1; +#endif + if (bOpt == RTNET_DHCP_OPT_OPTION_OVERLOAD) + { + if (cbOpt != 1) + DHCP_LOG_RET(-1, ("Overload 
Option (option %d) has invalid length %d\n", bOpt, cbOpt)); + + fOptOverload = *pbBuf; + + if ((fOptOverload & ~RTNET_DHCP_OPTION_OVERLOAD_MASK) != 0) + DHCP_LOG_RET(-1, ("Overload Option (option %d) has invalid value 0x%x\n", bOpt, fOptOverload)); + } + else + m_rawopts.insert(std::make_pair(bOpt, octets_t(pbBuf, pbBuf + cbOpt))); /* NOTE(review): may throw std::bad_alloc although the method is RT_NOEXCEPT - confirm this is acceptable */ + + pbBuf += cbOpt; + cbBuf -= cbOpt; + } + + return fOptOverload; +} + + +/** + * Dumps the message. + * + * The common header is dumped first, followed by the decoded options this + * server cares about and finally all remaining raw options. + */ +void DhcpClientMessage::dump() const RT_NOEXCEPT +{ + DhcpMessage::dump(); + + if (OptRapidCommit(*this).present()) + LogRel((" (rapid commit)")); + + try + { + const OptServerId sid(*this); + if (sid.present()) + LogRel((" for server %RTnaipv4", sid.value().u)); + + const OptClientId cid(*this); + if (cid.present()) + { + if (cid.value().size() > 0) + LogRel((" client id: %.*Rhxs\n", cid.value().size(), &cid.value().front())); + else + LogRel((" client id: <empty>\n")); + } + + const OptRequestedAddress reqAddr(*this); + if (reqAddr.present()) + LogRel((" requested address %RTnaipv4", reqAddr.value().u)); + const OptLeaseTime reqLeaseTime(*this); + if (reqLeaseTime.present()) + LogRel((" requested lease time %u", reqLeaseTime.value())); /* log the lease time option itself, not the requested address */ + if (reqAddr.present() || reqLeaseTime.present()) + LogRel(("\n")); + + const OptParameterRequest params(*this); + if (params.present()) + { + LogRel((" params {")); + typedef OptParameterRequest::value_t::const_iterator it_t; + for (it_t it = params.value().begin(); it != params.value().end(); ++it) + LogRel((" %d", *it)); + LogRel((" }\n")); + } + } + catch (std::bad_alloc &) + { + LogRel(("bad_alloc during dumping\n")); + } + + for (rawopts_t::const_iterator it = m_rawopts.begin(); it != m_rawopts.end(); ++it) + { + const uint8_t optcode = (*it).first; + switch (optcode) + { + case OptMessageType::optcode: /* FALLTHROUGH */ + case OptClientId::optcode: /* FALLTHROUGH */ + case OptRequestedAddress::optcode: /* FALLTHROUGH */ + case OptLeaseTime::optcode: /* FALLTHROUGH */ + case 
OptParameterRequest::optcode: /* FALLTHROUGH */ + case OptRapidCommit::optcode: /* all of these were already dumped above */ + break; + + default: + { + size_t const cbBytes = it->second.size(); + uint8_t const *pbBytes = &it->second.front(); /* NOTE(review): front() on an empty vector (zero-length option) - confirm this cannot happen or is harmless */ + bool fAllPrintable = true; + for (size_t off = 0; off < cbBytes; off++) + if (!RT_C_IS_PRINT((char )pbBytes[off])) + { + fAllPrintable = false; + break; + } + if (fAllPrintable) /* printable values are shown as text, everything else as hex */ + LogRel((" %2d: '%.*s'\n", optcode, cbBytes, pbBytes)); + else + LogRel((" %2d: %.*Rhxs\n", optcode, cbBytes, pbBytes)); + } + } + } +} + + + +/********************************************************************************************************************************* +* DhcpServerMessage Implementation * +*********************************************************************************************************************************/ +/** Creates a reply of type @a messageTypeParam to @a req from the server @a serverAddr; the destination defaults to broadcast. */ +DhcpServerMessage::DhcpServerMessage(const DhcpClientMessage &req, uint8_t messageTypeParam, RTNETADDRIPV4 serverAddr) + : DhcpMessage() + , m_optServerId(serverAddr) +{ + m_dst.u = 0xffffffff; /* broadcast */ + + m_optMessageType = OptMessageType(messageTypeParam); + + /* copy values from the request (cf. RFC2131 Table 3) */ + m_xid = req.xid(); + m_flags = req.flags(); + m_giaddr = req.giaddr(); + m_mac = req.mac(); + + if (req.messageType() == RTNET_DHCP_MT_REQUEST) /* ciaddr is only taken over from REQUEST messages */ + m_ciaddr = req.ciaddr(); +} + + +/** Switches the destination to the client's ciaddr if the request lacks the broadcast flag and ciaddr is non-zero. */ +void DhcpServerMessage::maybeUnicast(const DhcpClientMessage &req) RT_NOEXCEPT +{ + if (!req.broadcast() && req.ciaddr().u != 0) + setDst(req.ciaddr()); +} + + +/** + * Adds @a opt to m_optmap using the optmap_t insertion operator. + * + * @throws std::bad_alloc + */ +void DhcpServerMessage::addOption(DhcpOption *opt) +{ + m_optmap << opt; +} + + +/** + * Adds all the options in @a optmap to m_optmap. + * + * @throws std::bad_alloc + */ +void DhcpServerMessage::addOptions(const optmap_t &optmap) +{ + for (optmap_t::const_iterator it = optmap.begin(); it != optmap.end(); ++it) + m_optmap << it->second; +} + + +/** + * @throws std::bad_alloc + */ +int DhcpServerMessage::encode(octets_t &data) +{ + /* + * Header, including DHCP cookie. 
+ */ + RTNETBOOTP bp; + RT_ZERO(bp); + + bp.bp_op = RTNETBOOTP_OP_REPLY; + bp.bp_htype = RTNET_ARP_ETHER; + bp.bp_hlen = sizeof(RTMAC); + + bp.bp_xid = m_xid; + bp.bp_flags = m_flags; /* echo the client's flags (RFC 2131, table 3); m_flags holds the value as received, so no byte swapping */ + + bp.bp_ciaddr = m_ciaddr; + bp.bp_yiaddr = m_yiaddr; + bp.bp_siaddr = m_siaddr; + bp.bp_giaddr = m_giaddr; + + bp.bp_chaddr.Mac = m_mac; + + bp.bp_vend.Dhcp.dhcp_cookie = RT_H2N_U32_C(RTNET_DHCP_COOKIE); + + data.insert(data.end(), (uint8_t *)&bp, (uint8_t *)&bp.bp_vend.Dhcp.dhcp_opts); /* appends everything up to, but not including, the option area */ + + /** @todo TFTP, bootfile name, etc. pick from extended options if no + * override in effect? */ + + /* + * Options: server id and message type first, then everything in + * m_optmap, terminated by the end option. + */ + data << m_optServerId + << m_optMessageType; + + for (optmap_t::const_iterator it = m_optmap.begin(); it != m_optmap.end(); ++it) + { + LogRel3(("encoding option %d (%s)\n", it->first, DhcpOption::name(it->first))); + DhcpOption &opt = *it->second; + data << opt; + } + + data << OptEnd(); + + AssertCompile(RTNET_DHCP_NORMAL_SIZE == 548); + if (data.size() < RTNET_DHCP_NORMAL_SIZE) /* pad short messages up to the normal size */ + data.resize(RTNET_DHCP_NORMAL_SIZE); + + /** @todo dump it */ + if ((LogRelIs4Enabled() && LogRelIsEnabled()) || LogIsEnabled()) + dump(); + if ((LogRelIs5Enabled() && LogRelIsEnabled()) || LogIs5Enabled()) + LogRel5(("encoded message: %u bytes\n%.*Rhxd\n", data.size(), data.size(), &data.front())); + + return VINF_SUCCESS; +} + + +/** + * Dumps a server message to the log. + */ +void DhcpServerMessage::dump() const RT_NOEXCEPT +{ + DhcpMessage::dump(); + + /** @todo dump option details. */ +} + diff --git a/src/VBox/NetworkServices/Dhcpd/DhcpMessage.h b/src/VBox/NetworkServices/Dhcpd/DhcpMessage.h new file mode 100644 index 00000000..f92e8074 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/DhcpMessage.h @@ -0,0 +1,161 @@ +/* $Id: DhcpMessage.h $ */ +/** @file + * DHCP Message and its de/serialization. + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Dhcpd_DhcpMessage_h +#define VBOX_INCLUDED_SRC_Dhcpd_DhcpMessage_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include "DhcpdInternal.h" +#include <iprt/net.h> +#include <iprt/cpp/ministring.h> +#include "ClientId.h" +#include "DhcpOptions.h" + + +/** + * Base class for internal DHCP client and server message representations. + * + * Holds the header fields shared by both directions plus the message type + * option. + */ +class DhcpMessage +{ +protected: + uint32_t m_xid; /**< Transaction ID (bp_xid). */ + uint16_t m_flags; /**< Flags (bp_flags), tested against RTNET_DHCP_FLAG_BROADCAST by broadcast(). */ + + RTMAC m_mac; /**< Client hardware address (bp_chaddr). */ + + RTNETADDRIPV4 m_ciaddr; + RTNETADDRIPV4 m_yiaddr; + RTNETADDRIPV4 m_siaddr; + RTNETADDRIPV4 m_giaddr; + +#if 0 /* not currently used, so avoid wasting time on them for now. */ + RTCString m_sname; /**< @note Not necessarily UTF-8 clean. */ + RTCString m_file; /**< @note Not necessarily UTF-8 clean. 
*/ +#endif + + OptMessageType m_optMessageType; + +protected: + DhcpMessage(); + +public: + /** @name Accessors + * @{ */ + uint32_t xid() const RT_NOEXCEPT { return m_xid; } + + uint16_t flags() const RT_NOEXCEPT { return m_flags; } + bool broadcast() const RT_NOEXCEPT { return (m_flags & RTNET_DHCP_FLAG_BROADCAST) != 0; } + + const RTMAC &mac() const RT_NOEXCEPT { return m_mac; } + + RTNETADDRIPV4 ciaddr() const RT_NOEXCEPT { return m_ciaddr; } + RTNETADDRIPV4 yiaddr() const RT_NOEXCEPT { return m_yiaddr; } + RTNETADDRIPV4 siaddr() const RT_NOEXCEPT { return m_siaddr; } + RTNETADDRIPV4 giaddr() const RT_NOEXCEPT { return m_giaddr; } + + void setCiaddr(RTNETADDRIPV4 addr) RT_NOEXCEPT { m_ciaddr = addr; } + void setYiaddr(RTNETADDRIPV4 addr) RT_NOEXCEPT { m_yiaddr = addr; } + void setSiaddr(RTNETADDRIPV4 addr) RT_NOEXCEPT { m_siaddr = addr; } + void setGiaddr(RTNETADDRIPV4 addr) RT_NOEXCEPT { m_giaddr = addr; } + + uint8_t messageType() const RT_NOEXCEPT /* asserts that the message type option is present */ + { + Assert(m_optMessageType.present()); + return m_optMessageType.value(); + } + /** @} */ + + void dump() const RT_NOEXCEPT; +}; + + +/** + * Decoded DHCP client message. + * + * This is the internal decoded representation of a DHCP message picked up from + * the wire. Instances are created by parse(). + */ +class DhcpClientMessage + : public DhcpMessage +{ +protected: + rawopts_t m_rawopts; /**< The raw options, filled in by i_parseOptions(). */ + ClientId m_id; /**< Client identifier, built by parse() from the MAC address and the client id option. */ + bool m_broadcasted; /**< The @a broadcasted value passed to parse(). */ + +public: + static DhcpClientMessage *parse(bool broadcasted, const void *buf, size_t buflen); + + /** @name Getters + * @{ */ + bool broadcasted() const RT_NOEXCEPT { return m_broadcasted; } + const rawopts_t &rawopts() const RT_NOEXCEPT { return m_rawopts; } + const ClientId &clientId() const RT_NOEXCEPT { return m_id; } + /** @} */ + + void dump() const RT_NOEXCEPT; + +protected: + int i_parseOptions(const uint8_t *pbBuf, size_t cbBuf) RT_NOEXCEPT; +}; + + + +/** + * DHCP server message for encoding. 
+ */ +class DhcpServerMessage + : public DhcpMessage +{ +protected: + RTNETADDRIPV4 m_dst; /**< Destination address; set to broadcast by the constructor, see maybeUnicast(). */ + OptServerId m_optServerId; /**< Server id option, always encoded first. */ + optmap_t m_optmap; /**< The other options to encode. */ + +public: + DhcpServerMessage(const DhcpClientMessage &req, uint8_t messageType, RTNETADDRIPV4 serverAddr); + + /** @name Accessors + * @{ */ + RTNETADDRIPV4 dst() const RT_NOEXCEPT { return m_dst; } + void setDst(RTNETADDRIPV4 aDst) RT_NOEXCEPT { m_dst = aDst; } + + void maybeUnicast(const DhcpClientMessage &req) RT_NOEXCEPT; + + void addOption(DhcpOption *opt); + void addOption(const DhcpOption &opt) { addOption(opt.clone()); } /* adds a clone, @a opt itself is left alone */ + + void addOptions(const optmap_t &optmap); + /** @} */ + + int encode(octets_t &data); + void dump() const RT_NOEXCEPT; +}; + +#endif /* !VBOX_INCLUDED_SRC_Dhcpd_DhcpMessage_h */ diff --git a/src/VBox/NetworkServices/Dhcpd/DhcpOptions.cpp b/src/VBox/NetworkServices/Dhcpd/DhcpOptions.cpp new file mode 100644 index 00000000..96e34562 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/DhcpOptions.cpp @@ -0,0 +1,515 @@ +/* $Id: DhcpOptions.cpp $ */ +/** @file + * DHCP server - DHCP options + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "DhcpdInternal.h" +#include "DhcpOptions.h" +#ifndef IN_VBOXSVC +# include "DhcpMessage.h" +#endif + +#include <iprt/cidr.h> + + +#ifndef IN_VBOXSVC + +/** + * Adds @a option to @a optmap, taking ownership of it. + * + * An option that is present replaces any existing entry with the same option + * code. An option that is not present removes the existing entry instead and + * is then destroyed, since nobody else owns it. + * + * @returns @a optmap + * @param optmap The option map to modify. + * @param option The option, NULL is ignored. + * @throws std::bad_alloc + */ +optmap_t &operator<<(optmap_t &optmap, DhcpOption *option) +{ + if (option == NULL) + return optmap; + + std::shared_ptr<DhcpOption> ptrOption(option); /* take ownership right away so the option isn't leaked on the erase path */ + if (ptrOption->present()) + optmap[ptrOption->optcode()] = ptrOption; + else + optmap.erase(ptrOption->optcode()); + + return optmap; +} + + +/** + * Adds the shared @a option to @a optmap, or removes the entry with the same + * option code if @a option is not present. An empty pointer is ignored. + */ +optmap_t &operator<<(optmap_t &optmap, const std::shared_ptr<DhcpOption> &option) +{ + if (!option) + return optmap; + + if (option->present()) + optmap[option->optcode()] = option; + else + optmap.erase(option->optcode()); + + return optmap; +} + +#endif /* !IN_VBOXSVC */ + + +/** + * Appends the option (code, length, value) to @a dst. + * + * @returns VINF_SUCCESS, VERR_INVALID_STATE if the option isn't present, or + * VERR_INVALID_PARAMETER if the value cannot be encoded (in which + * case @a dst is restored to its original size). + */ +int DhcpOption::encode(octets_t &dst) const +{ + if (!m_fPresent) + return VERR_INVALID_STATE; + + size_t cbOrig = dst.size(); + + append(dst, m_OptCode); + appendLength(dst, 0); /* placeholder */ + + ssize_t cbValue = encodeValue(dst); + if (cbValue < 0 || UINT8_MAX <= cbValue) /* NOTE(review): this also rejects a value of exactly UINT8_MAX bytes - confirm that is intended */ + { + dst.resize(cbOrig); /* undo */ + return VERR_INVALID_PARAMETER; + } + + dst[cbOrig+1] = (uint8_t)cbValue; /* patch the length placeholder */ + return VINF_SUCCESS; +} + + +/* static - returns the raw value of option aOptCode in aOptMap, NULL if not found. */ +const octets_t *DhcpOption::findOption(const rawopts_t &aOptMap, uint8_t aOptCode) +{ + rawopts_t::const_iterator it(aOptMap.find(aOptCode)); + if (it == aOptMap.end()) + return NULL; + + return &it->second; +} + + +/** Decodes this option from the raw options in @a map; VERR_NOT_FOUND if missing, VERR_INVALID_PARAMETER if the value is bad. */ +int DhcpOption::decode(const rawopts_t &map) +{ + const octets_t *rawopt = DhcpOption::findOption(map, m_OptCode); + if (rawopt == NULL) + return VERR_NOT_FOUND; + + int rc = decodeValue(*rawopt, rawopt->size()); + if (RT_FAILURE(rc)) + return VERR_INVALID_PARAMETER; + + return VINF_SUCCESS; +} + + 
+#ifndef IN_VBOXSVC +int DhcpOption::decode(const DhcpClientMessage &req) +{ + return decode(req.rawopts()); +} +#endif + + +int DhcpOption::parse1(bool &aValue, const char *pcszValue) +{ + pcszValue = RTStrStripL(pcszValue); + if ( strcmp(pcszValue, "true") == 0 + || strcmp(pcszValue, "1") == 0 + || strcmp(pcszValue, "yes") == 0 + || strcmp(pcszValue, "on") == 0 ) + { + aValue = true; + return VINF_SUCCESS; + } + + if ( strcmp(pcszValue, "false") == 0 + || strcmp(pcszValue, "0") == 0 + || strcmp(pcszValue, "no") == 0 + || strcmp(pcszValue, "off") == 0 ) + { + aValue = false; + return VINF_SUCCESS; + } + + uint8_t bTmp; + int rc = RTStrToUInt8Full(RTStrStripL(pcszValue), 10, &bTmp); + if (rc == VERR_TRAILING_SPACES) + rc = VINF_SUCCESS; + if (RT_SUCCESS(rc)) + aValue = bTmp != 0; + + return rc; +} + + +int DhcpOption::parse1(uint8_t &aValue, const char *pcszValue) +{ + int rc = RTStrToUInt8Full(RTStrStripL(pcszValue), 10, &aValue); + if (rc == VERR_TRAILING_SPACES) + rc = VINF_SUCCESS; + return rc; +} + + +int DhcpOption::parse1(uint16_t &aValue, const char *pcszValue) +{ + int rc = RTStrToUInt16Full(RTStrStripL(pcszValue), 10, &aValue); + + if (rc == VERR_TRAILING_SPACES) + rc = VINF_SUCCESS; + return rc; +} + + +int DhcpOption::parse1(uint32_t &aValue, const char *pcszValue) +{ + int rc = RTStrToUInt32Full(RTStrStripL(pcszValue), 10, &aValue); + + if (rc == VERR_TRAILING_SPACES) + rc = VINF_SUCCESS; + return rc; +} + + +int DhcpOption::parse1(RTNETADDRIPV4 &aValue, const char *pcszValue) +{ + return RTNetStrToIPv4Addr(pcszValue, &aValue); +} + + +int DhcpOption::parse1(DhcpIpv4AddrAndMask &aValue, const char *pcszValue) +{ + return RTCidrStrToIPv4(pcszValue, &aValue.Ipv4, &aValue.Mask); +} + + +template <typename a_Type> +/*static*/ int DhcpOption::parseList(std::vector<a_Type> &aList, const char *pcszValue) +{ + std::vector<a_Type> vecTmp; + + pcszValue = RTStrStripL(pcszValue); + for (;;) + { + /* Assume space, tab, comma or semicolon is used as separator 
(superset of RTStrStrip): */ + const char *pszNext = strpbrk(pcszValue, " ,;:\t\n\r"); + char szTmp[256]; + if (pszNext) + { + size_t cchToCopy = (size_t)(pszNext - pcszValue); + if (cchToCopy >= sizeof(szTmp)) + return VERR_INVALID_PARAMETER; + memcpy(szTmp, pcszValue, cchToCopy); + szTmp[cchToCopy] = '\0'; + pcszValue = szTmp; + + /* Advance pszNext past the separator character and fluff: */ + char ch; + do + pszNext++; + while ((ch = *pszNext) == ' ' || ch == ':' || ch == ';' || ch == '\t' || ch == '\n' || ch == '\r'); + if (ch == '\0') + pszNext = NULL; + } + + /* Try convert it: */ + a_Type Value; + int rc = DhcpOption::parse1(Value, pcszValue); + if (RT_SUCCESS(rc)) + vecTmp.push_back(Value); + else + return VERR_INVALID_PARAMETER; + + if (pszNext) + pcszValue = pszNext; + else + break; + } + + aList.swap(vecTmp); + return VINF_SUCCESS; + +} + +/** ASSUME that uint8_t means hex byte strings. */ +template <> +/*static*/ int DhcpOption::parseList(std::vector<uint8_t> &aList, const char *pcszValue) +{ + uint8_t abBuf[255]; + size_t cbReturned = 0; + int rc = RTStrConvertHexBytesEx(RTStrStripL(pcszValue), abBuf, sizeof(abBuf), RTSTRCONVERTHEXBYTES_F_SEP_COLON, + NULL, &cbReturned); + if (RT_SUCCESS(rc)) + { + if (rc != VWRN_TRAILING_CHARS) + { + for (size_t i = 0; i < cbReturned; i++) + aList.push_back(abBuf[i]); + rc = VINF_SUCCESS; + } + else + rc = VERR_TRAILING_CHARS; + } + return rc; +} + + + +/* + * XXX: See DHCPServer::encodeOption() + */ +int DhcpOption::parseHex(octets_t &aRawValue, const char *pcszValue) +{ + uint8_t abBuf[255]; + size_t cbReturned = 0; + int rc = RTStrConvertHexBytesEx(RTStrStripL(pcszValue), abBuf, sizeof(abBuf), RTSTRCONVERTHEXBYTES_F_SEP_COLON, + NULL, &cbReturned); + if (RT_SUCCESS(rc)) + { + if (rc != VWRN_TRAILING_CHARS) + { + for (size_t i = 0; i < cbReturned; i++) + aRawValue.push_back(abBuf[i]); + rc = VINF_SUCCESS; + } + else + rc = VERR_TRAILING_CHARS; + } + return rc; +} + + +/*static*/ DhcpOption *DhcpOption::parse(uint8_t 
aOptCode, int aEnc, const char *pcszValue, int *prc /*= NULL*/) +{ + int rcIgn; + if (!prc) + prc = &rcIgn; + + switch (aEnc) + { + case 0: /* DHCPOptionEncoding_Normal */ + switch (aOptCode) + { +#define HANDLE(a_OptClass) \ + case a_OptClass::optcode: \ + return a_OptClass::parse(pcszValue, prc) + + HANDLE(OptSubnetMask); // 1 + HANDLE(OptTimeOffset); // 2 + HANDLE(OptRouters); // 3 + HANDLE(OptTimeServers); // 4 + HANDLE(OptNameServers); // 5 + HANDLE(OptDNSes); // 6 + HANDLE(OptLogServers); // 7 + HANDLE(OptCookieServers); // 8 + HANDLE(OptLPRServers); // 9 + HANDLE(OptImpressServers); // 10 + HANDLE(OptResourceLocationServers); // 11 + HANDLE(OptHostName); // 12 + HANDLE(OptBootFileSize); // 13 + HANDLE(OptMeritDumpFile); // 14 + HANDLE(OptDomainName); // 15 + HANDLE(OptSwapServer); // 16 + HANDLE(OptRootPath); // 17 + HANDLE(OptExtensionPath); // 18 + HANDLE(OptIPForwarding); // 19 + HANDLE(OptNonLocalSourceRouting); // 20 + HANDLE(OptPolicyFilter); // 21 + HANDLE(OptMaxDgramReassemblySize); // 22 + HANDLE(OptDefaultIPTTL); // 23 + HANDLE(OptPathMTUAgingTimeout); // 24 + HANDLE(OptPathMTUPlateauTable); // 25 + HANDLE(OptInterfaceMTU); // 26 + HANDLE(OptAllSubnetsAreLocal); // 27 + HANDLE(OptBroadcastAddress); // 28 + HANDLE(OptPerformMaskDiscovery); // 29 + HANDLE(OptMaskSupplier); // 30 + HANDLE(OptPerformRouterDiscovery); // 31 + HANDLE(OptRouterSolicitationAddress); // 32 + HANDLE(OptStaticRoute); // 33 + HANDLE(OptTrailerEncapsulation); // 34 + HANDLE(OptARPCacheTimeout); // 35 + HANDLE(OptEthernetEncapsulation); // 36 + HANDLE(OptTCPDefaultTTL); // 37 + HANDLE(OptTCPKeepaliveInterval); // 38 + HANDLE(OptTCPKeepaliveGarbage); // 39 + HANDLE(OptNISDomain); // 40 + HANDLE(OptNISServers); // 41 + HANDLE(OptNTPServers); // 42 + //HANDLE(OptVendorSpecificInfo); // 43 - Only DHCPOptionEncoding_hex + HANDLE(OptNetBIOSNameServers); // 44 + HANDLE(OptNetBIOSDatagramServers); // 45 + HANDLE(OptNetBIOSNodeType); // 46 + //HANDLE(OptNetBIOSScope); // 47 - Only 
DHCPOptionEncoding_hex + HANDLE(OptXWindowsFontServers); // 48 + HANDLE(OptXWindowsDisplayManager); // 49 +#ifndef IN_VBOXSVC /* Don't allow these in new configs */ + // OptRequestedAddress (50) is client only and not configurable. + HANDLE(OptLeaseTime); // 51 - for historical reasons? Configuable elsewhere now. + // OptOptionOverload (52) is part of the protocol and not configurable. + // OptMessageType (53) is part of the protocol and not configurable. + // OptServerId (54) is the IP address of the server and configurable elsewhere. + // OptParameterRequest (55) is client only and not configurable. + // OptMessage (56) is server failure message and not configurable. + // OptMaxDHCPMessageSize (57) is client only (?) and not configurable. + HANDLE(OptRenewalTime); // 58 - for historical reasons? + HANDLE(OptRebindingTime); // 59 - for historical reasons? + // OptVendorClassId (60) is client only and not configurable. + // OptClientId (61) is client only and not configurable. +#endif + HANDLE(OptNetWareIPDomainName); // 62 + //HANDLE(OptNetWareIPInformation); // 63 - Only DHCPOptionEncoding_hex + HANDLE(OptNISPlusDomain); // 64 + HANDLE(OptNISPlusServers); // 65 + HANDLE(OptTFTPServerName); // 66 - perhaps we should use an alternative way to configure these. + HANDLE(OptBootfileName); // 67 - perhaps we should use an alternative way to configure these. + HANDLE(OptMobileIPHomeAgents); // 68 + HANDLE(OptSMTPServers); // 69 + HANDLE(OptPOP3Servers); // 70 + HANDLE(OptNNTPServers); // 71 + HANDLE(OptWWWServers); // 72 + HANDLE(OptFingerServers); // 73 + HANDLE(OptIRCServers); // 74 + HANDLE(OptStreetTalkServers); // 75 + HANDLE(OptSTDAServers); // 76 + // OptUserClassId (77) is client only and not configurable. + //HANDLE(OptSLPDirectoryAgent); // 78 - Only DHCPOptionEncoding_hex + //HANDLE(OptSLPServiceScope); // 79 - Only DHCPOptionEncoding_hex + // OptRapidCommit (80) is not configurable. 
+ + //HANDLE(OptDomainSearch); // 119 - Only DHCPOptionEncoding_hex + +#undef HANDLE + default: + if (prc) + *prc = VERR_NOT_IMPLEMENTED; + return NULL; + } + break; + + case 1: + return RawOption::parse(aOptCode, pcszValue, prc); + + default: + if (prc) + *prc = VERR_WRONG_TYPE; + return NULL; + } +} + + +/** + * Gets the option name (simply "unknown" if not known) for logging purposes. + */ +/*static*/ const char *DhcpOption::name(uint8_t aOptCode) +{ + switch (aOptCode) + { +#define HANDLE(a_OptClass) \ + case a_OptClass::optcode: \ + return &#a_OptClass[3] + + HANDLE(OptSubnetMask); // 1 + HANDLE(OptTimeOffset); // 2 + HANDLE(OptRouters); // 3 + HANDLE(OptTimeServers); // 4 + HANDLE(OptNameServers); // 5 + HANDLE(OptDNSes); // 6 + HANDLE(OptLogServers); // 7 + HANDLE(OptCookieServers); // 8 + HANDLE(OptLPRServers); // 9 + HANDLE(OptImpressServers); // 10 + HANDLE(OptResourceLocationServers); // 11 + HANDLE(OptHostName); // 12 + HANDLE(OptBootFileSize); // 13 + HANDLE(OptMeritDumpFile); // 14 + HANDLE(OptDomainName); // 15 + HANDLE(OptSwapServer); // 16 + HANDLE(OptRootPath); // 17 + HANDLE(OptExtensionPath); // 18 + HANDLE(OptIPForwarding); // 19 + HANDLE(OptNonLocalSourceRouting); // 20 + HANDLE(OptPolicyFilter); // 21 + HANDLE(OptMaxDgramReassemblySize); // 22 + HANDLE(OptDefaultIPTTL); // 23 + HANDLE(OptPathMTUAgingTimeout); // 24 + HANDLE(OptPathMTUPlateauTable); // 25 + HANDLE(OptInterfaceMTU); // 26 + HANDLE(OptAllSubnetsAreLocal); // 27 + HANDLE(OptBroadcastAddress); // 28 + HANDLE(OptPerformMaskDiscovery); // 29 + HANDLE(OptMaskSupplier); // 30 + HANDLE(OptPerformRouterDiscovery); // 31 + HANDLE(OptRouterSolicitationAddress); // 32 + HANDLE(OptStaticRoute); // 33 + HANDLE(OptTrailerEncapsulation); // 34 + HANDLE(OptARPCacheTimeout); // 35 + HANDLE(OptEthernetEncapsulation); // 36 + HANDLE(OptTCPDefaultTTL); // 37 + HANDLE(OptTCPKeepaliveInterval); // 38 + HANDLE(OptTCPKeepaliveGarbage); // 39 + HANDLE(OptNISDomain); // 40 + HANDLE(OptNISServers); // 41 
+ HANDLE(OptNTPServers); // 42 + HANDLE(OptVendorSpecificInfo); // 43 + HANDLE(OptNetBIOSNameServers); // 44 + HANDLE(OptNetBIOSDatagramServers); // 45 + HANDLE(OptNetBIOSNodeType); // 46 + HANDLE(OptNetBIOSScope); // 47 + HANDLE(OptXWindowsFontServers); // 48 + HANDLE(OptXWindowsDisplayManager); // 49 + HANDLE(OptRequestedAddress); // 50 + HANDLE(OptLeaseTime); // 51 + //HANDLE(OptOptionOverload); // 52 + HANDLE(OptMessageType); // 53 + HANDLE(OptServerId); // 54 + HANDLE(OptParameterRequest); // 55 + HANDLE(OptMessage); // 56 + HANDLE(OptMaxDHCPMessageSize); // 57 + HANDLE(OptRenewalTime); // 58 + HANDLE(OptRebindingTime); // 59 + HANDLE(OptVendorClassId); // 60 + HANDLE(OptClientId); // 61 + HANDLE(OptNetWareIPDomainName); // 62 + HANDLE(OptNetWareIPInformation); // 63 + HANDLE(OptNISPlusDomain); // 64 + HANDLE(OptNISPlusServers); // 65 + HANDLE(OptTFTPServerName); // 66 + HANDLE(OptBootfileName); // 67 + HANDLE(OptMobileIPHomeAgents); // 68 + HANDLE(OptSMTPServers); // 69 + HANDLE(OptPOP3Servers); // 70 + HANDLE(OptNNTPServers); // 71 + HANDLE(OptWWWServers); // 72 + HANDLE(OptFingerServers); // 73 + HANDLE(OptIRCServers); // 74 + HANDLE(OptStreetTalkServers); // 75 + HANDLE(OptSTDAServers); // 76 + HANDLE(OptUserClassId); // 77 + HANDLE(OptSLPDirectoryAgent); // 78 - Only DHCPOptionEncoding_hex + HANDLE(OptSLPServiceScope); // 79 - Only DHCPOptionEncoding_hex + HANDLE(OptRapidCommit); // 80 + + HANDLE(OptDomainSearch); // 119 - Only DHCPOptionEncoding_hex + +#undef HANDLE + default: + return "unknown"; + } +} + diff --git a/src/VBox/NetworkServices/Dhcpd/DhcpOptions.h b/src/VBox/NetworkServices/Dhcpd/DhcpOptions.h new file mode 100644 index 00000000..45408842 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/DhcpOptions.h @@ -0,0 +1,832 @@ +/* $Id: DhcpOptions.h $ */ +/** @file + * DHCP server - DHCP options + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. 
+ * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Dhcpd_DhcpOptions_h +#define VBOX_INCLUDED_SRC_Dhcpd_DhcpOptions_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include "DhcpdInternal.h" + +#include <iprt/asm.h> +#include <iprt/err.h> +#include <iprt/net.h> +#include <iprt/string.h> +#include <iprt/cpp/ministring.h> + + +class DhcpClientMessage; + +typedef struct DhcpIpv4AddrAndMask +{ + RTNETADDRIPV4 Ipv4; + RTNETADDRIPV4 Mask; +} DhcpIpv4AddrAndMask; + + +class DhcpOption +{ +protected: + uint8_t m_OptCode; + bool m_fPresent; + +public: + explicit DhcpOption(uint8_t aOptCode) + : m_OptCode(aOptCode), m_fPresent(true) + {} + + DhcpOption(uint8_t aOptCode, bool fPresent) + : m_OptCode(aOptCode), m_fPresent(fPresent) + {} + + virtual DhcpOption *clone() const = 0; + + virtual ~DhcpOption() + {} + +public: + static DhcpOption *parse(uint8_t aOptCode, int aEnc, const char *pcszValue, int *prc = NULL); + static const char *name(uint8_t bOptcode); + +public: + uint8_t optcode() const RT_NOEXCEPT { return m_OptCode; } + bool present() const RT_NOEXCEPT { return m_fPresent; } + +public: + int encode(octets_t &dst) const; + + int decode(const rawopts_t &map); + int decode(const DhcpClientMessage &req); + 
+protected: + virtual ssize_t encodeValue(octets_t &dst) const = 0; + virtual int decodeValue(const octets_t &src, size_t cb) = 0; + +protected: + static const octets_t *findOption(const rawopts_t &aOptMap, uint8_t aOptCode); + +protected: + /** @name Serialization + * @{ */ + static void append(octets_t &aDst, bool aValue) + { + uint8_t b = aValue ? 1 : 0; + aDst.push_back(b); + } + + static void append(octets_t &aDst, uint8_t aValue) + { + aDst.push_back(aValue); + } + + static void append(octets_t &aDst, uint16_t aValue) + { + RTUINT16U u16 = { RT_H2N_U16(aValue) }; + aDst.insert(aDst.end(), u16.au8, u16.au8 + sizeof(aValue)); + } + + static void append(octets_t &aDst, uint32_t aValue) + { + RTUINT32U u32 = { RT_H2N_U32(aValue) }; + aDst.insert(aDst.end(), u32.au8, u32.au8 + sizeof(aValue)); + } + + static void append(octets_t &aDst, RTNETADDRIPV4 aIPv4) + { + aDst.insert(aDst.end(), aIPv4.au8, aIPv4.au8 + sizeof(aIPv4)); + } + + static void append(octets_t &aDst, DhcpIpv4AddrAndMask aIPv4) + { + aDst.insert(aDst.end(), (uint8_t *)&aIPv4, (uint8_t *)&aIPv4 + sizeof(aIPv4)); + } + + static void append(octets_t &aDst, const char *pszString, size_t cb) + { + aDst.insert(aDst.end(), pszString, pszString + cb); + } + + static void append(octets_t &aDst, const RTCString &str) + { + append(aDst, str.c_str(), str.length()); + } + + /* non-overloaded name to avoid ambiguity */ + static void appendLength(octets_t &aDst, size_t cb) + { + append(aDst, static_cast<uint8_t>(cb)); + } + + /** @} */ + + + /** @name Deserialization + * @{ */ + static void extract(bool &aValue, octets_t::const_iterator &pos) + { + aValue = *pos != 0; + pos += sizeof(uint8_t); + } + + static void extract(uint8_t &aValue, octets_t::const_iterator &pos) + { + aValue = *pos; + pos += sizeof(uint8_t); + } + + static void extract(uint16_t &aValue, octets_t::const_iterator &pos) + { + RTUINT16U u16; + memcpy(u16.au8, &pos[0], sizeof(uint16_t)); + aValue = RT_N2H_U16(u16.u); + pos += sizeof(uint16_t); + 
} + + static void extract(uint32_t &aValue, octets_t::const_iterator &pos) + { + RTUINT32U u32; + memcpy(u32.au8, &pos[0], sizeof(uint32_t)); + aValue = RT_N2H_U32(u32.u); + pos += sizeof(uint32_t); + } + + static void extract(RTNETADDRIPV4 &aValue, octets_t::const_iterator &pos) + { + memcpy(aValue.au8, &pos[0], sizeof(RTNETADDRIPV4)); + pos += sizeof(RTNETADDRIPV4); + } + + static void extract(DhcpIpv4AddrAndMask &aValue, octets_t::const_iterator &pos) + { + memcpy(&aValue, &pos[0], sizeof(aValue)); + pos += sizeof(aValue); + } + +#if 0 /** @todo fix me */ + static void extract(RTCString &aString, octets_t::const_iterator &pos, size_t cb) + { + aString.replace(aString.begin(), aString.end(), &pos[0], &pos[cb]); + pos += cb; + } +#endif + + /** @} */ + + /** @name Parse textual representation (e.g. in config file) + * @{ */ + static int parse1(bool &aValue, const char *pcszValue); + static int parse1(uint8_t &aValue, const char *pcszValue); + static int parse1(uint16_t &aValue, const char *pcszValue); + static int parse1(uint32_t &aValue, const char *pcszValue); + static int parse1(RTNETADDRIPV4 &aValue, const char *pcszValue); + static int parse1(DhcpIpv4AddrAndMask &aValue, const char *pcszValue); + + template <typename a_Type> static int parseList(std::vector<a_Type> &aList, const char *pcszValue); + + static int parseHex(octets_t &aRawValue, const char *pcszValue); + + /** @} */ +}; + + +inline octets_t &operator<<(octets_t &dst, const DhcpOption &option) +{ + option.encode(dst); + return dst; +} + + +#ifndef IN_VBOXSVC +optmap_t &operator<<(optmap_t &optmap, DhcpOption *option); +optmap_t &operator<<(optmap_t &optmap, const std::shared_ptr<DhcpOption> &option); +#endif + + + +/** + * Only for << OptEnd() syntactic sugar... 
+ */ +struct OptEnd {}; +inline octets_t &operator<<(octets_t &dst, const OptEnd &end) +{ + RT_NOREF(end); + + dst.push_back(RTNET_DHCP_OPT_END); + return dst; +} + + + +/** + * Option that has no value + */ +class OptNoValueBase + : public DhcpOption +{ +public: + explicit OptNoValueBase(uint8_t aOptCode) + : DhcpOption(aOptCode, false) + {} + + OptNoValueBase(uint8_t aOptCode, bool fPresent) + : DhcpOption(aOptCode, fPresent) + {} + + OptNoValueBase(uint8_t aOptCode, const DhcpClientMessage &req) + : DhcpOption(aOptCode, false) + { + decode(req); + } + + virtual OptNoValueBase *clone() const + { + return new OptNoValueBase(*this); + } + +protected: + virtual ssize_t encodeValue(octets_t &dst) const + { + RT_NOREF(dst); + return 0; + } + +public: + static bool isLengthValid(size_t cb) + { + return cb == 0; + } + + virtual int decodeValue(const octets_t &src, size_t cb) + { + RT_NOREF(src); + + if (!isLengthValid(cb)) + return VERR_INVALID_PARAMETER; + + m_fPresent = true; + return VINF_SUCCESS; + } +}; + +template <uint8_t _OptCode> +class OptNoValue + : public OptNoValueBase +{ +public: + static const uint8_t optcode = _OptCode; + + OptNoValue() + : OptNoValueBase(optcode) + {} + + explicit OptNoValue(bool fPresent) /* there's no overloaded ctor with value */ + : OptNoValueBase(optcode, fPresent) + {} + + explicit OptNoValue(const DhcpClientMessage &req) + : OptNoValueBase(optcode, req) + {} +}; + + + +/* + * Option that contains single value of fixed-size type T + */ +template <typename T> +class OptValueBase + : public DhcpOption +{ +public: + typedef T value_t; + +protected: + T m_Value; + + explicit OptValueBase(uint8_t aOptCode) + : DhcpOption(aOptCode, false), m_Value() + {} + + OptValueBase(uint8_t aOptCode, const T &aOptValue) + : DhcpOption(aOptCode), m_Value(aOptValue) + {} + + OptValueBase(uint8_t aOptCode, const DhcpClientMessage &req) + : DhcpOption(aOptCode, false), m_Value() + { + decode(req); + } + +public: + virtual OptValueBase *clone() const + 
{ + return new OptValueBase(*this); + } + +public: + T &value() { return m_Value; } + const T &value() const { return m_Value; } + +protected: + virtual ssize_t encodeValue(octets_t &dst) const + { + append(dst, m_Value); + return sizeof(T); + } + +public: + static bool isLengthValid(size_t cb) + { + return cb == sizeof(T); + } + + virtual int decodeValue(const octets_t &src, size_t cb) + { + if (!isLengthValid(cb)) + return VERR_INVALID_PARAMETER; + + octets_t::const_iterator pos(src.begin()); + extract(m_Value, pos); + + m_fPresent = true; + return VINF_SUCCESS; + } +}; + +template<uint8_t _OptCode, typename T> +class OptValue + : public OptValueBase<T> +{ +public: + using typename OptValueBase<T>::value_t; + +public: + static const uint8_t optcode = _OptCode; + + OptValue() + : OptValueBase<T>(optcode) + {} + + explicit OptValue(const T &aOptValue) + : OptValueBase<T>(optcode, aOptValue) + {} + + explicit OptValue(const DhcpClientMessage &req) + : OptValueBase<T>(optcode, req) + {} + + static OptValue *parse(const char *pcszValue, int *prc) + { + typename OptValueBase<T>::value_t v; + int rc = DhcpOption::parse1(v, pcszValue); + *prc = rc; + if (RT_SUCCESS(rc)) + return new OptValue(v); + return NULL; + } +}; + + + +/** + * Option that contains a string. 
+ */ +class OptStringBase + : public DhcpOption +{ +public: + typedef RTCString value_t; + +protected: + RTCString m_String; + + explicit OptStringBase(uint8_t aOptCode) + : DhcpOption(aOptCode, false), m_String() + {} + + OptStringBase(uint8_t aOptCode, const RTCString &aOptString) + : DhcpOption(aOptCode), m_String(aOptString) + {} + + OptStringBase(uint8_t aOptCode, const DhcpClientMessage &req) + : DhcpOption(aOptCode, false), m_String() + { + decode(req); + } + +public: + virtual OptStringBase *clone() const + { + return new OptStringBase(*this); + } + +public: + RTCString &value() { return m_String; } + const RTCString &value() const { return m_String; } + +protected: + virtual ssize_t encodeValue(octets_t &dst) const + { + if (!isLengthValid(m_String.length())) + return -1; + + append(dst, m_String); + return (ssize_t)m_String.length(); + } + +public: + static bool isLengthValid(size_t cb) + { + return cb <= UINT8_MAX; + } + + virtual int decodeValue(const octets_t &src, size_t cb) + { + if (!isLengthValid(cb)) + return VERR_INVALID_PARAMETER; + + int rc = m_String.assignNoThrow((char *)&src.front(), cb); /** @todo encoding. 
*/ + m_fPresent = true; + return rc; + } +}; + +template<uint8_t _OptCode> +class OptString + : public OptStringBase +{ +public: + static const uint8_t optcode = _OptCode; + + OptString() + : OptStringBase(optcode) + {} + + explicit OptString(const RTCString &aOptString) + : OptStringBase(optcode, aOptString) + {} + + explicit OptString(const DhcpClientMessage &req) + : OptStringBase(optcode, req) + {} + + static OptString *parse(const char *pcszValue, int *prc) + { + *prc = VINF_SUCCESS; + return new OptString(pcszValue); + } +}; + + + +/* + * Option that contains a list of values of type T + */ +template <typename T> +class OptListBase + : public DhcpOption +{ +public: + typedef std::vector<T> value_t; + +protected: + std::vector<T> m_List; + + explicit OptListBase(uint8_t aOptCode) + : DhcpOption(aOptCode, false), m_List() + {} + + OptListBase(uint8_t aOptCode, const T &aOptSingle) + : DhcpOption(aOptCode), m_List(1, aOptSingle) + {} + + OptListBase(uint8_t aOptCode, const std::vector<T> &aOptList) + : DhcpOption(aOptCode), m_List(aOptList) + {} + + OptListBase(uint8_t aOptCode, const DhcpClientMessage &req) + : DhcpOption(aOptCode, false), m_List() + { + decode(req); + } + +public: + virtual OptListBase *clone() const + { + return new OptListBase(*this); + } + +public: + std::vector<T> &value() { return m_List; } + const std::vector<T> &value() const { return m_List; } + +protected: + virtual ssize_t encodeValue(octets_t &dst) const + { + const size_t cbItem = sizeof(T); + size_t cbValue = 0; + + for (size_t i = 0; i < m_List.size(); ++i) + { + if (cbValue + cbItem > UINT8_MAX) + break; + + append(dst, m_List[i]); + cbValue += cbItem; + } + + return (ssize_t)cbValue; + } + +public: + static bool isLengthValid(size_t cb) + { + return cb % sizeof(T) == 0; + } + + virtual int decodeValue(const octets_t &src, size_t cb) + { + if (!isLengthValid(cb)) + return VERR_INVALID_PARAMETER; + + m_List.erase(m_List.begin(), m_List.end()); + + octets_t::const_iterator 
pos(src.begin()); + for (size_t i = 0; i < cb / sizeof(T); ++i) + { + T item; + extract(item, pos); + m_List.push_back(item); + } + m_fPresent = true; + return VINF_SUCCESS; + } +}; + +template<uint8_t _OptCode, typename T> +class OptList + : public OptListBase<T> + +{ +public: + using typename OptListBase<T>::value_t; + +public: + static const uint8_t optcode = _OptCode; + + OptList() + : OptListBase<T>(optcode) + {} + + explicit OptList(const T &aOptSingle) + : OptListBase<T>(optcode, aOptSingle) + {} + + explicit OptList(const std::vector<T> &aOptList) + : OptListBase<T>(optcode, aOptList) + {} + + explicit OptList(const DhcpClientMessage &req) + : OptListBase<T>(optcode, req) + {} + + static OptList *parse(const char *pcszValue, int *prc) + { + typename OptListBase<T>::value_t v; + int rc = DhcpOption::parseList<T>(v, pcszValue); + if (RT_SUCCESS(rc)) + { + if (!v.empty()) + { + *prc = rc; + return new OptList(v); + } + rc = VERR_NO_DATA; + } + *prc = rc; + return NULL; + } +}; + + +template<uint8_t _OptCode, typename T> +class OptPairList + : public OptListBase<T> + +{ +public: + using typename OptListBase<T>::value_t; + +public: + static const uint8_t optcode = _OptCode; + + OptPairList() + : OptListBase<T>(optcode) + {} + + explicit OptPairList(const T &aOptSingle) + : OptListBase<T>(optcode, aOptSingle) + {} + + explicit OptPairList(const std::vector<T> &aOptList) + : OptListBase<T>(optcode, aOptList) + {} + + explicit OptPairList(const DhcpClientMessage &req) + : OptListBase<T>(optcode, req) + {} + + static OptPairList *parse(const char *pcszValue, int *prc) + { + typename OptListBase<T>::value_t v; + int rc = DhcpOption::parseList<T>(v, pcszValue); + if (RT_SUCCESS(rc)) + { + if (!v.empty()) + { + if ((v.size() & 1) == 0) + { + *prc = rc; + return new OptPairList(v); + } + rc = VERR_UNEVEN_INPUT; + } + else + rc = VERR_NO_DATA; + } + *prc = rc; + return NULL; + } +}; + + +/* + * Options specified by raw binary data that we don't know how to + * interpret. 
+ */ +class RawOption + : public DhcpOption +{ +protected: + octets_t m_Data; + +public: + explicit RawOption(uint8_t aOptCode) + : DhcpOption(aOptCode, false), m_Data() + {} + + RawOption(uint8_t aOptCode, const octets_t &aSrc) + : DhcpOption(aOptCode), m_Data(aSrc) + {} + +public: + virtual RawOption *clone() const + { + return new RawOption(*this); + } + + +protected: + virtual ssize_t encodeValue(octets_t &dst) const + { + dst.insert(dst.end(), m_Data.begin(), m_Data.end()); + return (ssize_t)m_Data.size(); + } + + virtual int decodeValue(const octets_t &src, size_t cb) + { + octets_t::const_iterator beg(src.begin()); + octets_t data(beg, beg + (ssize_t)cb); + m_Data.swap(data); + + m_fPresent = true; + return VINF_SUCCESS; + } + +public: + static RawOption *parse(uint8_t aOptCode, const char *pcszValue, int *prc) + { + octets_t data; + int rc = DhcpOption::parseHex(data, pcszValue); + *prc = rc; + if (RT_SUCCESS(rc)) + return new RawOption(aOptCode, data); + return NULL; + } +}; + + + +/** @name The DHCP options types. 
+ * @{ + */ +typedef OptValue<1, RTNETADDRIPV4> OptSubnetMask; +typedef OptValue<2, uint32_t> OptTimeOffset; +typedef OptList<3, RTNETADDRIPV4> OptRouters; +typedef OptList<4, RTNETADDRIPV4> OptTimeServers; +typedef OptList<5, RTNETADDRIPV4> OptNameServers; +typedef OptList<6, RTNETADDRIPV4> OptDNSes; +typedef OptList<7, RTNETADDRIPV4> OptLogServers; +typedef OptList<8, RTNETADDRIPV4> OptCookieServers; +typedef OptList<9, RTNETADDRIPV4> OptLPRServers; +typedef OptList<10, RTNETADDRIPV4> OptImpressServers; +typedef OptList<11, RTNETADDRIPV4> OptResourceLocationServers; +typedef OptString<12> OptHostName; +typedef OptValue<13, uint16_t> OptBootFileSize; +typedef OptString<14> OptMeritDumpFile; +typedef OptString<15> OptDomainName; +typedef OptValue<16, RTNETADDRIPV4> OptSwapServer; +typedef OptString<17> OptRootPath; +typedef OptString<18> OptExtensionPath; +typedef OptValue<19, bool> OptIPForwarding; +typedef OptValue<20, bool> OptNonLocalSourceRouting; +typedef OptList<21, DhcpIpv4AddrAndMask> OptPolicyFilter; +typedef OptValue<22, uint16_t> OptMaxDgramReassemblySize; +typedef OptValue<23, uint16_t> OptDefaultIPTTL; +typedef OptValue<24, uint32_t> OptPathMTUAgingTimeout; +typedef OptList<25, uint16_t> OptPathMTUPlateauTable; +typedef OptValue<26, uint16_t> OptInterfaceMTU; +typedef OptValue<27, bool> OptAllSubnetsAreLocal; +typedef OptValue<28, RTNETADDRIPV4> OptBroadcastAddress; +typedef OptValue<29, bool> OptPerformMaskDiscovery; +typedef OptValue<30, bool> OptMaskSupplier; +typedef OptValue<31, bool> OptPerformRouterDiscovery; +typedef OptValue<32, RTNETADDRIPV4> OptRouterSolicitationAddress; +typedef OptPairList<33, RTNETADDRIPV4> OptStaticRoute; +typedef OptValue<34, bool> OptTrailerEncapsulation; +typedef OptValue<35, uint32_t> OptARPCacheTimeout; +typedef OptValue<36, bool> OptEthernetEncapsulation; +typedef OptValue<37, uint8_t> OptTCPDefaultTTL; +typedef OptValue<38, uint32_t> OptTCPKeepaliveInterval; +typedef OptValue<39, bool> OptTCPKeepaliveGarbage; 
+typedef OptString<40> OptNISDomain; +typedef OptList<41, RTNETADDRIPV4> OptNISServers; +typedef OptList<42, RTNETADDRIPV4> OptNTPServers; +/* DHCP related options: */ +typedef OptList<43, uint8_t> OptVendorSpecificInfo; +typedef OptList<44, RTNETADDRIPV4> OptNetBIOSNameServers; +typedef OptList<45, RTNETADDRIPV4> OptNetBIOSDatagramServers; +typedef OptValue<46, uint8_t> OptNetBIOSNodeType; +typedef OptList<47, uint8_t> OptNetBIOSScope; /**< uint8_t or string? */ +typedef OptList<48, RTNETADDRIPV4> OptXWindowsFontServers; +typedef OptList<49, RTNETADDRIPV4> OptXWindowsDisplayManager; +typedef OptValue<50, RTNETADDRIPV4> OptRequestedAddress; +typedef OptValue<51, uint32_t> OptLeaseTime; +/* 52 - option overload is syntactic and handled internally */ +typedef OptValue<53, uint8_t> OptMessageType; +typedef OptValue<54, RTNETADDRIPV4> OptServerId; +typedef OptList<55, uint8_t> OptParameterRequest; +typedef OptString<56> OptMessage; +typedef OptValue<57, uint16_t> OptMaxDHCPMessageSize; +typedef OptValue<58, uint32_t> OptRenewalTime; +typedef OptValue<59, uint32_t> OptRebindingTime; +typedef OptList<60, uint8_t> OptVendorClassId; +typedef OptList<61, uint8_t> OptClientId; +typedef OptString<62> OptNetWareIPDomainName; /**< RFC2242 */ +typedef OptList<63, uint8_t> OptNetWareIPInformation; /**< complicated, so just byte list for now. 
RFC2242 */ +typedef OptString<64> OptNISPlusDomain; +typedef OptString<65> OptNISPlusServers; +typedef OptString<66> OptTFTPServerName; /**< when overloaded */ +typedef OptString<67> OptBootfileName; /**< when overloaded */ +typedef OptList<68, RTNETADDRIPV4> OptMobileIPHomeAgents; +typedef OptList<69, RTNETADDRIPV4> OptSMTPServers; +typedef OptList<70, RTNETADDRIPV4> OptPOP3Servers; +typedef OptList<71, RTNETADDRIPV4> OptNNTPServers; +typedef OptList<72, RTNETADDRIPV4> OptWWWServers; +typedef OptList<73, RTNETADDRIPV4> OptFingerServers; +typedef OptList<74, RTNETADDRIPV4> OptIRCServers; +typedef OptList<75, RTNETADDRIPV4> OptStreetTalkServers; +typedef OptList<76, RTNETADDRIPV4> OptSTDAServers; +typedef OptList<77, uint8_t> OptUserClassId; +typedef OptList<78, uint8_t> OptSLPDirectoryAgent; /**< complicated, so just byte list for now. RFC2610 */ +typedef OptList<79, uint8_t> OptSLPServiceScope; /**< complicated, so just byte list for now. RFC2610 */ +typedef OptNoValue<80> OptRapidCommit; /**< RFC4039 */ +typedef OptList<119, uint8_t> OptDomainSearch; /**< RFC3397 */ +/** @} */ + +#endif /* !VBOX_INCLUDED_SRC_Dhcpd_DhcpOptions_h */ diff --git a/src/VBox/NetworkServices/Dhcpd/DhcpdInternal.h b/src/VBox/NetworkServices/Dhcpd/DhcpdInternal.h new file mode 100644 index 00000000..dbeb7676 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/DhcpdInternal.h @@ -0,0 +1,100 @@ +/* $Id: DhcpdInternal.h $ */ +/** @file + * DHCP server - Internal header. + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Dhcpd_DhcpdInternal_h +#define VBOX_INCLUDED_SRC_Dhcpd_DhcpdInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#ifndef IN_VBOXSVC +# define LOG_GROUP LOG_GROUP_NET_DHCPD +#elif !defined(LOG_GROUP) +# define LOG_GROUP LOG_GROUP_MAIN_DHCPCONFIG +#endif +#include <iprt/stdint.h> +#include <iprt/string.h> +#include <VBox/log.h> + +#include <map> +#include <vector> + +#ifndef IN_VBOXSVC + +# if __cplusplus >= 199711 +#include <memory> +using std::shared_ptr; +# else +# include <tr1/memory> +using std::tr1::shared_ptr; +# endif + +class DhcpOption; +/** DHCP option map (keyed by option number, DhcpOption value). */ +typedef std::map<uint8_t, std::shared_ptr<DhcpOption> > optmap_t; + +#endif /* !IN_VBOXSVC */ + +/** Byte vector. */ +typedef std::vector<uint8_t> octets_t; + +/** Raw DHCP option map (keyed by option number, byte vector value). */ +typedef std::map<uint8_t, octets_t> rawopts_t; + + +/** Equal compare operator for mac address. */ +DECLINLINE(bool) operator==(const RTMAC &l, const RTMAC &r) +{ + return memcmp(&l, &r, sizeof(RTMAC)) == 0; +} + +/** Less-than compare operator for mac address. 
*/ +DECLINLINE(bool) operator<(const RTMAC &l, const RTMAC &r) +{ + return memcmp(&l, &r, sizeof(RTMAC)) < 0; +} + + +/** @name LogXRel + return NULL helpers + * @{ */ +#define DHCP_LOG_RET_NULL(a_MsgArgs) do { LogRel(a_MsgArgs); return NULL; } while (0) +#define DHCP_LOG2_RET_NULL(a_MsgArgs) do { LogRel2(a_MsgArgs); return NULL; } while (0) +#define DHCP_LOG3_RET_NULL(a_MsgArgs) do { LogRel3(a_MsgArgs); return NULL; } while (0) +/** @} */ + + +/** @name LogXRel + return a_rcRet helpers + * @{ */ +#define DHCP_LOG_RET(a_rcRet, a_MsgArgs) do { LogRel(a_MsgArgs); return (a_rcRet); } while (0) +#define DHCP_LOG2_RET(a_rcRet, a_MsgArgs) do { LogRel2(a_MsgArgs); return (a_rcRet); } while (0) +#define DHCP_LOG3_RET(a_rcRet, a_MsgArgs) do { LogRel3(a_MsgArgs); return (a_rcRet); } while (0) +/** @} */ + +/** LogRel + RTMsgError helper. */ +#define DHCP_LOG_MSG_ERROR(a_MsgArgs) do { LogRel(a_MsgArgs); RTMsgError a_MsgArgs; } while (0) + +#endif /* !VBOX_INCLUDED_SRC_Dhcpd_DhcpdInternal_h */ diff --git a/src/VBox/NetworkServices/Dhcpd/IPv4Pool.cpp b/src/VBox/NetworkServices/Dhcpd/IPv4Pool.cpp new file mode 100644 index 00000000..5d1e52a1 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/IPv4Pool.cpp @@ -0,0 +1,209 @@ +/* $Id: IPv4Pool.cpp $ */ +/** @file + * DHCP server - A pool of IPv4 addresses. + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "DhcpdInternal.h" +#include <iprt/errcore.h> + +#include "IPv4Pool.h" + + +int IPv4Pool::init(const IPv4Range &aRange) RT_NOEXCEPT +{ + AssertReturn(aRange.isValid(), VERR_INVALID_PARAMETER); + + m_range = aRange; + try + { + m_pool.insert(m_range); + } + catch (std::bad_alloc &) + { + return VERR_NO_MEMORY; + } + return VINF_SUCCESS; +} + + +int IPv4Pool::init(RTNETADDRIPV4 aFirstAddr, RTNETADDRIPV4 aLastAddr) RT_NOEXCEPT +{ + return init(IPv4Range(aFirstAddr, aLastAddr)); +} + + +/** + * Internal worker for inserting a range into the pool of available addresses. + * + * @returns IPRT status code (asserted). + * @param a_Range The range to insert. + */ +int IPv4Pool::i_insert(const IPv4Range &a_Range) RT_NOEXCEPT +{ + /* + * Check preconditions. Asserting because nobody checks the return code. + */ + AssertReturn(m_range.isValid(), VERR_INVALID_STATE); + AssertReturn(a_Range.isValid(), VERR_INVALID_PARAMETER); + AssertReturn(m_range.contains(a_Range), VERR_INVALID_PARAMETER); + + /* + * Check that the incoming range doesn't overlap with existing ranges in the pool. + */ + it_t itHint = m_pool.upper_bound(IPv4Range(a_Range.LastAddr)); /* successor, insertion hint */ +#if 0 /** @todo r=bird: This code is wrong. It has no end() check for starters. Since the method is + * only for internal consumption, I've replaced it with a strict build assertion. 
*/ + if (itHint != m_pool.begin()) + { + it_t prev(itHint); + --prev; + if (a_Range.FirstAddr <= prev->LastAddr) + { + LogRel(("%08x-%08x conflicts with %08x-%08x\n", + a_Range.FirstAddr, a_Range.LastAddr, + prev->FirstAddr, prev->LastAddr)); + return VERR_INVALID_PARAMETER; + } + } +#endif +#ifdef VBOX_STRICT + for (it_t it2 = m_pool.begin(); it2 != m_pool.end(); ++it2) + AssertMsg(it2->LastAddr < a_Range.FirstAddr || it2->FirstAddr > a_Range.LastAddr, + ("%08RX32-%08RX32 conflicts with %08RX32-%08RX32\n", + a_Range.FirstAddr, a_Range.LastAddr, it2->FirstAddr, it2->LastAddr)); +#endif + + /* + * No overlaps, insert it. + */ + try + { + m_pool.insert(itHint, a_Range); + } + catch (std::bad_alloc &) + { + return VERR_NO_MEMORY; + } + return VINF_SUCCESS; +} + + +/** + * Allocates an available IPv4 address from the pool. + * + * @returns Non-zero network order IPv4 address on success, zero address + * (0.0.0.0) on failure. + */ +RTNETADDRIPV4 IPv4Pool::allocate() +{ + RTNETADDRIPV4 RetAddr; + if (!m_pool.empty()) + { + /* Grab the first address in the pool: */ + it_t itBeg = m_pool.begin(); + RetAddr.u = RT_H2N_U32(itBeg->FirstAddr); + + if (itBeg->FirstAddr == itBeg->LastAddr) + m_pool.erase(itBeg); + else + { + /* Trim the entry (re-inserting it): */ + IPv4Range trimmed = *itBeg; + trimmed.FirstAddr += 1; + Assert(trimmed.FirstAddr <= trimmed.LastAddr); + m_pool.erase(itBeg); + try + { + m_pool.insert(trimmed); + } + catch (std::bad_alloc &) + { + /** @todo r=bird: Theortically the insert could fail with a bad_alloc and we'd + * drop a range of IP address. It would be nice if we could safely modify itBit + * without having to re-insert it. The author of this code (not bird) didn't + * seem to think this safe? + * + * If we want to play safe and all that, just use a AVLRU32TREE (or AVLRU64TREE + * if lazy) AVL tree from IPRT. 
Since we know exactly how it's implemented and + * works, there will be no uncertanties like this when using it (both here + * and in the i_insert validation logic). */ + LogRelFunc(("Caught bad_alloc! We're truely buggered now!\n")); + } + } + } + else + RetAddr.u = 0; + return RetAddr; +} + + +/** + * Allocate the given address. + * + * @returns Success indicator. + * @param a_Addr The IP address to allocate (network order). + */ +bool IPv4Pool::allocate(RTNETADDRIPV4 a_Addr) +{ + /* + * Find the range containing a_Addr. + */ + it_t it = m_pool.lower_bound(IPv4Range(a_Addr)); /* candidate range */ + if (it != m_pool.end()) + { + Assert(RT_N2H_U32(a_Addr.u) <= it->LastAddr); /* by definition of < and lower_bound */ + + if (it->contains(a_Addr)) + { + /* + * Remove a_Addr from the range by way of re-insertion. + */ + const IPV4HADDR haddr = RT_N2H_U32(a_Addr.u); + IPV4HADDR first = it->FirstAddr; + IPV4HADDR last = it->LastAddr; + + m_pool.erase(it); + if (first != last) + { + if (haddr == first) + i_insert(++first, last); + else if (haddr == last) + i_insert(first, --last); + else + { + i_insert(first, haddr - 1); + i_insert(haddr + 1, last); + } + } + + return true; + } + } + return false; +} diff --git a/src/VBox/NetworkServices/Dhcpd/IPv4Pool.h b/src/VBox/NetworkServices/Dhcpd/IPv4Pool.h new file mode 100644 index 00000000..12aa8f54 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/IPv4Pool.h @@ -0,0 +1,154 @@ +/* $Id: IPv4Pool.h $ */ +/** @file + * DHCP server - a pool of IPv4 addresses + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VBOX_INCLUDED_SRC_Dhcpd_IPv4Pool_h
#define VBOX_INCLUDED_SRC_Dhcpd_IPv4Pool_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include <iprt/asm.h>
#include <iprt/stdint.h>
#include <iprt/net.h>
#include <set>


/** Host order IPv4 address. */
typedef uint32_t IPV4HADDR;


/**
 * A range of IPv4 addresses (in host order).
 *
 * Both ends are inclusive.  The constructors taking RTNETADDRIPV4 convert
 * from network to host order.
 */
struct IPv4Range
{
    IPV4HADDR FirstAddr; /**< Lowest address. */
    IPV4HADDR LastAddr; /**< Highest address (inclusive). */

    /** Constructs the single-address range 0.0.0.0. */
    IPv4Range() RT_NOEXCEPT
        : FirstAddr(0), LastAddr(0)
    {}

    /** Constructs a range covering exactly one host order address. */
    explicit IPv4Range(IPV4HADDR aSingleAddr) RT_NOEXCEPT
        : FirstAddr(aSingleAddr), LastAddr(aSingleAddr)
    {}

    /** Constructs a range from host order bounds (not validated here). */
    IPv4Range(IPV4HADDR aFirstAddr, IPV4HADDR aLastAddr) RT_NOEXCEPT
        : FirstAddr(aFirstAddr), LastAddr(aLastAddr)
    {}

    /** Constructs a range covering exactly one network order address. */
    explicit IPv4Range(RTNETADDRIPV4 aSingleAddr) RT_NOEXCEPT
        : FirstAddr(RT_N2H_U32(aSingleAddr.u)), LastAddr(RT_N2H_U32(aSingleAddr.u))
    {}

    /** Constructs a range from network order bounds (not validated here). */
    IPv4Range(RTNETADDRIPV4 aFirstAddr, RTNETADDRIPV4 aLastAddr) RT_NOEXCEPT
        : FirstAddr(RT_N2H_U32(aFirstAddr.u)), LastAddr(RT_N2H_U32(aLastAddr.u))
    {}

    /** Checks that the first address does not exceed the last one. */
    bool isValid() const RT_NOEXCEPT
    {
        return FirstAddr <= LastAddr;
    }

    /** Checks if the host order address @a addr lies within this range. */
    bool contains(IPV4HADDR addr) const RT_NOEXCEPT
    {
        return FirstAddr <= addr && addr <= LastAddr;
    }

    /** Checks if the network order address @a addr lies within this range. */
    bool contains(RTNETADDRIPV4 addr) const RT_NOEXCEPT
    {
        return contains(RT_N2H_U32(addr.u));
    }

    /** Checks if this range includes the @a a_rRange. */
    bool contains(const IPv4Range &a_rRange) const RT_NOEXCEPT
    {
        return a_rRange.isValid()
            && FirstAddr <= a_rRange.FirstAddr
            && a_rRange.LastAddr <= LastAddr;
    }
};


/** Two ranges are equal when both bounds match. */
inline bool operator==(const IPv4Range &l, const IPv4Range &r) RT_NOEXCEPT
{
    return l.FirstAddr == r.FirstAddr && l.LastAddr == r.LastAddr;
}


/** Orders ranges: @a l is less than @a r only if it ends before @a r starts.
 * Overlapping ranges therefore compare as neither less nor greater. */
inline bool operator<(const IPv4Range &l, const IPv4Range &r) RT_NOEXCEPT
{
    return l.LastAddr < r.FirstAddr;
}


/**
 * IPv4 address pool.
 *
 * This manages a single range of IPv4 addresses (m_range). Unallocated
 * addresses are tracked as a set of sub-ranges in the m_pool set.
 *
 */
class IPv4Pool
{
    typedef std::set<IPv4Range> set_t;
    typedef set_t::iterator it_t;

    /** The IPv4 range of this pool. */
    IPv4Range m_range;
    /** Pool of available IPv4 ranges. */
    set_t m_pool;

public:
    IPv4Pool()
    {}

    int init(const IPv4Range &aRange) RT_NOEXCEPT;
    int init(RTNETADDRIPV4 aFirstAddr, RTNETADDRIPV4 aLastAddr) RT_NOEXCEPT;

    RTNETADDRIPV4 allocate();
    bool allocate(RTNETADDRIPV4);

    /**
     * Checks if the pool range includes @a a_Addr (allocation status not considered).
     */
    bool contains(RTNETADDRIPV4 a_Addr) const RT_NOEXCEPT
    {
        return m_range.contains(a_Addr);
    }

private:
    /* Workers returning free ranges to m_pool; the overloads only build the
       IPv4Range and forward to the first one. */
    int i_insert(const IPv4Range &range) RT_NOEXCEPT;
#if 0
    int i_insert(IPV4HADDR a_Single) RT_NOEXCEPT { return i_insert(IPv4Range(a_Single)); }
#endif
    int i_insert(IPV4HADDR a_First, IPV4HADDR a_Last) RT_NOEXCEPT { return i_insert(IPv4Range(a_First, a_Last)); }
    int i_insert(RTNETADDRIPV4 a_Single) RT_NOEXCEPT { return i_insert(IPv4Range(a_Single)); }
    int i_insert(RTNETADDRIPV4 a_First, RTNETADDRIPV4 a_Last) RT_NOEXCEPT { return i_insert(IPv4Range(a_First, a_Last)); }
};

#endif /* !VBOX_INCLUDED_SRC_Dhcpd_IPv4Pool_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/Makefile.kmk b/src/VBox/NetworkServices/Dhcpd/Makefile.kmk
new file mode 100644
index 00000000..c5340ad4
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/Makefile.kmk
@@ -0,0 +1,108 @@
# $Id: Makefile.kmk $
## @file
# Sub-makefile for the DHCP server.
#

#
# Copyright (C) 2006-2023 Oracle and/or its affiliates.
#
# This file is part of VirtualBox base platform packages, as
# available from https://www.virtualbox.org.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, in version 3 of the
# License.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses>.
#
# SPDX-License-Identifier: GPL-3.0-only
#

SUB_DEPTH := ../../../..
include $(KBUILD_PATH)/subheader.kmk

ifndef LWIP_SOURCES
 include $(PATH_SUB_CURRENT)/../../Devices/Network/lwip-new/Config.kmk
endif

#
# Hardened stub executable.
+# +ifdef VBOX_WITH_HARDENING + PROGRAMS += VBoxNetDHCPHardened + VBoxNetDHCPHardened_TEMPLATE = VBoxR3HardenedExe + VBoxNetDHCPHardened_NAME = VBoxNetDHCP + VBoxNetDHCPHardened_SOURCES = VBoxNetDhcpdHardened.cpp + VBoxNetDHCPHardened_LDFLAGS.win = /SUBSYSTEM:windows + $(call VBOX_SET_VER_INFO_DLL,VBoxNetDHCPHardened,VirtualBox DHCP Server,$(VBOX_WINDOWS_ICON_FILE)) # Version info / description. +endif + + +# +# The DHCP server module (dll if hardened) +# +ifdef VBOX_WITH_HARDENING + DLLS += VBoxNetDHCP + VBoxNetDHCP_TEMPLATE := VBoxR3Dll +else + PROGRAMS += VBoxNetDHCP + VBoxNetDHCP_TEMPLATE := VBoxR3Exe +endif + +# (current dir is for lwipopts.h) +VBoxNetDHCP_INCS += . $(addprefix ../../Devices/Network/lwip-new/,$(LWIP_INCS)) + +ifdef VBOX_WITH_AUTOMATIC_DEFS_QUOTING + VBoxNetDHCP_DEFS = KBUILD_TYPE="$(KBUILD_TYPE)" +else + VBoxNetDHCP_DEFS = KBUILD_TYPE=\"$(KBUILD_TYPE)\" +endif +VBoxNetDHCP_DEFS += \ + $(if $(VBOX_WITH_INTNET_SERVICE_IN_R3),VBOX_WITH_INTNET_SERVICE_IN_R3,) + +#VBoxNetDHCP_DEFS += IPv6 +#VBoxNetDHCP_DEFS.linux = WITH_VALGRIND +ifneq ($(KBUILD_TARGET),win) + VBoxNetDHCP_DEFS += VBOX_WITH_XPCOM + VBoxNetDHCP_INCS += $(VBOX_XPCOM_INCS) + ifneq ($(KBUILD_TARGET),darwin) + # We want -std=c++11 for 4.7 and newer compilers, and -std=c++0x for older ones.
+ VBoxNetDHCP_CXXFLAGS += -std=$(if $(VBOX_GCC_VERSION_CXX),$(if $(VBOX_GCC_VERSION_CXX) < 40700,c++0x,c++11),c++0x) + endif +endif +VBoxNetDHCP_INCS += \ + ../NetLib +VBoxNetDHCP_SOURCES = \ + ClientId.cpp \ + Config.cpp \ + DHCPD.cpp \ + Db.cpp \ + DhcpMessage.cpp \ + DhcpOptions.cpp \ + IPv4Pool.cpp \ + Timestamp.cpp \ + VBoxNetDhcpd.cpp \ + ../NetLib/IntNetIf.cpp \ + ../../Main/glue/VBoxLogRelCreate.cpp \ + ../../Main/glue/GetVBoxUserHomeDirectory.cpp \ + $(addprefix ../../Devices/Network/lwip-new/,$(LWIP_SOURCES)) + +VBoxNetDHCP_LIBS = $(LIB_RUNTIME) +VBoxNetDHCP_LIBS.solaris += socket nsl +VBoxNetDHCP_LDFLAGS.win = /SUBSYSTEM:windows + +ifdef VBOX_WITH_HARDENING + $(call VBOX_SET_VER_INFO_DLL,VBoxNetDHCP,VirtualBox DHCP Server (dll),$(VBOX_WINDOWS_ICON_FILE)) # Version info / description. +else + $(call VBOX_SET_VER_INFO_EXE,VBoxNetDHCP,VirtualBox DHCP Server,$(VBOX_WINDOWS_ICON_FILE)) # Version info / description. +endif + +include $(FILE_KBUILD_SUB_FOOTER) + diff --git a/src/VBox/NetworkServices/Dhcpd/Timestamp.cpp b/src/VBox/NetworkServices/Dhcpd/Timestamp.cpp new file mode 100644 index 00000000..014eebb6 --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/Timestamp.cpp @@ -0,0 +1,45 @@ +/* $Id: Timestamp.cpp $ */ +/** @file + * DHCP server - timestamps + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "DhcpdInternal.h" +#include "Timestamp.h" + + +size_t Timestamp::strFormatHelper(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput) const RT_NOEXCEPT +{ + RTTIMESPEC TimeSpec; + RTTIME Time; + char szBuf[64]; + ssize_t cchBuf = RTTimeToStringEx(RTTimeExplode(&Time, getAbsTimeSpec(&TimeSpec)), szBuf, sizeof(szBuf), 0); + Assert(cchBuf > 0); + return pfnOutput(pvArgOutput, szBuf, cchBuf); +} + diff --git a/src/VBox/NetworkServices/Dhcpd/Timestamp.h b/src/VBox/NetworkServices/Dhcpd/Timestamp.h new file mode 100644 index 00000000..083cbc6b --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/Timestamp.h @@ -0,0 +1,122 @@ +/* $Id: Timestamp.h $ */ +/** @file + * DHCP server - timestamps + */ + +/* + * Copyright (C) 2017-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VBOX_INCLUDED_SRC_Dhcpd_Timestamp_h
#define VBOX_INCLUDED_SRC_Dhcpd_Timestamp_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include <iprt/time.h>


/**
 * Wrapper around RTTIMESPEC.
 *
 * @note Originally wanting to use RTTimeNanoTS rather than RTTimeNow. The term
 *       "absolute" was used for when the RTTimeNanoTS() value was converted to
 *       something approximating unix epoch relative time with help of
 *       RTTimeNow(). Code was later changed to just wrap RTTIMESPEC and drop
 *       all usage of RTTimeNanoTS, ASSUMING that system time is stable.
 */
class Timestamp
{
    /** The wrapped time value. */
    RTTIMESPEC m_TimeSpec;

public:
    /** Constructs a timestamp with a nanosecond value of zero. */
    Timestamp() RT_NOEXCEPT
    {
        RTTimeSpecSetNano(&m_TimeSpec, 0);
    }

    /** Constructs a timestamp as a copy of @a a_pTimeSpec (must not be NULL,
     *  it is dereferenced unconditionally). */
    Timestamp(PCRTTIMESPEC a_pTimeSpec) RT_NOEXCEPT
    {
        m_TimeSpec = *a_pTimeSpec;
    }

    /** Get a timestamp initialized to current time. */
    static Timestamp now() RT_NOEXCEPT
    {
        RTTIMESPEC Tmp;
        return Timestamp(RTTimeNow(&Tmp));
    }

    /** Get a timestamp with the given value in seconds since unix epoch. */
    static Timestamp absSeconds(int64_t secTimestamp) RT_NOEXCEPT
    {
        RTTIMESPEC Tmp;
        return Timestamp(RTTimeSpecSetSeconds(&Tmp, secTimestamp));
    }

    /** Advances this timestamp by @a cSecs seconds; returns *this for chaining. */
    Timestamp &addSeconds(int64_t cSecs) RT_NOEXCEPT
    {
        RTTimeSpecAddSeconds(&m_TimeSpec, cSecs);
        return *this;
    }

    /** Moves this timestamp back by @a cSecs seconds; returns *this for chaining. */
    Timestamp &subSeconds(int64_t cSecs) RT_NOEXCEPT
    {
        RTTimeSpecSubSeconds(&m_TimeSpec, cSecs);
        return *this;
    }

    /** Copies the wrapped time value into @a pTime and returns @a pTime. */
    RTTIMESPEC *getAbsTimeSpec(RTTIMESPEC *pTime) const RT_NOEXCEPT
    {
        *pTime = m_TimeSpec;
        return pTime;
    }

    /** Returns the timestamp value in seconds. */
    int64_t getAbsSeconds() const RT_NOEXCEPT
    {
        return RTTimeSpecGetSeconds(&m_TimeSpec);
    }

    /** Only for log formatting. */
    size_t strFormatHelper(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput) const RT_NOEXCEPT;

    /** Three-way comparison; the relational operators below test the result
     *  against zero (negative: this is earlier than @a a_rRight). */
    int compare(const Timestamp &a_rRight) const RT_NOEXCEPT
    {
        return RTTimeSpecCompare(&m_TimeSpec, &a_rRight.m_TimeSpec);
    }

    friend bool operator<( const Timestamp &, const Timestamp &) RT_NOEXCEPT;
    friend bool operator>( const Timestamp &, const Timestamp &) RT_NOEXCEPT;
    friend bool operator==(const Timestamp &, const Timestamp &) RT_NOEXCEPT;
    friend bool operator!=(const Timestamp &, const Timestamp &) RT_NOEXCEPT;
    friend bool operator<=(const Timestamp &, const Timestamp &) RT_NOEXCEPT;
    friend bool operator>=(const Timestamp &, const Timestamp &) RT_NOEXCEPT;
};


/* Relational operators, all expressed through Timestamp::compare(). */
inline bool operator<( const Timestamp &l, const Timestamp &r) RT_NOEXCEPT { return l.compare(r) < 0; }
inline bool operator>( const Timestamp &l, const Timestamp &r) RT_NOEXCEPT { return l.compare(r) > 0; }
inline bool operator==(const Timestamp &l, const Timestamp &r) RT_NOEXCEPT { return l.compare(r) == 0; }
inline bool operator!=(const Timestamp &l, const Timestamp &r) RT_NOEXCEPT { return l.compare(r) != 0; }
inline bool operator<=(const Timestamp &l, const Timestamp &r) RT_NOEXCEPT { return l.compare(r) <= 0; }
inline bool operator>=(const Timestamp &l, const Timestamp &r) RT_NOEXCEPT { return l.compare(r) >= 0; }

#endif /* !VBOX_INCLUDED_SRC_Dhcpd_Timestamp_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.cpp b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.cpp
new file mode 100644
index 00000000..563c7e39
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.cpp
@@ -0,0 +1,533 @@
/* $Id: VBoxNetDhcpd.cpp $ */
/** @file
 * VBoxNetDhcpd - DHCP server for host-only and NAT networks.
 */

/*
 * Copyright (C) 2009-2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/** @page pg_net_dhcp VBoxNetDHCP + * + * Write a few words... + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include <iprt/cdefs.h> + +/* + * Need to get host/network order conversion stuff from Windows headers, + * so we do not define them in LWIP and then try to re-define them in + * Windows headers. 
 */
#ifdef RT_OS_WINDOWS
# include <iprt/win/winsock2.h>
#endif

#include "DhcpdInternal.h"
#include <iprt/param.h>
#include <iprt/errcore.h>

#include <iprt/initterm.h>
#include <iprt/message.h>

#include <iprt/net.h>
#include <iprt/path.h>
#include <iprt/stream.h>

#include <VBox/sup.h>
#include <VBox/vmm/vmm.h>
#include <VBox/vmm/pdmnetinline.h>
#include <VBox/intnet.h>
#include <VBox/intnetinline.h>

#include "VBoxLwipCore.h"
#include "Config.h"
#include "DHCPD.h"
#include "DhcpMessage.h"

extern "C"
{
#include "lwip/sys.h"
#include "lwip/pbuf.h"
#include "lwip/netif.h"
#include "lwip/tcpip.h"
#include "lwip/udp.h"
#include "netif/etharp.h"
}

#include <iprt/sanitized/string>
#include <vector>
#include <memory>

#ifdef RT_OS_WINDOWS
# include <iprt/win/windows.h>
#endif

#include "IntNetIf.h"

/** Deleter releasing an lwIP pbuf with pbuf_free(), for use with std::unique_ptr. */
struct delete_pbuf
{
    delete_pbuf() {}
    void operator()(struct pbuf *p) const { pbuf_free(p); }
};

/** Owning pbuf pointer that calls pbuf_free() when it goes out of scope. */
typedef std::unique_ptr<pbuf, delete_pbuf> unique_ptr_pbuf;


/**
 * The DHCP server process object.
 *
 * Connects an internal network interface (m_hIf) to an lwIP netif
 * (m_LwipNetif) and hands received DHCP datagrams to the DHCPD instance.
 */
class VBoxNetDhcpd
{
    DECLARE_CLS_COPY_CTOR_ASSIGN_NOOP(VBoxNetDhcpd);
public:
    VBoxNetDhcpd();
    ~VBoxNetDhcpd();

    int main(int argc, char **argv);

private:
    /** The logger instance. */
    PRTLOGGER m_pStderrReleaseLogger;
    /** Internal network interface handle. */
    INTNETIFCTX m_hIf;
    /** lwip stack connected to the intnet */
    struct netif m_LwipNetif;
    /** The DHCP server config.
     * NOTE(review): assigned from Config::create() in main() and not released
     * anywhere in this class - confirm who owns it. */
    Config *m_Config;
    /** Listening pcb */
    struct udp_pcb *m_Dhcp4Pcb;
    /** DHCP server instance. */
    DHCPD m_server;

    int logInitStderr();

    /*
     * Internal network plumbing.
     */
    int ifInit(const RTCString &strNetwork,
               const RTCString &strTrunk = RTCString(),
               INTNETTRUNKTYPE enmTrunkType = kIntNetTrunkType_WhateverNone);

    static DECLCALLBACK(void) ifInput(void *pvUser, void *pvFrame, uint32_t cbFrame);
    void ifInputWorker(void *pvFrame, uint32_t cbFrame);

    /*
     * lwIP callbacks
     */
    static DECLCALLBACK(void) lwipInitCB(void *pvArg);
    void lwipInit();

    static err_t netifInitCB(netif *pNetif) RT_NOTHROW_PROTO;
    err_t netifInit(netif *pNetif);

    static err_t netifLinkOutputCB(netif *pNetif, pbuf *pPBuf) RT_NOTHROW_PROTO;
    err_t netifLinkOutput(pbuf *pPBuf);

    static void dhcp4RecvCB(void *arg, struct udp_pcb *pcb, struct pbuf *p,
                            ip_addr_t *addr, u16_t port) RT_NOTHROW_PROTO;
    void dhcp4Recv(struct udp_pcb *pcb, struct pbuf *p, ip_addr_t *addr, u16_t port);
};


/**
 * Constructor: clears all members and sets up the early stderr release logger.
 * The status of logInitStderr() is not checked here.
 */
VBoxNetDhcpd::VBoxNetDhcpd()
    : m_pStderrReleaseLogger(NULL),
      m_hIf(NULL),
      m_LwipNetif(),
      m_Config(NULL),
      m_Dhcp4Pcb(NULL)
{
    logInitStderr();
}


/**
 * Destructor: destroys the internal network interface if one was created.
 */
VBoxNetDhcpd::~VBoxNetDhcpd()
{
    if (m_hIf != NULL)
    {
        int rc = IntNetR3IfDestroy(m_hIf);
        AssertRC(rc);
        m_hIf = NULL;
    }
}


/*
 * We don't know the name of the release log file until we parse our
 * configuration because we use network name as basename. To get
 * early logging to work, start with stderr-only release logger.
 *
 * We disable "sup" for this logger to avoid spam from SUPR3Init().
+ */ +int VBoxNetDhcpd::logInitStderr() +{ + static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES; + + uint32_t fFlags = 0; +#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2) + fFlags |= RTLOGFLAGS_USECRLF; +#endif + + PRTLOGGER pLogger; + int rc = RTLogCreate(&pLogger, fFlags, + "all -sup all.restrict -default.restrict", + NULL, /* environment base */ + RT_ELEMENTS(s_apszGroups), s_apszGroups, + RTLOGDEST_STDERR, NULL); + if (RT_FAILURE(rc)) + { + RTPrintf("Failed to init stderr logger: %Rrs\n", rc); + return rc; + } + + m_pStderrReleaseLogger = pLogger; + RTLogRelSetDefaultInstance(m_pStderrReleaseLogger); + + return VINF_SUCCESS; +} + + +int VBoxNetDhcpd::ifInit(const RTCString &strNetwork, + const RTCString &strTrunk, + INTNETTRUNKTYPE enmTrunkType) +{ + if (enmTrunkType == kIntNetTrunkType_Invalid) + enmTrunkType = kIntNetTrunkType_WhateverNone; + + int rc = IntNetR3IfCreateEx(&m_hIf, strNetwork.c_str(), enmTrunkType, + strTrunk.c_str(), _128K /*cbSend*/, _256K /*cbRecv*/, + 0 /*fFlags*/); + if (RT_SUCCESS(rc)) + rc = IntNetR3IfSetActive(m_hIf, true /*fActive*/); + + return rc; +} + + +void VBoxNetDhcpd::ifInputWorker(void *pvFrame, uint32_t cbFrame) +{ + struct pbuf *p = pbuf_alloc(PBUF_RAW, (u16_t)cbFrame + ETH_PAD_SIZE, PBUF_POOL); + AssertPtrReturnVoid(p); + + /* + * The code below is inlined version of: + * + * pbuf_header(p, -ETH_PAD_SIZE); // hide padding + * pbuf_take(p, pvFrame, cbFrame); + * pbuf_header(p, ETH_PAD_SIZE); // reveal padding + */ + struct pbuf *q = p; + uint8_t *pbChunk = (uint8_t *)pvFrame; + do + { + uint8_t *payload = (uint8_t *)q->payload; + size_t len = q->len; + +#if ETH_PAD_SIZE + if (RT_LIKELY(q == p)) /* single pbuf is large enough */ + { + payload += ETH_PAD_SIZE; + len -= ETH_PAD_SIZE; + } +#endif + memcpy(payload, pbChunk, len); + pbChunk += len; + q = q->next; + } while (RT_UNLIKELY(q != NULL)); + + m_LwipNetif.input(p, &m_LwipNetif); +} + + +/** + * Got a frame from the internal network, feed it to the lwIP stack. 
+ */ +/*static*/ +DECLCALLBACK(void) VBoxNetDhcpd::ifInput(void *pvUser, void *pvFrame, uint32_t cbFrame) +{ + AssertReturnVoid(pvFrame); + AssertReturnVoid( cbFrame > sizeof(RTNETETHERHDR) + && cbFrame <= UINT16_MAX - ETH_PAD_SIZE); + + VBoxNetDhcpd *self = static_cast<VBoxNetDhcpd *>(pvUser); + self->ifInputWorker(pvFrame, cbFrame); +} + + +/** + * Got a frame from the lwIP stack, feed it to the internal network. + */ +err_t VBoxNetDhcpd::netifLinkOutput(pbuf *pPBuf) +{ + if (pPBuf->tot_len < sizeof(struct eth_hdr)) /* includes ETH_PAD_SIZE */ + return ERR_ARG; + + u16_t cbFrame = pPBuf->tot_len - ETH_PAD_SIZE; + INTNETFRAME Frame; + int rc = IntNetR3IfQueryOutputFrame(m_hIf, cbFrame, &Frame); + if (RT_FAILURE(rc)) + return ERR_MEM; + + pbuf_copy_partial(pPBuf, Frame.pvFrame, cbFrame, ETH_PAD_SIZE); + IntNetR3IfOutputFrameCommit(m_hIf, &Frame); + return ERR_OK; +} + + +/* static */ DECLCALLBACK(void) VBoxNetDhcpd::lwipInitCB(void *pvArg) +{ + AssertPtrReturnVoid(pvArg); + + VBoxNetDhcpd *self = static_cast<VBoxNetDhcpd *>(pvArg); + self->lwipInit(); +} + + +/* static */ err_t VBoxNetDhcpd::netifInitCB(netif *pNetif) RT_NOTHROW_DEF +{ + AssertPtrReturn(pNetif, ERR_ARG); + + VBoxNetDhcpd *self = static_cast<VBoxNetDhcpd *>(pNetif->state); + return self->netifInit(pNetif); +} + + +/* static */ err_t VBoxNetDhcpd::netifLinkOutputCB(netif *pNetif, pbuf *pPBuf) RT_NOTHROW_DEF +{ + AssertPtrReturn(pNetif, ERR_ARG); + AssertPtrReturn(pPBuf, ERR_ARG); + + VBoxNetDhcpd *self = static_cast<VBoxNetDhcpd *>(pNetif->state); + AssertPtrReturn(self, ERR_IF); + + return self->netifLinkOutput(pPBuf); +} + + +/* static */ void VBoxNetDhcpd::dhcp4RecvCB(void *arg, struct udp_pcb *pcb, + struct pbuf *p, + ip_addr_t *addr, u16_t port) RT_NOTHROW_DEF +{ + AssertPtrReturnVoid(arg); + + VBoxNetDhcpd *self = static_cast<VBoxNetDhcpd *>(arg); + self->dhcp4Recv(pcb, p, addr, port); + pbuf_free(p); +} + + + + + +int VBoxNetDhcpd::main(int argc, char **argv) +{ + /* + * Register string format 
types. + */ + ClientId::registerFormat(); + Binding::registerFormat(); + + /* + * Parse the command line into a configuration object. + */ + m_Config = Config::create(argc, argv); + if (m_Config == NULL) + return VERR_GENERAL_FAILURE; + + /* + * Initialize the server. + */ + int rc = m_server.init(m_Config); + if (RT_SUCCESS(rc)) + { + /* connect to the intnet */ + rc = ifInit(m_Config->getNetwork(), m_Config->getTrunk(), m_Config->getTrunkType()); + if (RT_SUCCESS(rc)) + { + /* setup lwip */ + rc = vboxLwipCoreInitialize(lwipInitCB, this); + if (RT_SUCCESS(rc)) + { + /* + * Pump packets more or less for ever. + */ + rc = IntNetR3IfPumpPkts(m_hIf, ifInput, this, + NULL /*pfnInputGso*/, NULL /*pvUserGso*/); + } + else + DHCP_LOG_MSG_ERROR(("Terminating - vboxLwipCoreInitialize failed: %Rrc\n", rc)); + } + else + DHCP_LOG_MSG_ERROR(("Terminating - ifInit failed: %Rrc\n", rc)); + } + else + DHCP_LOG_MSG_ERROR(("Terminating - Dhcpd::init failed: %Rrc\n", rc)); + return rc; +} + + +void VBoxNetDhcpd::lwipInit() +{ + err_t error; + + ip_addr_t addr, mask; + ip4_addr_set_u32(&addr, m_Config->getIPv4Address().u); + ip4_addr_set_u32(&mask, m_Config->getIPv4Netmask().u); + + netif *pNetif = netif_add(&m_LwipNetif, + &addr, &mask, + IP_ADDR_ANY, /* gateway */ + this, /* state */ + VBoxNetDhcpd::netifInitCB, /* netif_init_fn */ + tcpip_input); /* netif_input_fn */ + if (pNetif == NULL) + return; + + netif_set_up(pNetif); + netif_set_link_up(pNetif); + + m_Dhcp4Pcb = udp_new(); + if (RT_UNLIKELY(m_Dhcp4Pcb == NULL)) + return; /* XXX? */ + + ip_set_option(m_Dhcp4Pcb, SOF_BROADCAST); + udp_recv(m_Dhcp4Pcb, dhcp4RecvCB, this); + + error = udp_bind(m_Dhcp4Pcb, IP_ADDR_ANY, RTNETIPV4_PORT_BOOTPS); + if (error != ERR_OK) + { + udp_remove(m_Dhcp4Pcb); + m_Dhcp4Pcb = NULL; + return; /* XXX? 
*/ + } +} + + +err_t VBoxNetDhcpd::netifInit(netif *pNetif) +{ + pNetif->hwaddr_len = sizeof(RTMAC); + memcpy(pNetif->hwaddr, &m_Config->getMacAddress(), sizeof(RTMAC)); + + pNetif->mtu = 1500; + + pNetif->flags = NETIF_FLAG_BROADCAST + | NETIF_FLAG_ETHARP + | NETIF_FLAG_ETHERNET; + + pNetif->linkoutput = netifLinkOutputCB; + pNetif->output = etharp_output; + + netif_set_default(pNetif); + return ERR_OK; +} + + +void VBoxNetDhcpd::dhcp4Recv(struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + RT_NOREF(pcb, addr, port); + + if (RT_UNLIKELY(p->next != NULL)) + return; /* XXX: we want it in one chunk */ + + bool broadcasted = ip_addr_cmp(ip_current_dest_addr(), &ip_addr_broadcast) + || ip_addr_cmp(ip_current_dest_addr(), &ip_addr_any); + + try + { + DhcpClientMessage *msgIn = DhcpClientMessage::parse(broadcasted, p->payload, p->len); + if (msgIn == NULL) + return; + + std::unique_ptr<DhcpClientMessage> autoFreeMsgIn(msgIn); + + DhcpServerMessage *msgOut = m_server.process(*msgIn); + if (msgOut == NULL) + return; + + std::unique_ptr<DhcpServerMessage> autoFreeMsgOut(msgOut); + + ip_addr_t dst = { msgOut->dst().u }; + if (ip_addr_cmp(&dst, &ip_addr_any)) + ip_addr_copy(dst, ip_addr_broadcast); + + octets_t data; + int rc = msgOut->encode(data); + if (RT_FAILURE(rc)) + return; + + unique_ptr_pbuf q ( pbuf_alloc(PBUF_RAW, (u16_t)data.size(), PBUF_RAM) ); + if (!q) + return; + + err_t error = pbuf_take(q.get(), &data.front(), (u16_t)data.size()); + if (error != ERR_OK) + return; + + error = udp_sendto(pcb, q.get(), &dst, RTNETIPV4_PORT_BOOTPC); + if (error != ERR_OK) + return; + } + catch (std::bad_alloc &) + { + LogRel(("VBoxNetDhcpd::dhcp4Recv: Caught std::bad_alloc!\n")); + } +} + + + + +/* + * Entry point. + */ +extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv) +{ + VBoxNetDhcpd Dhcpd; + int rc = Dhcpd.main(argc, argv); + return RT_SUCCESS(rc) ? 
RTEXITCODE_SUCCESS : RTEXITCODE_FAILURE; +} + + +#ifndef VBOX_WITH_HARDENING + +int main(int argc, char **argv) +{ + int rc = RTR3InitExe(argc, &argv, RTR3INIT_FLAGS_SUPLIB); + if (RT_SUCCESS(rc)) + return TrustedMain(argc, argv); + return RTMsgInitFailure(rc); +} + + +# ifdef RT_OS_WINDOWS +/** (We don't want a console usually.) */ +int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow) +{ + RT_NOREF(hInstance, hPrevInstance, lpCmdLine, nCmdShow); + + return main(__argc, __argv); +} +# endif /* RT_OS_WINDOWS */ + +#endif /* !VBOX_WITH_HARDENING */ diff --git a/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpdHardened.cpp b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpdHardened.cpp new file mode 100644 index 00000000..e29e7e5b --- /dev/null +++ b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpdHardened.cpp @@ -0,0 +1,35 @@ +/* $Id: VBoxNetDhcpdHardened.cpp $ */ +/** @file + * VBoxNetDhcpd - Hardened main(). + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
#include <VBox/sup.h>


/**
 * Executable entry point of the hardened build.
 *
 * Only forwards the arguments and the environment to SUPR3HardenedMain()
 * under the program name "VBoxNetDHCP" with no flags set.
 *
 * @returns Whatever SUPR3HardenedMain() returns.
 * @param   argc        Argument count.
 * @param   argv        Argument vector.
 * @param   envp        Environment vector.
 */
int main(int argc, char **argv, char **envp)
{
    return SUPR3HardenedMain("VBoxNetDHCP", 0 /* fFlags */, argc, argv, envp);
}


/* What follows is the start of the next file of the diff, Dhcpd/lwipopts.h. */

#ifndef VBOX_INCLUDED_SRC_Dhcpd_lwipopts_h
#define VBOX_INCLUDED_SRC_Dhcpd_lwipopts_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include <VBox/cdefs.h> /* For VBOX_STRICT. */
#include <iprt/mem.h>
#include <iprt/alloca.h> /* This may include malloc.h (msc), which is something that has
                          * to be done before redefining any of the functions therein. */
#include <iprt/rand.h> /* see LWIP_RAND() definition */

/** Make lwIP use the libc malloc, or more precisely (see below) the IPRT
 * memory allocation functions. */
#define MEM_LIBC_MALLOC 1
*/ +#if HC_ARCH_BITS == 64 +# define MEM_ALIGNMENT 8 +#else +#define MEM_ALIGNMENT 4 +#endif + +/* Padding before Ethernet header to make IP header aligned */ +#define ETH_PAD_SIZE 2 + +/* IP */ +#define IP_REASSEMBLY 1 +#define IP_REASS_MAX_PBUFS 128 + + + +/* MEMP_NUM_PBUF: the number of memp struct pbufs. If the application + sends a lot of data out of ROM (or other static memory), this + should be set high. + + NB: This is for PBUF_ROM and PBUF_REF pbufs only! + + Number of PBUF_POOL pbufs is controlled by PBUF_POOL_SIZE that, + somewhat confusingly, breaks MEMP_NUM_* pattern. + + PBUF_RAM pbufs are allocated with mem_malloc (with MEM_LIBC_MALLOC + set to 1 this is just system malloc), not memp_malloc. */ +#define MEMP_NUM_PBUF (1024 * 4) + + +/* MEMP_NUM_MLD6_GROUP: Maximum number of IPv6 multicast groups that + can be joined. + + We need to be able to join solicited node multicast for each + address (potentially different) and two groups for DHCP6. All + routers multicast is hardcoded in ip6.c and does not require + explicit joining. Provide also for a few extra groups just in + case. */ +#define MEMP_NUM_MLD6_GROUP (LWIP_IPV6_NUM_ADDRESSES + /* dhcp6 */ 2 + /* extra */ 8) + + +/* MEMP_NUM_TCPIP_MSG_*: the number of struct tcpip_msg, which is used + for sequential API communication and incoming packets. Used in + src/api/tcpip.c. */ +#define MEMP_NUM_TCPIP_MSG_API 128 +#define MEMP_NUM_TCPIP_MSG_INPKT 1024 + +/* MEMP_NUM_UDP_PCB: the number of UDP protocol control blocks. One + per active UDP "connection". */ +#define MEMP_NUM_UDP_PCB 32 + +/* Pbuf options */ +/* PBUF_POOL_SIZE: the number of buffers in the pbuf pool. + This is only for PBUF_POOL pbufs, primarily used by netif drivers. + + This should have been named with the MEMP_NUM_ prefix (cf. + MEMP_NUM_PBUF for PBUF_ROM and PBUF_REF) as it controls the size of + yet another memp_malloc() pool. */ +#define PBUF_POOL_SIZE (1024 * 4) + +/* PBUF_POOL_BUFSIZE: the size of each pbuf in the pbuf pool. 
+ Use default that is based on TCP_MSS and PBUF_LINK_HLEN. */ +#undef PBUF_POOL_BUFSIZE + +/** Turn on support for lightweight critical region protection. Leaving this + * off uses synchronization code in pbuf.c which is totally polluted with + * races. All the other lwip source files would fall back to semaphore-based + * synchronization, but pbuf.c is just broken, leading to incorrect allocation + * and as a result to assertions due to buffers being double freed. */ +#define SYS_LIGHTWEIGHT_PROT 1 + +/** Attempt to get rid of htons etc. macro issues. */ +#undef LWIP_PREFIX_BYTEORDER_FUNCS + +#define LWIP_TCPIP_CORE_LOCKING_INPUT 0 +#define LWIP_TCPIP_CORE_LOCKING 0 + +#define LWIP_NETCONN 0 +#define LWIP_SOCKET 0 +#define LWIP_COMPAT_SOCKETS 0 +#define LWIP_COMPAT_MUTEX 1 + +#define LWIP_TCP 0 +#define LWI_UDP 1 +#define LWIP_ARP 1 +#define ARP_PROXY 0 +#define LWIP_ETHERNET 1 + +/* accept any->broadcast */ +#define LWIP_IP_ACCEPT_UDP_PORT(port) ((port) == PP_NTOHS(/*DHCP_SERVER_PORT*/ 67)) + +#define LWIP_IPV6 0 +#define LWIP_IPV6_FORWARD 0 +#define LWIP_ND6_PROXY 0 + +#define LWIP_ND6_ALLOW_RA_UPDATES (!LWIP_IPV6_FORWARD) +#define LWIP_IPV6_SEND_ROUTER_SOLICIT (!LWIP_IPV6_FORWARD) +/* IPv6 autoconfig we don't need in proxy, but it required for very seldom cases + * iSCSI over intnet with IPv6 + */ +#define LWIP_IPV6_AUTOCONFIG 1 +#if LWIP_IPV6_FORWARD /* otherwise use the default from lwip/opt.h */ +#define LWIP_IPV6_DUP_DETECT_ATTEMPTS 0 +#endif + +#define LWIP_IPV6_FRAG 1 + +/** + * aka Slirp mode. + */ +#define LWIP_CONNECTION_PROXY 0 +#define IP_FORWARD 0 + +/* MEMP_NUM_SYS_TIMEOUT: the number of simultaneously active + timeouts. */ +#define MEMP_NUM_SYS_TIMEOUT 16 + + +/* this is required for IPv6 and IGMP needs */ +#define LWIP_RAND() RTRandU32() + +/* Debugging stuff. 
*/ +#ifdef DEBUG +# define LWIP_DEBUG +# include "lwip-log.h" + +# define LWIP_PROXY_DEBUG LWIP_DBG_OFF +#endif /* DEBUG */ + +/* printf formatter definitions */ +#define U16_F "hu" +#define S16_F "hd" +#define X16_F "hx" +#define U32_F "u" +#define S32_F "d" +#define X32_F "x" + +/* Redirect libc memory alloc functions to IPRT. */ +#define malloc(x) RTMemAlloc(x) +#define realloc(x,y) RTMemRealloc((x), (y)) +#define free(x) RTMemFree(x) + +/* Align VBOX_STRICT and LWIP_NOASSERT. */ +#ifndef VBOX_STRICT +# define LWIP_NOASSERT 1 +#endif + +#endif /* !VBOX_INCLUDED_SRC_Dhcpd_lwipopts_h */ diff --git a/src/VBox/NetworkServices/IntNetSwitch/IntNetSwitchInternal.h b/src/VBox/NetworkServices/IntNetSwitch/IntNetSwitchInternal.h new file mode 100644 index 00000000..82a3688b --- /dev/null +++ b/src/VBox/NetworkServices/IntNetSwitch/IntNetSwitchInternal.h @@ -0,0 +1,109 @@ +/* $Id: IntNetSwitchInternal.h $ */ +/** @file + * VirtualBox internal network switch process - Internal header. + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_IntNetSwitch_IntNetSwitchInternal_h +#define VBOX_INCLUDED_SRC_IntNetSwitch_IntNetSwitchInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IN_INTNET_TESTCASE +#define IN_INTNET_R3 + +#include <VBox/cdefs.h> +#include <VBox/types.h> + +#undef INTNETR0DECL +#define INTNETR0DECL INTNETR3DECL +#undef DECLR0CALLBACKMEMBER +#define DECLR0CALLBACKMEMBER(type, name, args) DECLR3CALLBACKMEMBER(type, name, args) +typedef struct SUPDRVSESSION *MYPSUPDRVSESSION; +#define PSUPDRVSESSION MYPSUPDRVSESSION + +#include <VBox/intnet.h> +#include <VBox/sup.h> + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ + +/** + * Security objectype. + */ +typedef enum SUPDRVOBJTYPE +{ + /** The usual invalid object. */ + SUPDRVOBJTYPE_INVALID = 0, + /** Internal network. */ + SUPDRVOBJTYPE_INTERNAL_NETWORK, + /** Internal network interface. */ + SUPDRVOBJTYPE_INTERNAL_NETWORK_INTERFACE, + /** The first invalid object type in this end. */ + SUPDRVOBJTYPE_END, + /** The usual 32-bit type size hack. 
*/ + SUPDRVOBJTYPE_32_BIT_HACK = 0x7ffffff +} SUPDRVOBJTYPE; + +/** + * Object destructor callback. + * This is called for reference counted objectes when the count reaches 0. + * + * @param pvObj The object pointer. + * @param pvUser1 The first user argument. + * @param pvUser2 The second user argument. + */ +typedef DECLCALLBACKTYPE(void, FNSUPDRVDESTRUCTOR,(void *pvObj, void *pvUser1, void *pvUser2)); +/** Pointer to a FNSUPDRVDESTRUCTOR(). */ +typedef FNSUPDRVDESTRUCTOR *PFNSUPDRVDESTRUCTOR; + + +RT_C_DECLS_BEGIN + +INTNETR3DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, + PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2); +INTNETR3DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking); +INTNETR3DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession); +INTNETR3DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession); +INTNETR3DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName); +INTNETR3DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3); +INTNETR3DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr); + + +RT_C_DECLS_END + +#endif /* !VBOX_INCLUDED_SRC_IntNetSwitch_IntNetSwitchInternal_h */ + diff --git a/src/VBox/NetworkServices/IntNetSwitch/Makefile.kmk b/src/VBox/NetworkServices/IntNetSwitch/Makefile.kmk new file mode 100644 index 00000000..5e7bd7ad --- /dev/null +++ b/src/VBox/NetworkServices/IntNetSwitch/Makefile.kmk @@ -0,0 +1,75 @@ +# $Id: Makefile.kmk $ +## @file +# Sub-makefile for the Ring-3 based network switch process. +# + +# +# Copyright (C) 2022-2023 Oracle and/or its affiliates. +# +# This file is part of VirtualBox base platform packages, as +# available from https://www.virtualbox.org. 
SUB_DEPTH := ../../../..
include $(KBUILD_PATH)/subheader.kmk

#
# The internal network switch module.
#
PROGRAMS += VBoxIntNetSwitch
VBoxIntNetSwitch_TEMPLATE := VBoxR3Exe

# Pass the build type to the sources as a string literal; the quoting differs
# depending on whether kBuild quotes definitions automatically.
ifdef VBOX_WITH_AUTOMATIC_DEFS_QUOTING
 VBoxIntNetSwitch_DEFS = KBUILD_TYPE="$(KBUILD_TYPE)"
else
 VBoxIntNetSwitch_DEFS = KBUILD_TYPE=\"$(KBUILD_TYPE)\"
endif
VBoxIntNetSwitch_DEFS += VBOX_WITH_INTNET_SERVICE_IN_R3
# On darwin the binary is installed below the XPC service bundle defined further down.
VBoxIntNetSwitch_INST.darwin = $(VBoxIntNetSwitch.xpc_INST)/MacOS/
VBoxIntNetSwitch_SOURCES = \
	VBoxIntNetSwitch.cpp \
	SrvIntNetWrapper.cpp
VBoxIntNetSwitch_LIBS = $(LIB_RUNTIME)
VBoxIntNetSwitch_LDFLAGS.darwin = \
	-rpath @executable_path/../../../../MacOS

ifeq ($(KBUILD_TARGET),darwin)
 # The XPC service bundle; only its Info.plist is installed by this target.
 INSTALLS += VBoxIntNetSwitch.xpc

 VBoxIntNetSwitch.xpc_MODE = 644
 VBoxIntNetSwitch.xpc_INST = $(INST_VIRTUALBOX)Contents/XPCServices/org.virtualbox.intnet.xpc/Contents/
 VBoxIntNetSwitch.xpc_SOURCES = \
 	$(VBoxIntNetSwitch.xpc_0_OUTDIR)/Info.plist

 # Generate Info.plist from the template by substituting the @VBOX_*@ placeholders.
 $$(VBoxIntNetSwitch.xpc_0_OUTDIR)/Info.plist: $(PATH_SUB_CURRENT)/darwin/Info.plist $(VBOX_VERSION_MK) | $$(@D)/
	$(call MSG_GENERATE,VBoxIntNetSwitch.xpc,$<,$@)
	$(QUIET)$(RM) -f $@
	$(QUIET)$(SED) \
		-e 's+@VBOX_VERSION_STRING@+$(VBOX_VERSION_STRING)+g' \
		-e 's+@VBOX_VERSION_MAJOR@+$(VBOX_VERSION_MAJOR)+g' \
		-e 's+@VBOX_VERSION_MINOR@+$(VBOX_VERSION_MINOR)+g' \
		-e 's+@VBOX_VERSION_BUILD@+$(VBOX_VERSION_BUILD)+g' \
		-e 's+@VBOX_VENDOR@+$(VBOX_VENDOR)+g' \
		-e 's+@VBOX_PRODUCT@+$(VBOX_PRODUCT)+g' \
		-e 's+@VBOX_C_YEAR@+$(VBOX_C_YEAR)+g' \
		$< > $@
endif

$(call VBOX_SET_VER_INFO_EXE,VBoxIntNetSwitch,VirtualBox Internal Network Switch,$(VBOX_WINDOWS_ICON_FILE)) # Version info / description.

include $(FILE_KBUILD_SUB_FOOTER)
/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "IntNetSwitchInternal.h"

#include <iprt/asm.h>
#include <iprt/mp.h>


/*********************************************************************************************************************************
*   Structures and Typedefs                                                                                                      *
*********************************************************************************************************************************/


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/

/* Fake non-existing ring-0 APIs: replace them with constants before the
   ring-0 source is included at the bottom of this file. */
#define RTThreadIsInInterrupt(hThread)      false
#define RTThreadPreemptIsEnabled(hThread)   true
#define RTMpCpuId()                         0

/* No CLI/POPF, please.  The spinlock header is included first so that the
   interrupt-safe flag can be remapped to the interrupt-unsafe one. */
#include <iprt/spinlock.h>
#undef  RTSPINLOCK_FLAGS_INTERRUPT_SAFE
#define RTSPINLOCK_FLAGS_INTERRUPT_SAFE     RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE


/* ugly but necessary for making R0 code compilable for R3. */
#undef LOG_GROUP
#include "../../Devices/Network/SrvIntNetR0.cpp"
+ * + * This is a bit hackish as we're mixing context here, however it is + * very useful when making changes to the internal networking service. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IN_INTNET_TESTCASE +#define IN_INTNET_R3 +#include "IntNetSwitchInternal.h" + +#include <VBox/err.h> +#include <VBox/vmm/vmm.h> +#include <iprt/asm.h> +#include <iprt/critsect.h> +#include <iprt/initterm.h> +#include <iprt/mem.h> +#include <iprt/message.h> +#include <iprt/string.h> +#include <iprt/thread.h> +#include <iprt/semaphore.h> +#include <iprt/time.h> + +#include <xpc/xpc.h> + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ + + 
+/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ + +/** + * Registered object. + * This takes care of reference counting and tracking data for access checks. + */ +typedef struct SUPDRVOBJ +{ + /** Pointer to the next in the global list. */ + struct SUPDRVOBJ * volatile pNext; + /** Pointer to the object destructor. + * This may be set to NULL if the image containing the destructor get unloaded. */ + PFNSUPDRVDESTRUCTOR pfnDestructor; + /** User argument 1. */ + void *pvUser1; + /** User argument 2. */ + void *pvUser2; + /** The total sum of all per-session usage. */ + uint32_t volatile cUsage; +} SUPDRVOBJ, *PSUPDRVOBJ; + + +/** + * The per-session object usage record. + */ +typedef struct SUPDRVUSAGE +{ + /** Pointer to the next in the list. */ + struct SUPDRVUSAGE * volatile pNext; + /** Pointer to the object we're recording usage for. */ + PSUPDRVOBJ pObj; + /** The usage count. */ + uint32_t volatile cUsage; +} SUPDRVUSAGE, *PSUPDRVUSAGE; + + +/** + * Device extension. + */ +typedef struct SUPDRVDEVEXT +{ + /** Number of references to this service. */ + uint32_t volatile cRefs; + /** Critical section to serialize the initialization, usage counting and objects. */ + RTCRITSECT CritSect; + /** List of registered objects. Protected by the spinlock. */ + PSUPDRVOBJ volatile pObjs; +} SUPDRVDEVEXT; +typedef SUPDRVDEVEXT *PSUPDRVDEVEXT; + + +/** + * Per session data. + * This is mainly for memory tracking. + */ +typedef struct SUPDRVSESSION +{ + PSUPDRVDEVEXT pDevExt; + /** List of generic usage records. (protected by SUPDRVDEVEXT::CritSect) */ + PSUPDRVUSAGE volatile pUsage; + /** The XPC connection handle for this session. */ + xpc_connection_t hXpcCon; + /** The intnet interface handle to wait on. 
*/ + INTNETIFHANDLE hIfWait; + /** Flag whether a receive wait was initiated. */ + bool volatile fRecvWait; + /** Flag whether there is something to receive. */ + bool volatile fRecvAvail; +} SUPDRVSESSION; + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +static SUPDRVDEVEXT g_DevExt; + + +INTNETR3DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, + PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2) +{ + RT_NOREF(enmType); + + PSUPDRVOBJ pObj = (PSUPDRVOBJ)RTMemAllocZ(sizeof(*pObj)); + if (!pObj) + return NULL; + pObj->cUsage = 1; + pObj->pfnDestructor = pfnDestructor; + pObj->pvUser1 = pvUser1; + pObj->pvUser2 = pvUser2; + + /* + * Insert the object and create the session usage record. + */ + PSUPDRVUSAGE pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage)); + if (!pUsage) + { + RTMemFree(pObj); + return NULL; + } + + PSUPDRVDEVEXT pDevExt = pSession->pDevExt; + RTCritSectEnter(&pDevExt->CritSect); + + /* The object. */ + pObj->pNext = pDevExt->pObjs; + pDevExt->pObjs = pObj; + + /* The session record. */ + pUsage->cUsage = 1; + pUsage->pObj = pObj; + pUsage->pNext = pSession->pUsage; + pSession->pUsage = pUsage; + + RTCritSectLeave(&pDevExt->CritSect); + return pObj; +} + + +INTNETR3DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking) +{ + PSUPDRVDEVEXT pDevExt = pSession->pDevExt; + PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj; + int rc = VINF_SUCCESS; + PSUPDRVUSAGE pUsage; + + RT_NOREF(fNoBlocking); + + RTCritSectEnter(&pDevExt->CritSect); + + /* + * Reference the object. + */ + ASMAtomicIncU32(&pObj->cUsage); + + /* + * Look for the session record. 
+ */ + for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext) + { + if (pUsage->pObj == pObj) + break; + } + + if (pUsage) + pUsage->cUsage++; + else + { + /* create a new session record. */ + pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage)); + if (RT_LIKELY(pUsage)) + { + pUsage->cUsage = 1; + pUsage->pObj = pObj; + pUsage->pNext = pSession->pUsage; + pSession->pUsage = pUsage; + } + else + { + ASMAtomicDecU32(&pObj->cUsage); + rc = VERR_TRY_AGAIN; + } + } + + RTCritSectLeave(&pDevExt->CritSect); + return rc; +} + + +INTNETR3DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession) +{ + return SUPR0ObjAddRefEx(pvObj, pSession, false); +} + + +INTNETR3DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession) +{ + PSUPDRVDEVEXT pDevExt = pSession->pDevExt; + PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj; + int rc = VERR_INVALID_PARAMETER; + PSUPDRVUSAGE pUsage; + PSUPDRVUSAGE pUsagePrev; + + /* + * Acquire the spinlock and look for the usage record. + */ + RTCritSectEnter(&pDevExt->CritSect); + + for (pUsagePrev = NULL, pUsage = pSession->pUsage; + pUsage; + pUsagePrev = pUsage, pUsage = pUsage->pNext) + { + if (pUsage->pObj == pObj) + { + rc = VINF_SUCCESS; + AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage)); + if (pUsage->cUsage > 1) + { + pObj->cUsage--; + pUsage->cUsage--; + } + else + { + /* + * Free the session record. + */ + if (pUsagePrev) + pUsagePrev->pNext = pUsage->pNext; + else + pSession->pUsage = pUsage->pNext; + RTMemFree(pUsage); + + /* What about the object? */ + if (pObj->cUsage > 1) + pObj->cUsage--; + else + { + /* + * Object is to be destroyed, unlink it. 
+ */ + rc = VINF_OBJECT_DESTROYED; + if (pDevExt->pObjs == pObj) + pDevExt->pObjs = pObj->pNext; + else + { + PSUPDRVOBJ pObjPrev; + for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext) + if (pObjPrev->pNext == pObj) + { + pObjPrev->pNext = pObj->pNext; + break; + } + Assert(pObjPrev); + } + } + } + break; + } + } + + RTCritSectLeave(&pDevExt->CritSect); + + /* + * Call the destructor and free the object if required. + */ + if (rc == VINF_OBJECT_DESTROYED) + { + if (pObj->pfnDestructor) + pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2); + RTMemFree(pObj); + } + + return rc; +} + + +INTNETR3DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName) +{ + RT_NOREF(pvObj, pSession, pszObjName); + return VINF_SUCCESS; +} + + +INTNETR3DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3) +{ + RT_NOREF(pSession); + + /* + * This is used to allocate and map the send/receive buffers into the callers process space, meaning + * we have to mmap it with the shareable attribute. + */ + void *pv = mmap(NULL, cb, PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0); + if (pv == MAP_FAILED) + return VERR_NO_MEMORY; + + *ppvR0 = (RTR0PTR)pv; + if (ppvR3) + *ppvR3 = pv; + return VINF_SUCCESS; +} + + +INTNETR3DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr) +{ + RT_NOREF(pSession); + + PINTNETBUF pBuf = (PINTNETBUF)uPtr; /// @todo Hack hack hack! + munmap((void *)uPtr, pBuf->cbBuf); + return VINF_SUCCESS; +} + + +/** + * Destroys the given internal network XPC connection session freeing all allocated resources. + * + * @returns Reference count of the device extension.. + * @param pSession The ession to destroy. 
+ */ +static uint32_t intnetR3SessionDestroy(PSUPDRVSESSION pSession) +{ + PSUPDRVDEVEXT pDevExt = pSession->pDevExt; + uint32_t cRefs = ASMAtomicDecU32(&pDevExt->cRefs); + xpc_transaction_end(); + xpc_connection_set_context(pSession->hXpcCon, NULL); + xpc_connection_cancel(pSession->hXpcCon); + pSession->hXpcCon = NULL; + + ASMAtomicXchgBool(&pSession->fRecvAvail, true); + + if (pSession->pUsage) + { + PSUPDRVUSAGE pUsage; + RTCritSectEnter(&pDevExt->CritSect); + + while ((pUsage = pSession->pUsage) != NULL) + { + PSUPDRVOBJ pObj = pUsage->pObj; + pSession->pUsage = pUsage->pNext; + + AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage)); + if (pUsage->cUsage < pObj->cUsage) + { + pObj->cUsage -= pUsage->cUsage; + } + else + { + /* Destroy the object and free the record. */ + if (pDevExt->pObjs == pObj) + pDevExt->pObjs = pObj->pNext; + else + { + PSUPDRVOBJ pObjPrev; + for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext) + if (pObjPrev->pNext == pObj) + { + pObjPrev->pNext = pObj->pNext; + break; + } + Assert(pObjPrev); + } + + RTCritSectLeave(&pDevExt->CritSect); + + if (pObj->pfnDestructor) + pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2); + RTMemFree(pObj); + + RTCritSectEnter(&pDevExt->CritSect); + } + + /* free it and continue. */ + RTMemFree(pUsage); + } + + RTCritSectLeave(&pDevExt->CritSect); + AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during destruction!\n")); + } + + RTMemFree(pSession); + return cRefs; +} + + +/** + * Data available in th receive buffer callback. + */ +static DECLCALLBACK(void) intnetR3RecvAvail(INTNETIFHANDLE hIf, void *pvUser) +{ + RT_NOREF(hIf); + PSUPDRVSESSION pSession = (PSUPDRVSESSION)pvUser; + + if (ASMAtomicXchgBool(&pSession->fRecvWait, false)) + { + /* Send an empty message. 
*/ + xpc_object_t hObjPoke = xpc_dictionary_create(NULL, NULL, 0); + xpc_connection_send_message(pSession->hXpcCon, hObjPoke); + xpc_release(hObjPoke); + } + else + ASMAtomicXchgBool(&pSession->fRecvAvail, true); +} + + +static void intnetR3RequestProcess(xpc_connection_t hCon, xpc_object_t hObj, PSUPDRVSESSION pSession) +{ + int rc = VINF_SUCCESS; + uint64_t iReq = xpc_dictionary_get_uint64(hObj, "req-id"); + size_t cbReq = 0; + const void *pvReq = xpc_dictionary_get_data(hObj, "req", &cbReq); + union + { + INTNETOPENREQ OpenReq; + INTNETIFCLOSEREQ IfCloseReq; + INTNETIFGETBUFFERPTRSREQ IfGetBufferPtrsReq; + INTNETIFSETPROMISCUOUSMODEREQ IfSetPromiscuousModeReq; + INTNETIFSETMACADDRESSREQ IfSetMacAddressReq; + INTNETIFSETACTIVEREQ IfSetActiveReq; + INTNETIFSENDREQ IfSendReq; + INTNETIFWAITREQ IfWaitReq; + INTNETIFABORTWAITREQ IfAbortWaitReq; + } ReqReply; + + memcpy(&ReqReply, pvReq, RT_MIN(sizeof(ReqReply), cbReq)); + size_t cbReply = 0; + + if (pvReq) + { + switch (iReq) + { + case VMMR0_DO_INTNET_OPEN: + { + if (cbReq == sizeof(INTNETOPENREQ)) + { + rc = IntNetR3Open(pSession, &ReqReply.OpenReq.szNetwork[0], ReqReply.OpenReq.enmTrunkType, ReqReply.OpenReq.szTrunk, + ReqReply.OpenReq.fFlags, ReqReply.OpenReq.cbSend, ReqReply.OpenReq.cbRecv, + intnetR3RecvAvail, pSession, &ReqReply.OpenReq.hIf); + cbReply = sizeof(INTNETOPENREQ); + } + else + rc = VERR_INVALID_PARAMETER; + break; + } + case VMMR0_DO_INTNET_IF_CLOSE: + { + if (cbReq == sizeof(INTNETIFCLOSEREQ)) + { + rc = IntNetR0IfCloseReq(pSession, &ReqReply.IfCloseReq); + cbReply = sizeof(INTNETIFCLOSEREQ); + } + else + rc = VERR_INVALID_PARAMETER; + break; + } + case VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS: + { + if (cbReq == sizeof(INTNETIFGETBUFFERPTRSREQ)) + { + rc = IntNetR0IfGetBufferPtrsReq(pSession, &ReqReply.IfGetBufferPtrsReq); + /* This is special as we need to return a shared memory segment. 
*/ + xpc_object_t hObjReply = xpc_dictionary_create_reply(hObj); + xpc_object_t hObjShMem = xpc_shmem_create(ReqReply.IfGetBufferPtrsReq.pRing3Buf, ReqReply.IfGetBufferPtrsReq.pRing3Buf->cbBuf); + if (hObjShMem) + { + xpc_dictionary_set_value(hObjReply, "buf-ptr", hObjShMem); + xpc_release(hObjShMem); + } + else + rc = VERR_NO_MEMORY; + + xpc_dictionary_set_uint64(hObjReply, "rc", INTNET_R3_SVC_SET_RC(rc)); + xpc_connection_send_message(hCon, hObjReply); + xpc_release(hObjReply); + return; + } + else + rc = VERR_INVALID_PARAMETER; + break; + } + case VMMR0_DO_INTNET_IF_SET_PROMISCUOUS_MODE: + { + if (cbReq == sizeof(INTNETIFSETPROMISCUOUSMODEREQ)) + { + rc = IntNetR0IfSetPromiscuousModeReq(pSession, &ReqReply.IfSetPromiscuousModeReq); + cbReply = sizeof(INTNETIFSETPROMISCUOUSMODEREQ); + } + else + rc = VERR_INVALID_PARAMETER; + break; + } + case VMMR0_DO_INTNET_IF_SET_MAC_ADDRESS: + { + if (cbReq == sizeof(INTNETIFSETMACADDRESSREQ)) + { + rc = IntNetR0IfSetMacAddressReq(pSession, &ReqReply.IfSetMacAddressReq); + cbReply = sizeof(INTNETIFSETMACADDRESSREQ); + } + else + rc = VERR_INVALID_PARAMETER; + break; + } + case VMMR0_DO_INTNET_IF_SET_ACTIVE: + { + if (cbReq == sizeof(INTNETIFSETACTIVEREQ)) + { + rc = IntNetR0IfSetActiveReq(pSession, &ReqReply.IfSetActiveReq); + cbReply = sizeof(INTNETIFSETACTIVEREQ); + } + else + rc = VERR_INVALID_PARAMETER; + break; + } + case VMMR0_DO_INTNET_IF_SEND: + { + if (cbReq == sizeof(INTNETIFSENDREQ)) + { + rc = IntNetR0IfSendReq(pSession, &ReqReply.IfSendReq); + cbReply = sizeof(INTNETIFSENDREQ); + } + else + rc = VERR_INVALID_PARAMETER; + break; + } + case VMMR0_DO_INTNET_IF_WAIT: + { + if (cbReq == sizeof(INTNETIFWAITREQ)) + { + ASMAtomicXchgBool(&pSession->fRecvWait, true); + if (ASMAtomicXchgBool(&pSession->fRecvAvail, false)) + { + ASMAtomicXchgBool(&pSession->fRecvWait, false); + + /* Send an empty message. 
*/ + xpc_object_t hObjPoke = xpc_dictionary_create(NULL, NULL, 0); + xpc_connection_send_message(pSession->hXpcCon, hObjPoke); + xpc_release(hObjPoke); + } + return; + } + else + rc = VERR_INVALID_PARAMETER; + break; + } + case VMMR0_DO_INTNET_IF_ABORT_WAIT: + { + if (cbReq == sizeof(INTNETIFABORTWAITREQ)) + { + ASMAtomicXchgBool(&pSession->fRecvWait, false); + if (ASMAtomicXchgBool(&pSession->fRecvAvail, false)) + { + /* Send an empty message. */ + xpc_object_t hObjPoke = xpc_dictionary_create(NULL, NULL, 0); + xpc_connection_send_message(pSession->hXpcCon, hObjPoke); + xpc_release(hObjPoke); + } + cbReply = sizeof(INTNETIFABORTWAITREQ); + } + else + rc = VERR_INVALID_PARAMETER; + break; + } + default: + rc = VERR_INVALID_PARAMETER; + } + } + + xpc_object_t hObjReply = xpc_dictionary_create_reply(hObj); + xpc_dictionary_set_uint64(hObjReply, "rc", INTNET_R3_SVC_SET_RC(rc)); + xpc_dictionary_set_data(hObjReply, "reply", &ReqReply, cbReply); + xpc_connection_send_message(hCon, hObjReply); + xpc_release(hObjReply); +} + + +DECLCALLBACK(void) xpcConnHandler(xpc_connection_t hXpcCon) +{ + xpc_connection_set_event_handler(hXpcCon, ^(xpc_object_t hObj) { + PSUPDRVSESSION pSession = (PSUPDRVSESSION)xpc_connection_get_context(hXpcCon); + + if (xpc_get_type(hObj) == XPC_TYPE_ERROR) + { + if (hObj == XPC_ERROR_CONNECTION_INVALID) + intnetR3SessionDestroy(pSession); + else if (hObj == XPC_ERROR_TERMINATION_IMMINENT) + { + PSUPDRVDEVEXT pDevExt = pSession->pDevExt; + + uint32_t cRefs = intnetR3SessionDestroy(pSession); + if (!cRefs) + { + /* Last one cleans up the global data. 
*/ + RTCritSectDelete(&pDevExt->CritSect); + } + } + } + else + intnetR3RequestProcess(hXpcCon, hObj, pSession); + }); + + PSUPDRVSESSION pSession = (PSUPDRVSESSION)RTMemAllocZ(sizeof(*pSession)); + if (pSession) + { + pSession->pDevExt = &g_DevExt; + pSession->hXpcCon = hXpcCon; + + xpc_connection_set_context(hXpcCon, pSession); + xpc_connection_resume(hXpcCon); + xpc_transaction_begin(); + ASMAtomicIncU32(&g_DevExt.cRefs); + } +} + + +int main(int argc, char **argv) +{ + int rc = RTR3InitExe(argc, &argv, RTR3INIT_FLAGS_SUPLIB); + if (RT_SUCCESS(rc)) + { + IntNetR0Init(); + + g_DevExt.pObjs = NULL; + rc = RTCritSectInit(&g_DevExt.CritSect); + if (RT_SUCCESS(rc)) + xpc_main(xpcConnHandler); /* Never returns. */ + + exit(EXIT_FAILURE); + } + + return RTMsgInitFailure(rc); +} + diff --git a/src/VBox/NetworkServices/IntNetSwitch/darwin/Info.plist b/src/VBox/NetworkServices/IntNetSwitch/darwin/Info.plist new file mode 100644 index 00000000..ea39dfd8 --- /dev/null +++ b/src/VBox/NetworkServices/IntNetSwitch/darwin/Info.plist @@ -0,0 +1,22 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundlePackageType</key> <string>XPC!</string> + <key>CFBundleSignature</key> <string>VBOX</string> + <key>CFBundleDevelopmentRegion</key> <string>English</string> + <key>CFBundleIdentifier</key> <string>org.virtualbox.intnet</string> + <key>CFBundleName</key> <string>VBoxIntNetSwitch</string> + <key>CFBundleExecutable</key> <string>VBoxIntNetSwitch</string> + <key>CFBundleVersion</key> <string>@VBOX_VERSION_MAJOR@.@VBOX_VERSION_MINOR@.@VBOX_VERSION_BUILD@</string> + <key>CFBundleShortVersionString</key> <string>@VBOX_VERSION_MAJOR@.@VBOX_VERSION_MINOR@.@VBOX_VERSION_BUILD@</string> + <key>CFBundleGetInfoString</key> <string>@VBOX_PRODUCT@ Manager @VBOX_VERSION_STRING@, © 2007-@VBOX_C_YEAR@ @VBOX_VENDOR@</string> + <key>CFBundleIconFile</key> 
<string>virtualbox</string> + <key>CFBundleInfoDictionaryVersion</key> <string>6.0</string> + <key>XPCService</key> + <dict> + <key>RunloopType</key> <string>dispatch_main</string> + <key>ServiceType</key> <string>User</string> + </dict> +</dict> +</plist> diff --git a/src/VBox/NetworkServices/Makefile.kmk b/src/VBox/NetworkServices/Makefile.kmk new file mode 100644 index 00000000..e9737066 --- /dev/null +++ b/src/VBox/NetworkServices/Makefile.kmk @@ -0,0 +1,46 @@ +# $Id: Makefile.kmk $ +## @file +# Top-level makefile for the VBox Network Services. +# + +# +# Copyright (C) 2009-2023 Oracle and/or its affiliates. +# +# This file is part of VirtualBox base platform packages, as +# available from https://www.virtualbox.org. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, in version 3 of the +# License. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses>. +# +# SPDX-License-Identifier: GPL-3.0-only +# + +SUB_DEPTH = ../../.. +include $(KBUILD_PATH)/subheader.kmk + +# Include sub-makefiles. +include $(PATH_SUB_CURRENT)/Dhcpd/Makefile.kmk + +# VBoxNetNAT requires COM/XPCOM. 
+if defined(VBOX_WITH_NAT_SERVICE) && defined(VBOX_WITH_MAIN) \ + && "$(intersects $(KBUILD_TARGET_ARCH),$(VBOX_SUPPORTED_HOST_ARCHS))" != "" + include $(PATH_SUB_CURRENT)/NAT/Makefile.kmk +endif + +# Internal network switch +if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) + include $(PATH_SUB_CURRENT)/IntNetSwitch/Makefile.kmk +endif + +include $(FILE_KBUILD_SUB_FOOTER) + diff --git a/src/VBox/NetworkServices/NAT/Makefile.kmk b/src/VBox/NetworkServices/NAT/Makefile.kmk new file mode 100644 index 00000000..6e9a1917 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/Makefile.kmk @@ -0,0 +1,162 @@ +# $Id: Makefile.kmk $ +## @file +# Sub-makefile for NAT Networking +# + +# +# Copyright (C) 2006-2023 Oracle and/or its affiliates. +# +# This file is part of VirtualBox base platform packages, as +# available from https://www.virtualbox.org. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, in version 3 of the +# License. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses>. +# +# SPDX-License-Identifier: GPL-3.0-only +# + +SUB_DEPTH = ../../../.. +include $(KBUILD_PATH)/subheader.kmk +ifdef VBOX_WITH_LWIP_NAT # entire file + + ifndef LWIP_SOURCES + include $(PATH_SUB_CURRENT)/../../Devices/Network/lwip-new/Config.kmk + endif + + # + # Globals + # + VBOX_PATH_NAT_SRC := $(PATH_SUB_CURRENT) + + + # + # Hardened stub exe for VBoxNetNAT. 
+ # + ifdef VBOX_WITH_HARDENING + PROGRAMS += VBoxNetNATHardened + VBoxNetNATHardened_TEMPLATE = VBoxR3HardenedExe + VBoxNetNATHardened_NAME = VBoxNetNAT + ifdef VBOX_WITH_AUTOMATIC_DEFS_QUOTING + VBoxNetNATHardened_DEFS = SERVICE_NAME="VBoxNetNAT" + else + VBoxNetNATHardened_DEFS = SERVICE_NAME=\"VBoxNetNAT\" + endif + VBoxNetNATHardened_SOURCES = VBoxNetNATHardened.cpp + VBoxNetNATHardened_LDFLAGS.win = /SUBSYSTEM:windows + $(call VBOX_SET_VER_INFO_EXE,VBoxNetNATHardened,VirtualBox NAT Engine,$(VBOX_WINDOWS_ICON_FILE)) # Version info / description. + endif + + + # + # VBoxNetNAT. + # + ifdef VBOX_WITH_HARDENING + DLLS += VBoxNetNAT + else + PROGRAMS += VBoxNetNAT + endif + VBoxNetNAT_TEMPLATE := $(if-expr defined(VBOX_WITH_HARDENING),VBoxMainDll,VBoxMainClientExe) + VBoxNetNAT_NAME := VBoxNetNAT + VBoxNetNAT_DEFS = \ + IPv6 \ + $(if $(VBOX_WITH_INTNET_SERVICE_IN_R3),VBOX_WITH_INTNET_SERVICE_IN_R3,) + # VBoxNetNAT_DEFS.linux = WITH_VALGRIND # instrument lwip memp.c + VBoxNetNAT_DEFS.win = VBOX_COM_OUTOFPROC_MODULE _WIN32_WINNT=0x501 # Windows XP + # Convince Solaris headers to expose socket stuff we need. + # + # Setting _XOPEN_SOURCE to either 500 or 600 would always work, but + # <sys/feature_tests.h> insists that 600 requires C99 and so it + # explodes for older g++. It also insists that 500 is NOT to be used + # with C99. + # + # Newer g++ in C++11 mode (formerly known as C++0x) needs 600, so it + # employs sleight of hand to pretend it's C99 to keep feature test + # happy. + # + # Compile the C code with settings that match g++. This probably + # should be centralized so that whole codebase uses consistent + # settings. 
+ ifeq ($(KBUILD_TARGET),solaris) + ifneq ($(VBOX_GCC_VERSION_CXX),) + if $(VBOX_GCC_VERSION_CXX) >= 40600 + # we compile C++ code with -std=c++0x / -std=c++11 + VBoxNetNAT_CFLAGS.solaris += -std=c99 + VBoxNetNAT_DEFS.solaris += _XOPEN_SOURCE=600 + else + VBoxNetNAT_DEFS.solaris += _XOPEN_SOURCE=500 + endif + endif + VBoxNetNAT_DEFS.solaris += __EXTENSIONS__=1 + endif + + # (current dir is for for lwipopts.h) + VBoxNetNAT_INCS += . $(addprefix ../../Devices/Network/lwip-new/,$(LWIP_INCS)) + + VBoxNetNAT_SOURCES = \ + VBoxNetLwipNAT.cpp \ + ../NetLib/IntNetIf.cpp \ + ../NetLib/VBoxNetPortForwardString.cpp \ + $(addprefix ../../Devices/Network/lwip-new/,$(LWIP_SOURCES)) \ + proxy_pollmgr.c \ + proxy_rtadvd.c \ + proxy.c \ + pxremap.c \ + pxtcp.c \ + pxudp.c \ + pxdns.c \ + fwtcp.c \ + fwudp.c \ + portfwd.c \ + proxy_dhcp6ds.c \ + proxy_tftpd.c + + ifeq ($(KBUILD_TARGET),win) + VBoxNetNAT_SOURCES += pxping_win.c # unprivileged Icmp API + else + VBoxNetNAT_SOURCES += pxping.c # raw sockets + VBoxNetNAT_CFLAGS.linux += -Wno-address-of-packed-member # Needed for GCC 9. + endif + + VBoxNetNAT_SOURCES.darwin += rtmon_bsd.c + VBoxNetNAT_SOURCES.freebsd += rtmon_bsd.c + VBoxNetNAT_SOURCES.linux += rtmon_linux.c + VBoxNetNAT_SOURCES.solaris += rtmon_bsd.c + VBoxNetNAT_SOURCES.win += \ + rtmon_win.c \ + RTWinPoll.cpp \ + RTWinSocketPair.cpp + + VBoxNetNAT_LIBS = \ + $(LIB_RUNTIME) + VBoxNetNAT_LIBS.solaris += socket nsl + + VBoxNetNAT_LDFLAGS.win = /SUBSYSTEM:windows + + # ifeq ($(VBOX_WITH_HARDENING),) + # ifn1of ($(KBUILD_TARGET), darwin win) + # # helper for debugging unprivileged + # VBoxNetNAT_DEFS += VBOX_RAWSOCK_DEBUG_HELPER + # VBoxNetNAT_SOURCES += getrawsock.c + # endif + # endif + + + ifdef VBOX_WITH_HARDENING + $(call VBOX_SET_VER_INFO_DLL,VBoxNetNAT,VirtualBox NAT Engine (dll),$(VBOX_WINDOWS_ICON_FILE)) # Version info / description. + else + $(call VBOX_SET_VER_INFO_EXE,VBoxNetNAT,VirtualBox NAT Engine,$(VBOX_WINDOWS_ICON_FILE)) # Version info / description. 
+ endif + +endif # VBOX_WITH_LWIP_NAT +include $(FILE_KBUILD_SUB_FOOTER) + diff --git a/src/VBox/NetworkServices/NAT/RTWinPoll.cpp b/src/VBox/NetworkServices/NAT/RTWinPoll.cpp new file mode 100644 index 00000000..c1cf2707 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/RTWinPoll.cpp @@ -0,0 +1,164 @@ +/* $Id: RTWinPoll.cpp $ */ +/** @file + * NAT Network - poll(2) implementation for winsock. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/cdefs.h> +#include <iprt/errcore.h> +#include <iprt/string.h> + +#include <iprt/errcore.h> +#include <VBox/log.h> + +#include <iprt/win/winsock2.h> +#include <iprt/win/windows.h> +#include "winpoll.h" + +static HANDLE g_hNetworkEvent; + +int +RTWinPoll(struct pollfd *pFds, unsigned int nfds, int timeout, int *pNready) +{ + AssertPtrReturn(pFds, VERR_INVALID_PARAMETER); + + if (g_hNetworkEvent == WSA_INVALID_EVENT) + { + g_hNetworkEvent = WSACreateEvent(); + AssertReturn(g_hNetworkEvent != WSA_INVALID_EVENT, VERR_INTERNAL_ERROR); + } + + for (unsigned int i = 0; i < nfds; ++i) + { + long eventMask = 0; + short pollEvents = pFds[i].events; + + /* clean revents */ + pFds[i].revents = 0; + + /* ignore invalid sockets */ + if (pFds[i].fd == INVALID_SOCKET) + continue; + + /* + * POLLIN Data other than high priority data may be read without blocking. + * This is equivalent to ( POLLRDNORM | POLLRDBAND ). + * POLLRDBAND Priority data may be read without blocking. + * POLLRDNORM Normal data may be read without blocking. + */ + if (pollEvents & POLLIN) + eventMask |= FD_READ | FD_ACCEPT; + + /* + * POLLOUT Normal data may be written without blocking. This is equivalent + * to POLLWRNORM. + * POLLWRNORM Normal data may be written without blocking. + */ + if (pollEvents & POLLOUT) + eventMask |= FD_WRITE | FD_CONNECT; + + /* + * This is "moral" equivalent to POLLHUP. + */ + eventMask |= FD_CLOSE; + WSAEventSelect(pFds[i].fd, g_hNetworkEvent, eventMask); + } + + DWORD index = WSAWaitForMultipleEvents(1, + &g_hNetworkEvent, + FALSE, + timeout == RT_INDEFINITE_WAIT ? 
WSA_INFINITE : timeout, + FALSE); + if (index != WSA_WAIT_EVENT_0) + { + if (index == WSA_WAIT_TIMEOUT) + return VERR_TIMEOUT; + } + + int nready = 0; + for (unsigned int i = 0; i < nfds; ++i) + { + short revents = 0; + WSANETWORKEVENTS NetworkEvents; + int err; + + if (pFds[i].fd == INVALID_SOCKET) + continue; + + RT_ZERO(NetworkEvents); + + err = WSAEnumNetworkEvents(pFds[i].fd, + g_hNetworkEvent, + &NetworkEvents); + + if (err == SOCKET_ERROR) + { + if (WSAGetLastError() == WSAENOTSOCK) + { + pFds[i].revents = POLLNVAL; + ++nready; + } + continue; + } + + /* deassociate socket with event */ + WSAEventSelect(pFds[i].fd, g_hNetworkEvent, 0); + +#define WSA_TO_POLL(_wsaev, _pollev) \ + do { \ + if (NetworkEvents.lNetworkEvents & (_wsaev)) { \ + revents |= (_pollev); \ + if (NetworkEvents.iErrorCode[_wsaev##_BIT] != 0) { \ + Log2(("sock %d: %s: %R[sockerr]\n", \ + pFds[i].fd, #_wsaev, \ + NetworkEvents.iErrorCode[_wsaev##_BIT])); \ + revents |= POLLERR; \ + } \ + } \ + } while (0) + + WSA_TO_POLL(FD_READ, POLLIN); + WSA_TO_POLL(FD_ACCEPT, POLLIN); + WSA_TO_POLL(FD_WRITE, POLLOUT); + WSA_TO_POLL(FD_CONNECT, POLLOUT); + WSA_TO_POLL(FD_CLOSE, POLLHUP | (pFds[i].events & POLLIN)); + + Assert((revents & ~(pFds[i].events | POLLHUP | POLLERR)) == 0); + + if (revents != 0) + { + pFds[i].revents = revents; + ++nready; + } + } + WSAResetEvent(g_hNetworkEvent); + + if (pNready) + *pNready = nready; + + return VINF_SUCCESS; +} diff --git a/src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp b/src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp new file mode 100644 index 00000000..5b405bee --- /dev/null +++ b/src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp @@ -0,0 +1,235 @@ +/* $Id: RTWinSocketPair.cpp $ */ +/** @file + * NAT Network - socketpair(2) emulation for winsock. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/cdefs.h> +#include <iprt/errcore.h> + +#include <iprt/errcore.h> + +#include <iprt/win/winsock2.h> +#include <iprt/win/windows.h> + +#include <stdio.h> +#include <iprt/log.h> + +extern "C" int RTWinSocketPair(int domain, int type, int protocol, SOCKET socket_vector[2]) +{ + LogFlowFunc(("ENTER: domain:%d, type:%d, protocol:%d, socket_vector:%p\n", + domain, type, protocol, socket_vector)); + switch (domain) + { + case AF_INET: + break; + case AF_INET6: /* I dobt we really need it. 
*/ + default: + AssertMsgFailedReturn(("Unsuported domain:%d\n", domain), + VERR_INVALID_PARAMETER); + } + + switch(type) + { + case SOCK_STREAM: + case SOCK_DGRAM: + break; + default: + AssertMsgFailedReturn(("Unsuported type:%d\n", type), + VERR_INVALID_PARAMETER); + } + + AssertPtrReturn(socket_vector, VERR_INVALID_PARAMETER); + if (!socket_vector) + return VERR_INVALID_PARAMETER; + + socket_vector[0] = socket_vector[1] = INVALID_SOCKET; + + SOCKET listener = INVALID_SOCKET; + + union { + struct sockaddr_in in_addr; + struct sockaddr addr; + } sa[2]; + + int cb = sizeof(sa); + memset(&sa, 0, cb); + + sa[0].in_addr.sin_family = domain; + sa[0].in_addr.sin_addr.s_addr = RT_H2N_U32(INADDR_LOOPBACK); + sa[0].in_addr.sin_port = 0; + cb = sizeof(sa[0]); + + if (type == SOCK_STREAM) + { + listener = WSASocket(domain, type, protocol, 0, NULL, 0); + + if (listener == INVALID_SOCKET) + { + return VERR_INTERNAL_ERROR; + } + + int reuse = 1; + cb = sizeof(int); + int rc = setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, (char *)&reuse, cb); + + if (rc) + { + goto close_socket; + } + + cb = sizeof(sa[0]); + rc = bind(listener, &sa[0].addr, cb); + if(rc) + { + goto close_socket; + } + + memset(&sa[0], 0, cb); + rc = getsockname(listener, &sa[0].addr, &cb); + if (rc) + { + goto close_socket; + } + + rc = listen(listener, 1); + if (rc) + { + goto close_socket; + } + + socket_vector[0] = WSASocket(domain, type, protocol, 0, NULL, 0); + if (socket_vector[0] == INVALID_SOCKET) + { + goto close_socket; + } + + rc = connect(socket_vector[0], &sa[0].addr, cb); + if (rc) + goto close_socket; + + + socket_vector[1] = accept(listener, NULL, NULL); + if (socket_vector[1] == INVALID_SOCKET) + { + goto close_socket; + } + + closesocket(listener); + } + else + { + socket_vector[0] = WSASocket(domain, type, protocol, 0, NULL, 0); + + cb = sizeof(sa[0]); + int rc = bind(socket_vector[0], &sa[0].addr, cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + 
sa[1].in_addr.sin_family = domain; + sa[1].in_addr.sin_addr.s_addr = RT_H2N_U32(INADDR_LOOPBACK); + sa[1].in_addr.sin_port = 0; + + socket_vector[1] = WSASocket(domain, type, protocol, 0, NULL, 0); + rc = bind(socket_vector[1], &sa[1].addr, cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + { + u_long mode = 0; + rc = ioctlsocket(socket_vector[0], FIONBIO, &mode); + AssertMsgReturn(rc != SOCKET_ERROR, + ("ioctl error: %d\n", WSAGetLastError()), + VERR_INTERNAL_ERROR); + + rc = ioctlsocket(socket_vector[1], FIONBIO, &mode); + AssertMsgReturn(rc != SOCKET_ERROR, + ("ioctl error: %d\n", WSAGetLastError()), + VERR_INTERNAL_ERROR); + } + + memset(&sa, 0, 2 * cb); + rc = getsockname(socket_vector[0], &sa[0].addr, &cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + rc = getsockname(socket_vector[1], &sa[1].addr, &cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + rc = connect(socket_vector[0], &sa[1].addr, cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + rc = connect(socket_vector[1], &sa[0].addr, cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + } + + for (int i = 0; i < 2; ++i) { + SOCKET s = socket_vector[i]; + u_long mode = 1; + + int status = ioctlsocket(s, FIONBIO, &mode); + if (status == SOCKET_ERROR) { + LogRel(("FIONBIO: %R[sockerr]\n", WSAGetLastError())); + } + } + + LogFlowFuncLeaveRC(VINF_SUCCESS); + return VINF_SUCCESS; + +close_socket: + if (listener != INVALID_SOCKET) + closesocket(listener); + + if (socket_vector[0] != INVALID_SOCKET) + closesocket(socket_vector[0]); + + if (socket_vector[1] != INVALID_SOCKET) + closesocket(socket_vector[1]); + + LogFlowFuncLeaveRC(VERR_INTERNAL_ERROR); + return VERR_INTERNAL_ERROR; +} diff --git a/src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp b/src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp new file mode 
100644 index 00000000..b07d404c --- /dev/null +++ b/src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp @@ -0,0 +1,2352 @@ +/* $Id: VBoxNetLwipNAT.cpp $ */ +/** @file + * VBoxNetNAT - NAT Service for connecting to IntNet. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* Must be included before winutils.h (lwip/def.h), otherwise Windows build breaks. 
*/ +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" + +#include <VBox/com/assert.h> +#include <VBox/com/com.h> +#include <VBox/com/listeners.h> +#include <VBox/com/string.h> +#include <VBox/com/Guid.h> +#include <VBox/com/array.h> +#include <VBox/com/ErrorInfo.h> +#include <VBox/com/errorprint.h> +#include <VBox/com/VirtualBox.h> +#include <VBox/com/NativeEventQueue.h> + +#include <iprt/net.h> +#include <iprt/initterm.h> +#include <iprt/alloca.h> +#ifndef RT_OS_WINDOWS +# include <arpa/inet.h> +#endif +#include <iprt/err.h> +#include <iprt/time.h> +#include <iprt/timer.h> +#include <iprt/thread.h> +#include <iprt/stream.h> +#include <iprt/path.h> +#include <iprt/param.h> +#include <iprt/pipe.h> +#include <iprt/string.h> +#include <iprt/mem.h> +#include <iprt/message.h> +#include <iprt/req.h> +#include <iprt/file.h> +#include <iprt/semaphore.h> +#include <iprt/cpp/utils.h> +#include <VBox/log.h> + +#include <iprt/buildconfig.h> +#include <iprt/getopt.h> +#include <iprt/process.h> + +#include <VBox/sup.h> +#include <VBox/intnet.h> +#include <VBox/intnetinline.h> +#include <VBox/vmm/pdmnetinline.h> +#include <VBox/vmm/vmm.h> +#include <VBox/version.h> + +#ifndef RT_OS_WINDOWS +# include <sys/poll.h> +# include <sys/socket.h> +# include <netinet/in.h> +# ifdef RT_OS_LINUX +# include <linux/icmp.h> /* ICMP_FILTER */ +# endif +# include <netinet/icmp6.h> +#endif + +#include <map> +#include <vector> +#include <iprt/sanitized/string> + +#include <stdio.h> + +#include "../NetLib/IntNetIf.h" +#include "../NetLib/VBoxPortForwardString.h" + +extern "C" +{ +/* bunch of LWIP headers */ +#include "lwip/sys.h" +#include "lwip/pbuf.h" +#include "lwip/netif.h" +#include "lwip/ethip6.h" +#include "lwip/nd6.h" // for proxy_na_hook +#include "lwip/mld6.h" +#include "lwip/tcpip.h" +#include "netif/etharp.h" + +#include "proxy.h" +#include "pxremap.h" +#include "portfwd.h" +} + +#include "VBoxLwipCore.h" + +#ifdef VBOX_RAWSOCK_DEBUG_HELPER +#if 
defined(VBOX_WITH_HARDENING) /* obviously */ \ + || defined(RT_OS_WINDOWS) /* not used */ \ + || defined(RT_OS_DARWIN) /* not necessary */ +# error Have you forgotten to turn off VBOX_RAWSOCK_DEBUG_HELPER? +#endif +/* ask the privileged helper to create a raw socket for us */ +extern "C" int getrawsock(int type); +#endif + + + +typedef struct NATSERVICEPORTFORWARDRULE +{ + PORTFORWARDRULE Pfr; + fwspec FWSpec; +} NATSERVICEPORTFORWARDRULE, *PNATSERVICEPORTFORWARDRULE; + +typedef std::vector<NATSERVICEPORTFORWARDRULE> VECNATSERVICEPF; +typedef VECNATSERVICEPF::iterator ITERATORNATSERVICEPF; +typedef VECNATSERVICEPF::const_iterator CITERATORNATSERVICEPF; + + +class VBoxNetLwipNAT +{ + static RTGETOPTDEF s_aGetOptDef[]; + + com::Utf8Str m_strNetworkName; + int m_uVerbosity; + + ComPtr<IVirtualBoxClient> virtualboxClient; + ComPtr<IVirtualBox> virtualbox; + ComPtr<IHost> m_host; + ComPtr<INATNetwork> m_net; + + RTMAC m_MacAddress; + INTNETIFCTX m_hIf; + RTTHREAD m_hThrRecv; + + /** Home folder location; used as default directory for several paths. */ + com::Utf8Str m_strHome; + + struct proxy_options m_ProxyOptions; + struct sockaddr_in m_src4; + struct sockaddr_in6 m_src6; + /** + * place for registered local interfaces. 
+ */ + ip4_lomap m_lo2off[10]; + ip4_lomap_desc m_loOptDescriptor; + + uint16_t m_u16Mtu; + netif m_LwipNetIf; + + VECNATSERVICEPF m_vecPortForwardRule4; + VECNATSERVICEPF m_vecPortForwardRule6; + + class Listener + { + class Adapter; + typedef ListenerImpl<Adapter, VBoxNetLwipNAT *> Impl; + + ComObjPtr<Impl> m_pListenerImpl; + ComPtr<IEventSource> m_pEventSource; + + public: + HRESULT init(VBoxNetLwipNAT *pNAT); + void uninit(); + + template <typename IEventful> + HRESULT listen(const ComPtr<IEventful> &pEventful, + const VBoxEventType_T aEvents[]); + HRESULT unlisten(); + + private: + HRESULT doListen(const ComPtr<IEventSource> &pEventSource, + const VBoxEventType_T aEvents[]); + }; + + Listener m_ListenerNATNet; + Listener m_ListenerVirtualBox; + Listener m_ListenerVBoxClient; + +public: + VBoxNetLwipNAT(); + ~VBoxNetLwipNAT(); + + RTEXITCODE parseArgs(int argc, char *argv[]); + + int init(); + int run(); + void shutdown(); + +private: + RTEXITCODE usage(); + + int initCom(); + int initHome(); + int initLog(); + int initIPv4(); + int initIPv4LoopbackMap(); + int initIPv6(); + int initComEvents(); + + int getExtraData(com::Utf8Str &strValueOut, const char *pcszKey); + + static void reportError(const char *a_pcszFormat, ...) 
RT_IPRT_FORMAT_ATTR(1, 2); + + static HRESULT reportComError(ComPtr<IUnknown> iface, + const com::Utf8Str &strContext, + HRESULT hrc); + static void reportErrorInfoList(const com::ErrorInfo &info, + const com::Utf8Str &strContext); + static void reportErrorInfo(const com::ErrorInfo &info); + + void initIPv4RawSock(); + void initIPv6RawSock(); + + static DECLCALLBACK(void) onLwipTcpIpInit(void *arg); + static DECLCALLBACK(void) onLwipTcpIpFini(void *arg); + static DECLCALLBACK(err_t) netifInit(netif *pNetif) RT_NOTHROW_PROTO; + + HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent); + + const char **getHostNameservers(); + + int fetchNatPortForwardRules(VECNATSERVICEPF &vec, bool fIsIPv6); + static int natServiceProcessRegisteredPf(VECNATSERVICEPF &vecPf); + static int natServicePfRegister(NATSERVICEPORTFORWARDRULE &natServicePf); + + static DECLCALLBACK(int) receiveThread(RTTHREAD hThreadSelf, void *pvUser); + + /* input from intnet */ + static DECLCALLBACK(void) processFrame(void *pvUser, void *pvFrame, uint32_t cbFrame); + + /* output to intnet */ + static DECLCALLBACK(err_t) netifLinkoutput(netif *pNetif, pbuf *pBuf) RT_NOTHROW_PROTO; +}; + + + +VBoxNetLwipNAT::VBoxNetLwipNAT() + : m_uVerbosity(0), + m_hThrRecv(NIL_RTTHREAD) +{ + LogFlowFuncEnter(); + + RT_ZERO(m_ProxyOptions.ipv4_addr); + RT_ZERO(m_ProxyOptions.ipv4_mask); + RT_ZERO(m_ProxyOptions.ipv6_addr); + m_ProxyOptions.ipv6_enabled = 0; + m_ProxyOptions.ipv6_defroute = 0; + m_ProxyOptions.icmpsock4 = INVALID_SOCKET; + m_ProxyOptions.icmpsock6 = INVALID_SOCKET; + m_ProxyOptions.tftp_root = NULL; + m_ProxyOptions.src4 = NULL; + m_ProxyOptions.src6 = NULL; + RT_ZERO(m_src4); + RT_ZERO(m_src6); + m_src4.sin_family = AF_INET; + m_src6.sin6_family = AF_INET6; +#if HAVE_SA_LEN + m_src4.sin_len = sizeof(m_src4); + m_src6.sin6_len = sizeof(m_src6); +#endif + m_ProxyOptions.lomap_desc = NULL; + m_ProxyOptions.nameservers = NULL; + + m_LwipNetIf.name[0] = 'N'; + m_LwipNetIf.name[1] = 'T'; + + 
m_MacAddress.au8[0] = 0x52; + m_MacAddress.au8[1] = 0x54; + m_MacAddress.au8[2] = 0; + m_MacAddress.au8[3] = 0x12; + m_MacAddress.au8[4] = 0x35; + m_MacAddress.au8[5] = 0; + + RT_ZERO(m_lo2off); + m_loOptDescriptor.lomap = NULL; + m_loOptDescriptor.num_lomap = 0; + + LogFlowFuncLeave(); +} + + +VBoxNetLwipNAT::~VBoxNetLwipNAT() +{ + if (m_ProxyOptions.tftp_root) + { + RTStrFree((char *)m_ProxyOptions.tftp_root); + m_ProxyOptions.tftp_root = NULL; + } + if (m_ProxyOptions.nameservers) + { + const char **pv = m_ProxyOptions.nameservers; + while (*pv) + { + RTStrFree((char*)*pv); + pv++; + } + RTMemFree(m_ProxyOptions.nameservers); + m_ProxyOptions.nameservers = NULL; + } +} + + +/** + * Command line options. + */ +RTGETOPTDEF VBoxNetLwipNAT::s_aGetOptDef[] = +{ + { "--network", 'n', RTGETOPT_REQ_STRING }, + { "--verbose", 'v', RTGETOPT_REQ_NOTHING }, +}; + + +/** Icky hack to tell the caller it should exit with RTEXITCODE_SUCCESS */ +#define RTEXITCODE_DONE RTEXITCODE_32BIT_HACK + +RTEXITCODE +VBoxNetLwipNAT::usage() +{ + RTPrintf("%s Version %sr%u\n" + "Copyright (C) 2009-" VBOX_C_YEAR " " VBOX_VENDOR "\n" + "\n" + "Usage: %s <options>\n" + "\n" + "Options:\n", + RTProcShortName(), RTBldCfgVersion(), RTBldCfgRevision(), + RTProcShortName()); + for (size_t i = 0; i < RT_ELEMENTS(s_aGetOptDef); ++i) + RTPrintf(" -%c, %s\n", s_aGetOptDef[i].iShort, s_aGetOptDef[i].pszLong); + + return RTEXITCODE_DONE; +} + + +RTEXITCODE +VBoxNetLwipNAT::parseArgs(int argc, char *argv[]) +{ + unsigned int uVerbosity = 0; + int rc; + + RTGETOPTSTATE State; + rc = RTGetOptInit(&State, argc, argv, + s_aGetOptDef, RT_ELEMENTS(s_aGetOptDef), + 1, 0); + + int ch; + RTGETOPTUNION Val; + while ((ch = RTGetOpt(&State, &Val)) != 0) + { + switch (ch) + { + case 'n': /* --network */ + if (m_strNetworkName.isNotEmpty()) + return RTMsgErrorExit(RTEXITCODE_SYNTAX, "multiple --network options"); + m_strNetworkName = Val.psz; + break; + + case 'v': /* --verbose */ + ++uVerbosity; + break; + + + /* + * 
Standard options recognized by RTGetOpt() + */ + + case 'V': /* --version */ + RTPrintf("%sr%u\n", RTBldCfgVersion(), RTBldCfgRevision()); + return RTEXITCODE_DONE; + + case 'h': /* --help */ + return usage(); + + case VINF_GETOPT_NOT_OPTION: + return RTMsgErrorExit(RTEXITCODE_SYNTAX, "unexpected non-option argument"); + + default: + return RTGetOptPrintError(ch, &Val); + } + } + + if (m_strNetworkName.isEmpty()) + return RTMsgErrorExit(RTEXITCODE_SYNTAX, "missing --network option"); + + m_uVerbosity = uVerbosity; + return RTEXITCODE_SUCCESS; +} + + +/** + * Perform actual initialization. + * + * This code runs on the main thread. Establish COM connection with + * VBoxSVC so that we can do API calls. Starts the LWIP thread. + */ +int VBoxNetLwipNAT::init() +{ + HRESULT hrc; + int rc; + + LogFlowFuncEnter(); + + /* Get the COM API set up. */ + rc = initCom(); + if (RT_FAILURE(rc)) + return rc; + + /* Get the home folder location. It's ok if it fails. */ + initHome(); + + /* + * We get the network name on the command line. Get hold of its + * API object to get the rest of the configuration from. + */ + hrc = virtualbox->FindNATNetworkByName(com::Bstr(m_strNetworkName).raw(), + m_net.asOutParam()); + if (FAILED(hrc)) + { + reportComError(virtualbox, "FindNATNetworkByName", hrc); + return VERR_NOT_FOUND; + } + + /* + * Now that we know the network name and have ensured that it + * indeed exists we can create the release log file. + */ + initLog(); + + // resolver changes are reported on vbox but are retrieved from + // host so stash a pointer for future lookups + hrc = virtualbox->COMGETTER(Host)(m_host.asOutParam()); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + + /* Get the settings related to IPv4. */ + rc = initIPv4(); + if (RT_FAILURE(rc)) + return rc; + + /* Get the settings related to IPv6. 
*/ + rc = initIPv6(); + if (RT_FAILURE(rc)) + return rc; + + + fetchNatPortForwardRules(m_vecPortForwardRule4, /* :fIsIPv6 */ false); + if (m_ProxyOptions.ipv6_enabled) + fetchNatPortForwardRules(m_vecPortForwardRule6, /* :fIsIPv6 */ true); + + + if (m_strHome.isNotEmpty()) + { + com::Utf8StrFmt strTftpRoot("%s%c%s", m_strHome.c_str(), RTPATH_DELIMITER, "TFTP"); + char *pszStrTemp; // avoid const char ** vs char ** + rc = RTStrUtf8ToCurrentCP(&pszStrTemp, strTftpRoot.c_str()); + AssertRC(rc); + m_ProxyOptions.tftp_root = pszStrTemp; + } + + m_ProxyOptions.nameservers = getHostNameservers(); + + initComEvents(); + /* end of COM initialization */ + + /* connect to the intnet */ + rc = IntNetR3IfCreate(&m_hIf, m_strNetworkName.c_str()); + if (RT_SUCCESS(rc)) + rc = IntNetR3IfSetActive(m_hIf, true /*fActive*/); + + LogFlowFuncLeaveRC(rc); + return rc; +} + + +/** + * Primary COM initialization performed on the main thread. + * + * This initializes COM and obtains VirtualBox Client and VirtualBox + * objects. + * + * @note The member variables for them are in the base class. We + * currently do it here so that we can report errors properly, because + * the base class' VBoxNetBaseService::init() is a bit naive and + * fixing that would just create unnecessary churn for little + * immediate gain. It's easier to ignore the base class code and do + * it ourselves and do the refactoring later. 
+ */ +int VBoxNetLwipNAT::initCom() +{ + HRESULT hrc; + + hrc = com::Initialize(); + if (FAILED(hrc)) + { +#ifdef VBOX_WITH_XPCOM + if (hrc == NS_ERROR_FILE_ACCESS_DENIED) + { + char szHome[RTPATH_MAX] = ""; + int vrc = com::GetVBoxUserHomeDirectory(szHome, sizeof(szHome), false); + if (RT_SUCCESS(vrc)) + { + return RTMsgErrorExit(RTEXITCODE_INIT, + "Failed to initialize COM: %s: %Rhrf", + szHome, hrc); + } + } +#endif /* VBOX_WITH_XPCOM */ + return RTMsgErrorExit(RTEXITCODE_INIT, + "Failed to initialize COM: %Rhrf", hrc); + } + + hrc = virtualboxClient.createInprocObject(CLSID_VirtualBoxClient); + if (FAILED(hrc)) + { + reportError("Failed to create VirtualBox Client object: %Rhra", hrc); + return VERR_GENERAL_FAILURE; + } + + hrc = virtualboxClient->COMGETTER(VirtualBox)(virtualbox.asOutParam()); + if (FAILED(hrc)) + { + reportError("Failed to obtain VirtualBox object: %Rhra", hrc); + return VERR_GENERAL_FAILURE; + } + + return VINF_SUCCESS; +} + + +/** + * Get the VirtualBox home folder. + * + * It is used as the base directory for the default release log file + * and for the TFTP root location. + */ +int VBoxNetLwipNAT::initHome() +{ + HRESULT hrc; + int rc; + + com::Bstr bstrHome; + hrc = virtualbox->COMGETTER(HomeFolder)(bstrHome.asOutParam()); + if (SUCCEEDED(hrc)) + { + m_strHome = bstrHome; + return VINF_SUCCESS; + } + + /* + * In the unlikely event that we have failed to retrieve + * HomeFolder via the API, try the fallback method. Note that + * despite "com" namespace it does not use COM. + */ + char szHome[RTPATH_MAX] = ""; + rc = com::GetVBoxUserHomeDirectory(szHome, sizeof(szHome), false); + if (RT_SUCCESS(rc)) + { + m_strHome = szHome; + return VINF_SUCCESS; + } + + return rc; +} + + +/* + * Read IPv4 related settings and do necessary initialization. These + * settings will be picked up by the proxy on the lwIP thread. See + * onLwipTcpIpInit(). 
+ */ +int VBoxNetLwipNAT::initIPv4() +{ + HRESULT hrc; + int rc; + + AssertReturn(m_net.isNotNull(), VERR_GENERAL_FAILURE); + + + /* + * IPv4 address and mask. + */ + com::Bstr bstrIPv4Prefix; + hrc = m_net->COMGETTER(Network)(bstrIPv4Prefix.asOutParam()); + if (FAILED(hrc)) + { + reportComError(m_net, "Network", hrc); + return VERR_GENERAL_FAILURE; + } + + RTNETADDRIPV4 Net4, Mask4; + int iPrefixLength; + rc = RTNetStrToIPv4Cidr(com::Utf8Str(bstrIPv4Prefix).c_str(), + &Net4, &iPrefixLength); + if (RT_FAILURE(rc)) + { + reportError("Failed to parse IPv4 prefix %ls\n", bstrIPv4Prefix.raw()); + return rc; + } + + if (iPrefixLength > 30 || 0 >= iPrefixLength) + { + reportError("Invalid IPv4 prefix length %d\n", iPrefixLength); + return VERR_INVALID_PARAMETER; + } + + rc = RTNetPrefixToMaskIPv4(iPrefixLength, &Mask4); + AssertRCReturn(rc, rc); + + /** @todo r=uwe Check the address is unicast, not a loopback, etc. */ + + RTNETADDRIPV4 Addr4; + Addr4.u = Net4.u | RT_H2N_U32_C(0x00000001); + + memcpy(&m_ProxyOptions.ipv4_addr, &Addr4, sizeof(ip_addr)); + memcpy(&m_ProxyOptions.ipv4_mask, &Mask4, sizeof(ip_addr)); + + + /* Raw socket for ICMP. */ + initIPv4RawSock(); + + + /* IPv4 source address (host), if configured. */ + com::Utf8Str strSourceIp4; + rc = getExtraData(strSourceIp4, "SourceIp4"); + if (RT_SUCCESS(rc) && strSourceIp4.isNotEmpty()) + { + RTNETADDRIPV4 addr; + rc = RTNetStrToIPv4Addr(strSourceIp4.c_str(), &addr); + if (RT_SUCCESS(rc)) + { + m_src4.sin_addr.s_addr = addr.u; + m_ProxyOptions.src4 = &m_src4; + + LogRel(("Will use %RTnaipv4 as IPv4 source address\n", + m_src4.sin_addr.s_addr)); + } + else + { + LogRel(("Failed to parse \"%s\" IPv4 source address specification\n", + strSourceIp4.c_str())); + } + } + + /* Make host's loopback(s) available from inside the natnet */ + initIPv4LoopbackMap(); + + return VINF_SUCCESS; +} + + +/** + * Create raw IPv4 socket for sending and snooping ICMP. 
+ */ +void VBoxNetLwipNAT::initIPv4RawSock() +{ + SOCKET icmpsock4 = INVALID_SOCKET; + +#ifndef RT_OS_DARWIN + const int icmpstype = SOCK_RAW; +#else + /* on OS X it's not privileged */ + const int icmpstype = SOCK_DGRAM; +#endif + + icmpsock4 = socket(AF_INET, icmpstype, IPPROTO_ICMP); + if (icmpsock4 == INVALID_SOCKET) + { + perror("IPPROTO_ICMP"); +#ifdef VBOX_RAWSOCK_DEBUG_HELPER + icmpsock4 = getrawsock(AF_INET); +#endif + } + + if (icmpsock4 != INVALID_SOCKET) + { +#ifdef ICMP_FILTER // Linux specific + struct icmp_filter flt = { + ~(uint32_t)( + (1U << ICMP_ECHOREPLY) + | (1U << ICMP_DEST_UNREACH) + | (1U << ICMP_TIME_EXCEEDED) + ) + }; + + int status = setsockopt(icmpsock4, SOL_RAW, ICMP_FILTER, + &flt, sizeof(flt)); + if (status < 0) + { + perror("ICMP_FILTER"); + } +#endif + } + + m_ProxyOptions.icmpsock4 = icmpsock4; +} + + +/** + * Init mapping from the natnet's IPv4 addresses to host's IPv4 + * loopbacks. Plural "loopbacks" because it's now quite common to run + * services on loopback addresses other than 127.0.0.1. E.g. a + * caching dns proxy on 127.0.1.1 or 127.0.0.53. 
+ */ +int VBoxNetLwipNAT::initIPv4LoopbackMap() +{ + HRESULT hrc; + int rc; + + com::SafeArray<BSTR> aStrLocalMappings; + hrc = m_net->COMGETTER(LocalMappings)(ComSafeArrayAsOutParam(aStrLocalMappings)); + if (FAILED(hrc)) + { + reportComError(m_net, "LocalMappings", hrc); + return VERR_GENERAL_FAILURE; + } + + if (aStrLocalMappings.size() == 0) + return VINF_SUCCESS; + + + /* netmask in host order, to verify the offsets */ + uint32_t uMask = RT_N2H_U32(ip4_addr_get_u32(&m_ProxyOptions.ipv4_mask)); + + + /* + * Process mappings of the form "127.x.y.z=off" + */ + unsigned int dst = 0; /* typeof(ip4_lomap_desc::num_lomap) */ + for (size_t i = 0; i < aStrLocalMappings.size(); ++i) + { + com::Utf8Str strMapping(aStrLocalMappings[i]); + const char *pcszRule = strMapping.c_str(); + LogRel(("IPv4 loopback mapping %zu: %s\n", i, pcszRule)); + + RTNETADDRIPV4 Loopback4; + char *pszNext; + rc = RTNetStrToIPv4AddrEx(pcszRule, &Loopback4, &pszNext); + if (RT_FAILURE(rc)) + { + LogRel(("Failed to parse IPv4 address: %Rra\n", rc)); + continue; + } + + if (Loopback4.au8[0] != 127) + { + LogRel(("Not an IPv4 loopback address\n")); + continue; + } + + if (rc != VWRN_TRAILING_CHARS) + { + LogRel(("Missing right hand side\n")); + continue; + } + + pcszRule = RTStrStripL(pszNext); + if (*pcszRule != '=') + { + LogRel(("Invalid rule format\n")); + continue; + } + + pcszRule = RTStrStripL(pcszRule+1); + if (*pszNext == '\0') + { + LogRel(("Empty right hand side\n")); + continue; + } + + uint32_t u32Offset; + rc = RTStrToUInt32Ex(pcszRule, &pszNext, 10, &u32Offset); + if (rc != VINF_SUCCESS && rc != VWRN_TRAILING_SPACES) + { + LogRel(("Invalid offset\n")); + continue; + } + + if (u32Offset <= 1 || u32Offset == ~uMask) + { + LogRel(("Offset maps to a reserved address\n")); + continue; + } + + if ((u32Offset & uMask) != 0) + { + LogRel(("Offset exceeds the network size\n")); + continue; + } + + if (dst >= RT_ELEMENTS(m_lo2off)) + { + LogRel(("Ignoring the mapping, too many mappings 
already\n")); + continue; + } + + ip4_addr_set_u32(&m_lo2off[dst].loaddr, Loopback4.u); + m_lo2off[dst].off = u32Offset; + ++dst; + } + + if (dst > 0) + { + m_loOptDescriptor.lomap = m_lo2off; + m_loOptDescriptor.num_lomap = dst; + m_ProxyOptions.lomap_desc = &m_loOptDescriptor; + } + + return VINF_SUCCESS; +} + + +/* + * Read IPv6 related settings and do necessary initialization. These + * settings will be picked up by the proxy on the lwIP thread. See + * onLwipTcpIpInit(). + */ +int VBoxNetLwipNAT::initIPv6() +{ + HRESULT hrc; + int rc; + + AssertReturn(m_net.isNotNull(), VERR_GENERAL_FAILURE); + + + /* Is IPv6 enabled for this network at all? */ + BOOL fIPv6Enabled = FALSE; + hrc = m_net->COMGETTER(IPv6Enabled)(&fIPv6Enabled); + if (FAILED(hrc)) + { + reportComError(m_net, "IPv6Enabled", hrc); + return VERR_GENERAL_FAILURE; + } + + m_ProxyOptions.ipv6_enabled = !!fIPv6Enabled; + if (!fIPv6Enabled) + return VINF_SUCCESS; + + + /* + * IPv6 address. + */ + com::Bstr bstrIPv6Prefix; + hrc = m_net->COMGETTER(IPv6Prefix)(bstrIPv6Prefix.asOutParam()); + if (FAILED(hrc)) + { + reportComError(m_net, "IPv6Prefix", hrc); + return VERR_GENERAL_FAILURE; + } + + RTNETADDRIPV6 Net6; + int iPrefixLength; + rc = RTNetStrToIPv6Cidr(com::Utf8Str(bstrIPv6Prefix).c_str(), + &Net6, &iPrefixLength); + if (RT_FAILURE(rc)) + { + reportError("Failed to parse IPv6 prefix %ls\n", bstrIPv6Prefix.raw()); + return rc; + } + + /* Allow both addr:: and addr::/64 */ + if (iPrefixLength == 128) /* no length was specified after the address? */ + iPrefixLength = 64; /* take it to mean /64 which we require anyway */ + else if (iPrefixLength != 64) + { + reportError("Invalid IPv6 prefix length %d," + " must be 64.\n", iPrefixLength); + return rc; + } + + /* Verify the address is unicast. 
*/ + if ( ((Net6.au8[0] & 0xe0) != 0x20) /* global 2000::/3 */ + && ((Net6.au8[0] & 0xfe) != 0xfc)) /* local fc00::/7 */ + { + reportError("IPv6 prefix %RTnaipv6 is not unicast.\n", &Net6); + return VERR_INVALID_PARAMETER; + } + + /* Verify the interfaces ID part is zero */ + if (Net6.au64[1] != 0) + { + reportError("Non-zero bits in the interface ID part" + " of the IPv6 prefix %RTnaipv6/64.\n", &Net6); + return VERR_INVALID_PARAMETER; + } + + /* Use ...::1 as our address */ + RTNETADDRIPV6 Addr6 = Net6; + Addr6.au8[15] = 0x01; + memcpy(&m_ProxyOptions.ipv6_addr, &Addr6, sizeof(ip6_addr_t)); + + + /* + * Should we advertise ourselves as default IPv6 route? If the + * host doesn't have IPv6 connectivity, it's probably better not + * to, to prevent the guest from IPv6 connection attempts doomed + * to fail. + * + * We might want to make this modifiable while the natnet is + * running. + */ + BOOL fIPv6DefaultRoute = FALSE; + hrc = m_net->COMGETTER(AdvertiseDefaultIPv6RouteEnabled)(&fIPv6DefaultRoute); + if (FAILED(hrc)) + { + reportComError(m_net, "AdvertiseDefaultIPv6RouteEnabled", hrc); + return VERR_GENERAL_FAILURE; + } + + m_ProxyOptions.ipv6_defroute = fIPv6DefaultRoute; + + + /* Raw socket for ICMP. */ + initIPv6RawSock(); + + + /* IPv6 source address, if configured. */ + com::Utf8Str strSourceIp6; + rc = getExtraData(strSourceIp6, "SourceIp6"); + if (RT_SUCCESS(rc) && strSourceIp6.isNotEmpty()) + { + RTNETADDRIPV6 addr; + char *pszZone = NULL; + rc = RTNetStrToIPv6Addr(strSourceIp6.c_str(), &addr, &pszZone); + if (RT_SUCCESS(rc)) + { + memcpy(&m_src6.sin6_addr, &addr, sizeof(addr)); + m_ProxyOptions.src6 = &m_src6; + + LogRel(("Will use %RTnaipv6 as IPv6 source address\n", + &m_src6.sin6_addr)); + } + else + { + LogRel(("Failed to parse \"%s\" IPv6 source address specification\n", + strSourceIp6.c_str())); + } + } + + return VINF_SUCCESS; +} + + +/** + * Create raw IPv6 socket for sending and snooping ICMP6. 
+ */ +void VBoxNetLwipNAT::initIPv6RawSock() +{ + SOCKET icmpsock6 = INVALID_SOCKET; + +#ifndef RT_OS_DARWIN + const int icmpstype = SOCK_RAW; +#else + /* on OS X it's not privileged */ + const int icmpstype = SOCK_DGRAM; +#endif + + icmpsock6 = socket(AF_INET6, icmpstype, IPPROTO_ICMPV6); + if (icmpsock6 == INVALID_SOCKET) + { + perror("IPPROTO_ICMPV6"); +#ifdef VBOX_RAWSOCK_DEBUG_HELPER + icmpsock6 = getrawsock(AF_INET6); +#endif + } + + if (icmpsock6 != INVALID_SOCKET) + { +#ifdef ICMP6_FILTER // Windows doesn't support RFC 3542 API + /* + * XXX: We do this here for now, not in pxping.c, to avoid + * name clashes between lwIP and system headers. + */ + struct icmp6_filter flt; + ICMP6_FILTER_SETBLOCKALL(&flt); + + ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &flt); + + ICMP6_FILTER_SETPASS(ICMP6_DST_UNREACH, &flt); + ICMP6_FILTER_SETPASS(ICMP6_PACKET_TOO_BIG, &flt); + ICMP6_FILTER_SETPASS(ICMP6_TIME_EXCEEDED, &flt); + ICMP6_FILTER_SETPASS(ICMP6_PARAM_PROB, &flt); + + int status = setsockopt(icmpsock6, IPPROTO_ICMPV6, ICMP6_FILTER, + &flt, sizeof(flt)); + if (status < 0) + { + perror("ICMP6_FILTER"); + } +#endif + } + + m_ProxyOptions.icmpsock6 = icmpsock6; +} + + + +/** + * Adapter for the ListenerImpl template. It has to be a separate + * object because ListenerImpl deletes it. Just a small wrapper that + * delegates the real work back to VBoxNetLwipNAT. 
+ */ +class VBoxNetLwipNAT::Listener::Adapter +{ + VBoxNetLwipNAT *m_pNAT; +public: + Adapter() : m_pNAT(NULL) {} + HRESULT init() { return init(NULL); } + void uninit() { m_pNAT = NULL; } + + HRESULT init(VBoxNetLwipNAT *pNAT) + { + m_pNAT = pNAT; + return S_OK; + } + + HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent) + { + if (RT_LIKELY(m_pNAT != NULL)) + return m_pNAT->HandleEvent(aEventType, pEvent); + else + return S_OK; + } +}; + + +HRESULT +VBoxNetLwipNAT::Listener::init(VBoxNetLwipNAT *pNAT) +{ + HRESULT hrc; + + hrc = m_pListenerImpl.createObject(); + if (FAILED(hrc)) + return hrc; + + hrc = m_pListenerImpl->init(new Adapter(), pNAT); + if (FAILED(hrc)) + { + VBoxNetLwipNAT::reportComError(m_pListenerImpl, "init", hrc); + return hrc; + } + + return hrc; +} + + +void +VBoxNetLwipNAT::Listener::uninit() +{ + unlisten(); + m_pListenerImpl.setNull(); +} + + +/* + * There's no base interface that exposes "eventSource" so fake it + * with a template. + */ +template <typename IEventful> +HRESULT +VBoxNetLwipNAT::Listener::listen(const ComPtr<IEventful> &pEventful, + const VBoxEventType_T aEvents[]) +{ + HRESULT hrc; + + if (m_pListenerImpl.isNull()) + return S_OK; + + ComPtr<IEventSource> pEventSource; + hrc = pEventful->COMGETTER(EventSource)(pEventSource.asOutParam()); + if (FAILED(hrc)) + { + VBoxNetLwipNAT::reportComError(pEventful, "EventSource", hrc); + return hrc; + } + + /* got a real interface, punt to the non-template code */ + hrc = doListen(pEventSource, aEvents); + if (FAILED(hrc)) + return hrc; + + return hrc; +} + + +HRESULT +VBoxNetLwipNAT::Listener::doListen(const ComPtr<IEventSource> &pEventSource, + const VBoxEventType_T aEvents[]) +{ + HRESULT hrc; + + com::SafeArray<VBoxEventType_T> aInteresting; + for (size_t i = 0; aEvents[i] != VBoxEventType_Invalid; ++i) + aInteresting.push_back(aEvents[i]); + + BOOL fActive = true; + hrc = pEventSource->RegisterListener(m_pListenerImpl, + ComSafeArrayAsInParam(aInteresting), + fActive); + 
if (FAILED(hrc)) + { + VBoxNetLwipNAT::reportComError(m_pEventSource, "RegisterListener", hrc); + return hrc; + } + + m_pEventSource = pEventSource; + return hrc; +} + + +HRESULT +VBoxNetLwipNAT::Listener::unlisten() +{ + HRESULT hrc; + + if (m_pEventSource.isNull()) + return S_OK; + + const ComPtr<IEventSource> pEventSource = m_pEventSource; + m_pEventSource.setNull(); + + hrc = pEventSource->UnregisterListener(m_pListenerImpl); + if (FAILED(hrc)) + { + VBoxNetLwipNAT::reportComError(pEventSource, "UnregisterListener", hrc); + return hrc; + } + + return hrc; +} + + + +/** + * Create and register API event listeners. + */ +int VBoxNetLwipNAT::initComEvents() +{ + /** + * @todo r=uwe These events are reported on both IVirtualBox and + * INATNetwork objects. We used to listen for them on our + * network, but it was changed later to listen on vbox. Leave it + * that way for now. Note that HandleEvent() has to do additional + * check for them to ignore events for other networks. + */ + static const VBoxEventType_T s_aNATNetEvents[] = { + VBoxEventType_OnNATNetworkPortForward, + VBoxEventType_OnNATNetworkSetting, + VBoxEventType_Invalid + }; + m_ListenerNATNet.init(this); + m_ListenerNATNet.listen(virtualbox, s_aNATNetEvents); // sic! + + static const VBoxEventType_T s_aVirtualBoxEvents[] = { + VBoxEventType_OnHostNameResolutionConfigurationChange, + VBoxEventType_OnNATNetworkStartStop, + VBoxEventType_Invalid + }; + m_ListenerVirtualBox.init(this); + m_ListenerVirtualBox.listen(virtualbox, s_aVirtualBoxEvents); + + static const VBoxEventType_T s_aVBoxClientEvents[] = { + VBoxEventType_OnVBoxSVCAvailabilityChanged, + VBoxEventType_Invalid + }; + m_ListenerVBoxClient.init(this); + m_ListenerVBoxClient.listen(virtualboxClient, s_aVBoxClientEvents); + + return VINF_SUCCESS; +} + + +/** + * Perform lwIP initialization on the lwIP "tcpip" thread. + * + * The lwIP thread was created in init() and this function is run + * before the main lwIP loop is started. 
It is responsible for
 * setting up lwIP state, configuring interface(s), etc.
 */
/*static*/
DECLCALLBACK(void) VBoxNetLwipNAT::onLwipTcpIpInit(void *arg)
{
    AssertPtrReturnVoid(arg);
    VBoxNetLwipNAT *pThis = static_cast<VBoxNetLwipNAT *>(arg);

    HRESULT hrcCom = com::Initialize();
    AssertComRCReturnVoid(hrcCom);

    /* Install the proxy hooks: IPv4 first, then IPv6. */
    proxy_arp_hook = pxremap_proxy_arp;
    proxy_ip4_divert_hook = pxremap_ip4_divert;

    proxy_na_hook = pxremap_proxy_na;
    proxy_ip6_divert_hook = pxremap_ip6_divert;

    /*
     * Add our interface.  netifInit() is passed as the init callback.
     * XXX: our own address is also passed as the gateway.
     */
    netif *pIf = netif_add(&pThis->m_LwipNetIf,
                           &pThis->m_ProxyOptions.ipv4_addr,  /* address */
                           &pThis->m_ProxyOptions.ipv4_mask,  /* netmask */
                           &pThis->m_ProxyOptions.ipv4_addr,  /* gateway */
                           pThis,                             /* state */
                           VBoxNetLwipNAT::netifInit,         /* netif_init_fn */
                           tcpip_input);                      /* netif_input_fn */
    AssertPtrReturnVoid(pIf);

    /* Log the resulting configuration. */
    LogRel(("netif %c%c%d: mac %RTmac\n",
            pIf->name[0], pIf->name[1], pIf->num,
            pIf->hwaddr));
    LogRel(("netif %c%c%d: inet %RTnaipv4 netmask %RTnaipv4\n",
            pIf->name[0], pIf->name[1], pIf->num,
            pIf->ip_addr, pIf->netmask));
    for (int iSlot = 0; iSlot < LWIP_IPV6_NUM_ADDRESSES; ++iSlot)
    {
        if (ip6_addr_isinvalid(netif_ip6_addr_state(pIf, iSlot)))
            continue;

        LogRel(("netif %c%c%d: inet6 %RTnaipv6\n",
                pIf->name[0], pIf->name[1], pIf->num,
                netif_ip6_addr(pIf, iSlot)));
    }

    netif_set_up(pIf);
    netif_set_link_up(pIf);

    if (pThis->m_ProxyOptions.ipv6_enabled)
    {
        /*
         * XXX: lwIP currently only ever calls mld6_joingroup() in
         * nd6_tmr() for fresh tentative addresses, which is a wrong place
         * to do it - but I'm not keen on fixing this properly for now
         * (with correct handling of interface up and down transitions,
         * etc).  So stick it here as a kludge.
         */
        for (int iSlot = 0; iSlot < 2; ++iSlot)
        {
            ip6_addr_t *pAddr6 = netif_ip6_addr(pIf, iSlot);

            ip6_addr_t SolicitedNodeGroup;
            ip6_addr_set_solicitednode(&SolicitedNodeGroup, pAddr6->addr[3]);
            mld6_joingroup(pAddr6, &SolicitedNodeGroup);
        }

        /*
         * XXX: We must join the solicited-node multicast for the
         * addresses we do IPv6 NA-proxy for.  We map IPv6 loopback to
         * proxy address + 1.  We only need the low 24 bits, and those are
         * fixed.
         */
        ip6_addr_t MappedLoopbackGroup;
        ip6_addr_set_solicitednode(&MappedLoopbackGroup,
                                   /* last 24 bits of the address */
                                   PP_HTONL(0x00000002));
        mld6_netif_joingroup(pIf, &MappedLoopbackGroup);
    }

    proxy_init(&pThis->m_LwipNetIf, &pThis->m_ProxyOptions);

    /* Activate the port-forwarding rules fetched during init(). */
    natServiceProcessRegisteredPf(pThis->m_vecPortForwardRule4);
    natServiceProcessRegisteredPf(pThis->m_vecPortForwardRule6);
}


/**
 * lwIP's callback to configure the interface.
 *
 * Called from onLwipTcpIpInit() via netif_add().  Called after the
 * interface is mostly initialized, and its IPv4 address is already
 * configured.  Here we still need to configure the MAC address and
 * IPv6 addresses.  It's best to consult the source of netif_add() for
 * the exact details.
+ */ +/* static */ DECLCALLBACK(err_t) +VBoxNetLwipNAT::netifInit(netif *pNetif) RT_NOTHROW_DEF +{ + err_t rcLwip = ERR_OK; + + AssertPtrReturn(pNetif, ERR_ARG); + + VBoxNetLwipNAT *self = static_cast<VBoxNetLwipNAT *>(pNetif->state); + AssertPtrReturn(self, ERR_ARG); + + LogFlowFunc(("ENTER: pNetif[%c%c%d]\n", pNetif->name[0], pNetif->name[1], pNetif->num)); + /* validity */ + AssertReturn( pNetif->name[0] == 'N' + && pNetif->name[1] == 'T', ERR_ARG); + + + pNetif->hwaddr_len = sizeof(RTMAC); + memcpy(pNetif->hwaddr, &self->m_MacAddress, sizeof(RTMAC)); + + self->m_u16Mtu = 1500; // XXX: FIXME + pNetif->mtu = self->m_u16Mtu; + + pNetif->flags = NETIF_FLAG_BROADCAST + | NETIF_FLAG_ETHARP /* Don't bother driver with ARP and let Lwip resolve ARP handling */ + | NETIF_FLAG_ETHERNET; /* Lwip works with ethernet too */ + + pNetif->linkoutput = netifLinkoutput; /* ether-level-pipe */ + pNetif->output = etharp_output; /* ip-pipe */ + + if (self->m_ProxyOptions.ipv6_enabled) { + pNetif->output_ip6 = ethip6_output; + + /* IPv6 link-local address in slot 0 */ + netif_create_ip6_linklocal_address(pNetif, /* :from_mac_48bit */ 1); + netif_ip6_addr_set_state(pNetif, 0, IP6_ADDR_PREFERRED); // skip DAD + + /* INATNetwork::IPv6Prefix in slot 1 */ + memcpy(netif_ip6_addr(pNetif, 1), + &self->m_ProxyOptions.ipv6_addr, sizeof(ip6_addr_t)); + netif_ip6_addr_set_state(pNetif, 1, IP6_ADDR_PREFERRED); + +#if LWIP_IPV6_SEND_ROUTER_SOLICIT + pNetif->rs_count = 0; +#endif + } + + LogFlowFunc(("LEAVE: %d\n", rcLwip)); + return rcLwip; +} + + +/** + * Run the pumps. + * + * Spawn the intnet pump thread that gets packets from the intnet and + * feeds them to lwIP. Enter COM event loop here, on the main thread. 
+ */ +int +VBoxNetLwipNAT::run() +{ + int rc; + + AssertReturn(m_hThrRecv == NIL_RTTHREAD, VERR_INVALID_STATE); + + /* spawn the lwIP tcpip thread */ + vboxLwipCoreInitialize(VBoxNetLwipNAT::onLwipTcpIpInit, this); + + /* spawn intnet input pump */ + rc = RTThreadCreate(&m_hThrRecv, + VBoxNetLwipNAT::receiveThread, this, + 0, /* :cbStack */ + RTTHREADTYPE_IO, RTTHREADFLAGS_WAITABLE, + "RECV"); + AssertRCReturn(rc, rc); + + /* main thread will run the API event queue pump */ + com::NativeEventQueue *pQueue = com::NativeEventQueue::getMainEventQueue(); + if (pQueue == NULL) + { + LogRel(("run: getMainEventQueue() == NULL\n")); + return VERR_GENERAL_FAILURE; + } + + /* dispatch API events to our listeners */ + for (;;) + { + rc = pQueue->processEventQueue(RT_INDEFINITE_WAIT); + if (rc == VERR_INTERRUPTED) + { + LogRel(("run: shutdown\n")); + break; + } + else if (rc != VINF_SUCCESS) + { + /* note any unexpected rc */ + LogRel(("run: processEventQueue: %Rrc\n", rc)); + } + } + + /* + * We are out of the event loop, so we were told to shut down. + * Tell other threads to wrap up. + */ + + /* tell the intnet input pump to terminate */ + IntNetR3IfWaitAbort(m_hIf); + + /* tell the lwIP tcpip thread to terminate */ + vboxLwipCoreFinalize(VBoxNetLwipNAT::onLwipTcpIpFini, this); + + rc = RTThreadWait(m_hThrRecv, 5000, NULL); + m_hThrRecv = NIL_RTTHREAD; + + return VINF_SUCCESS; +} + + +void +VBoxNetLwipNAT::shutdown() +{ + int rc; + + com::NativeEventQueue *pQueue = com::NativeEventQueue::getMainEventQueue(); + if (pQueue == NULL) + { + LogRel(("shutdown: getMainEventQueue() == NULL\n")); + return; + } + + /* unregister listeners */ + m_ListenerNATNet.unlisten(); + m_ListenerVirtualBox.unlisten(); + m_ListenerVBoxClient.unlisten(); + + /* tell the event loop in run() to stop */ + rc = pQueue->interruptEventQueueProcessing(); + if (RT_FAILURE(rc)) + LogRel(("shutdown: interruptEventQueueProcessing: %Rrc\n", rc)); +} + + +/** + * Run finalization on the lwIP "tcpip" thread. 
+ */ +/* static */ +DECLCALLBACK(void) VBoxNetLwipNAT::onLwipTcpIpFini(void *arg) +{ + AssertPtrReturnVoid(arg); + VBoxNetLwipNAT *self = static_cast<VBoxNetLwipNAT *>(arg); + + /* XXX: proxy finalization */ + netif_set_link_down(&self->m_LwipNetIf); + netif_set_down(&self->m_LwipNetIf); + netif_remove(&self->m_LwipNetIf); +} + + +/** + * @note: this work on Event thread. + */ +HRESULT VBoxNetLwipNAT::HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent) +{ + HRESULT hrc = S_OK; + switch (aEventType) + { + case VBoxEventType_OnNATNetworkSetting: + { + ComPtr<INATNetworkSettingEvent> pSettingsEvent(pEvent); + + com::Bstr networkName; + hrc = pSettingsEvent->COMGETTER(NetworkName)(networkName.asOutParam()); + AssertComRCReturn(hrc, hrc); + if (networkName != m_strNetworkName) + break; /* change not for our network */ + + // XXX: only handle IPv6 default route for now + if (!m_ProxyOptions.ipv6_enabled) + break; + + BOOL fIPv6DefaultRoute = FALSE; + hrc = pSettingsEvent->COMGETTER(AdvertiseDefaultIPv6RouteEnabled)(&fIPv6DefaultRoute); + AssertComRCReturn(hrc, hrc); + + if (m_ProxyOptions.ipv6_defroute == fIPv6DefaultRoute) + break; + + m_ProxyOptions.ipv6_defroute = fIPv6DefaultRoute; + tcpip_callback_with_block(proxy_rtadvd_do_quick, &m_LwipNetIf, 0); + break; + } + + case VBoxEventType_OnNATNetworkPortForward: + { + ComPtr<INATNetworkPortForwardEvent> pForwardEvent = pEvent; + + com::Bstr networkName; + hrc = pForwardEvent->COMGETTER(NetworkName)(networkName.asOutParam()); + AssertComRCReturn(hrc, hrc); + if (networkName != m_strNetworkName) + break; /* change not for our network */ + + BOOL fCreateFW; + hrc = pForwardEvent->COMGETTER(Create)(&fCreateFW); + AssertComRCReturn(hrc, hrc); + + BOOL fIPv6FW; + hrc = pForwardEvent->COMGETTER(Ipv6)(&fIPv6FW); + AssertComRCReturn(hrc, hrc); + + com::Bstr name; + hrc = pForwardEvent->COMGETTER(Name)(name.asOutParam()); + AssertComRCReturn(hrc, hrc); + + NATProtocol_T proto = NATProtocol_TCP; + hrc = 
pForwardEvent->COMGETTER(Proto)(&proto); + AssertComRCReturn(hrc, hrc); + + com::Bstr strHostAddr; + hrc = pForwardEvent->COMGETTER(HostIp)(strHostAddr.asOutParam()); + AssertComRCReturn(hrc, hrc); + + LONG lHostPort; + hrc = pForwardEvent->COMGETTER(HostPort)(&lHostPort); + AssertComRCReturn(hrc, hrc); + + com::Bstr strGuestAddr; + hrc = pForwardEvent->COMGETTER(GuestIp)(strGuestAddr.asOutParam()); + AssertComRCReturn(hrc, hrc); + + LONG lGuestPort; + hrc = pForwardEvent->COMGETTER(GuestPort)(&lGuestPort); + AssertComRCReturn(hrc, hrc); + + VECNATSERVICEPF& rules = fIPv6FW ? m_vecPortForwardRule6 + : m_vecPortForwardRule4; + + NATSERVICEPORTFORWARDRULE r; + RT_ZERO(r); + + r.Pfr.fPfrIPv6 = fIPv6FW; + + switch (proto) + { + case NATProtocol_TCP: + r.Pfr.iPfrProto = IPPROTO_TCP; + break; + case NATProtocol_UDP: + r.Pfr.iPfrProto = IPPROTO_UDP; + break; + + default: + LogRel(("Event: %s %s port-forwarding rule \"%s\": invalid protocol %d\n", + fCreateFW ? "Add" : "Remove", + fIPv6FW ? "IPv6" : "IPv4", + com::Utf8Str(name).c_str(), + (int)proto)); + goto port_forward_done; + } + + LogRel(("Event: %s %s port-forwarding rule \"%s\": %s %s%s%s:%d -> %s%s%s:%d\n", + fCreateFW ? "Add" : "Remove", + fIPv6FW ? "IPv6" : "IPv4", + com::Utf8Str(name).c_str(), + proto == NATProtocol_TCP ? "TCP" : "UDP", + /* from */ + fIPv6FW ? "[" : "", + com::Utf8Str(strHostAddr).c_str(), + fIPv6FW ? "]" : "", + lHostPort, + /* to */ + fIPv6FW ? "[" : "", + com::Utf8Str(strGuestAddr).c_str(), + fIPv6FW ? 
"]" : "", + lGuestPort)); + + if (name.length() > sizeof(r.Pfr.szPfrName)) + { + hrc = E_INVALIDARG; + goto port_forward_done; + } + + RTStrPrintf(r.Pfr.szPfrName, sizeof(r.Pfr.szPfrName), + "%s", com::Utf8Str(name).c_str()); + + RTStrPrintf(r.Pfr.szPfrHostAddr, sizeof(r.Pfr.szPfrHostAddr), + "%s", com::Utf8Str(strHostAddr).c_str()); + + /* XXX: limits should be checked */ + r.Pfr.u16PfrHostPort = (uint16_t)lHostPort; + + RTStrPrintf(r.Pfr.szPfrGuestAddr, sizeof(r.Pfr.szPfrGuestAddr), + "%s", com::Utf8Str(strGuestAddr).c_str()); + + /* XXX: limits should be checked */ + r.Pfr.u16PfrGuestPort = (uint16_t)lGuestPort; + + if (fCreateFW) /* Addition */ + { + int rc = natServicePfRegister(r); + if (RT_SUCCESS(rc)) + rules.push_back(r); + } + else /* Deletion */ + { + ITERATORNATSERVICEPF it; + for (it = rules.begin(); it != rules.end(); ++it) + { + /* compare */ + NATSERVICEPORTFORWARDRULE &natFw = *it; + if ( natFw.Pfr.iPfrProto == r.Pfr.iPfrProto + && natFw.Pfr.u16PfrHostPort == r.Pfr.u16PfrHostPort + && strncmp(natFw.Pfr.szPfrHostAddr, r.Pfr.szPfrHostAddr, INET6_ADDRSTRLEN) == 0 + && natFw.Pfr.u16PfrGuestPort == r.Pfr.u16PfrGuestPort + && strncmp(natFw.Pfr.szPfrGuestAddr, r.Pfr.szPfrGuestAddr, INET6_ADDRSTRLEN) == 0) + { + fwspec *pFwCopy = (fwspec *)RTMemDup(&natFw.FWSpec, sizeof(natFw.FWSpec)); + if (pFwCopy) + { + int status = portfwd_rule_del(pFwCopy); + if (status == 0) + rules.erase(it); /* (pFwCopy is owned by lwip thread now.) 
*/ + else + RTMemFree(pFwCopy); + } + break; + } + } /* loop over vector elements */ + } /* condition add or delete */ + port_forward_done: + /* clean up strings */ + name.setNull(); + strHostAddr.setNull(); + strGuestAddr.setNull(); + break; + } + + case VBoxEventType_OnHostNameResolutionConfigurationChange: + { + const char **ppcszNameServers = getHostNameservers(); + err_t error; + + error = tcpip_callback_with_block(pxdns_set_nameservers, + ppcszNameServers, + /* :block */ 0); + if (error != ERR_OK && ppcszNameServers != NULL) + RTMemFree(ppcszNameServers); + break; + } + + case VBoxEventType_OnNATNetworkStartStop: + { + ComPtr <INATNetworkStartStopEvent> pStartStopEvent = pEvent; + + com::Bstr networkName; + hrc = pStartStopEvent->COMGETTER(NetworkName)(networkName.asOutParam()); + AssertComRCReturn(hrc, hrc); + if (networkName != m_strNetworkName) + break; /* change not for our network */ + + BOOL fStart = TRUE; + hrc = pStartStopEvent->COMGETTER(StartEvent)(&fStart); + AssertComRCReturn(hrc, hrc); + + if (!fStart) + shutdown(); + break; + } + + case VBoxEventType_OnVBoxSVCAvailabilityChanged: + { + LogRel(("VBoxSVC became unavailable, exiting.\n")); + shutdown(); + break; + } + + default: break; /* Shut up MSC. */ + } + return hrc; +} + + +/** + * Read the list of host's resolvers via the API. + * + * Called during initialization and in response to the + * VBoxEventType_OnHostNameResolutionConfigurationChange event. 
+ */ +const char **VBoxNetLwipNAT::getHostNameservers() +{ + if (m_host.isNull()) + return NULL; + + com::SafeArray<BSTR> aNameServers; + HRESULT hrc = m_host->COMGETTER(NameServers)(ComSafeArrayAsOutParam(aNameServers)); + if (FAILED(hrc)) + return NULL; + + const size_t cNameServers = aNameServers.size(); + if (cNameServers == 0) + return NULL; + + const char **ppcszNameServers = + (const char **)RTMemAllocZ(sizeof(char *) * (cNameServers + 1)); + if (ppcszNameServers == NULL) + return NULL; + + size_t idxLast = 0; + for (size_t i = 0; i < cNameServers; ++i) + { + com::Utf8Str strNameServer(aNameServers[i]); + ppcszNameServers[idxLast] = RTStrDup(strNameServer.c_str()); + if (ppcszNameServers[idxLast] != NULL) + ++idxLast; + } + + if (idxLast == 0) + { + RTMemFree(ppcszNameServers); + return NULL; + } + + return ppcszNameServers; +} + + +/** + * Fetch port-forwarding rules via the API. + * + * Reads the initial sets of rules from VBoxSVC. The rules will be + * activated when all the initialization and plumbing is done. See + * natServiceProcessRegisteredPf(). + */ +int VBoxNetLwipNAT::fetchNatPortForwardRules(VECNATSERVICEPF &vec, bool fIsIPv6) +{ + HRESULT hrc; + + com::SafeArray<BSTR> rules; + if (fIsIPv6) + hrc = m_net->COMGETTER(PortForwardRules6)(ComSafeArrayAsOutParam(rules)); + else + hrc = m_net->COMGETTER(PortForwardRules4)(ComSafeArrayAsOutParam(rules)); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + NATSERVICEPORTFORWARDRULE Rule; + for (size_t idxRules = 0; idxRules < rules.size(); ++idxRules) + { + Log(("%d-%s rule: %ls\n", idxRules, (fIsIPv6 ? "IPv6" : "IPv4"), rules[idxRules])); + RT_ZERO(Rule); + + int rc = netPfStrToPf(com::Utf8Str(rules[idxRules]).c_str(), fIsIPv6, + &Rule.Pfr); + if (RT_FAILURE(rc)) + continue; + + vec.push_back(Rule); + } + + return VINF_SUCCESS; +} + + +/** + * Activate the initial set of port-forwarding rules. 
+ * + * Happens after lwIP and lwIP proxy is initialized, right before lwIP + * thread starts processing messages. + */ +/* static */ +int VBoxNetLwipNAT::natServiceProcessRegisteredPf(VECNATSERVICEPF& vecRules) +{ + ITERATORNATSERVICEPF it; + for (it = vecRules.begin(); it != vecRules.end(); ++it) + { + NATSERVICEPORTFORWARDRULE &natPf = *it; + + LogRel(("Loading %s port-forwarding rule \"%s\": %s %s%s%s:%d -> %s%s%s:%d\n", + natPf.Pfr.fPfrIPv6 ? "IPv6" : "IPv4", + natPf.Pfr.szPfrName, + natPf.Pfr.iPfrProto == IPPROTO_TCP ? "TCP" : "UDP", + /* from */ + natPf.Pfr.fPfrIPv6 ? "[" : "", + natPf.Pfr.szPfrHostAddr, + natPf.Pfr.fPfrIPv6 ? "]" : "", + natPf.Pfr.u16PfrHostPort, + /* to */ + natPf.Pfr.fPfrIPv6 ? "[" : "", + natPf.Pfr.szPfrGuestAddr, + natPf.Pfr.fPfrIPv6 ? "]" : "", + natPf.Pfr.u16PfrGuestPort)); + + natServicePfRegister(natPf); + } + + return VINF_SUCCESS; +} + + +/** + * Activate a single port-forwarding rule. + * + * This is used both when we activate all the initial rules on startup + * and when port-forwarding rules are changed and we are notified via + * an API event. + */ +/* static */ +int VBoxNetLwipNAT::natServicePfRegister(NATSERVICEPORTFORWARDRULE &natPf) +{ + int lrc; + + int sockFamily = (natPf.Pfr.fPfrIPv6 ? 
PF_INET6 : PF_INET); + int socketSpec; + switch(natPf.Pfr.iPfrProto) + { + case IPPROTO_TCP: + socketSpec = SOCK_STREAM; + break; + case IPPROTO_UDP: + socketSpec = SOCK_DGRAM; + break; + default: + return VERR_IGNORED; + } + + const char *pszHostAddr = natPf.Pfr.szPfrHostAddr; + if (pszHostAddr[0] == '\0') + { + if (sockFamily == PF_INET) + pszHostAddr = "0.0.0.0"; + else + pszHostAddr = "::"; + } + + lrc = fwspec_set(&natPf.FWSpec, + sockFamily, + socketSpec, + pszHostAddr, + natPf.Pfr.u16PfrHostPort, + natPf.Pfr.szPfrGuestAddr, + natPf.Pfr.u16PfrGuestPort); + if (lrc != 0) + return VERR_IGNORED; + + fwspec *pFwCopy = (fwspec *)RTMemDup(&natPf.FWSpec, sizeof(natPf.FWSpec)); + if (pFwCopy) + { + lrc = portfwd_rule_add(pFwCopy); + if (lrc == 0) + return VINF_SUCCESS; /* (pFwCopy is owned by lwip thread now.) */ + RTMemFree(pFwCopy); + } + else + LogRel(("Unable to allocate memory for %s rule \"%s\"\n", + natPf.Pfr.fPfrIPv6 ? "IPv6" : "IPv4", + natPf.Pfr.szPfrName)); + return VERR_IGNORED; +} + + +/** + * IntNetIf receive thread. Runs intnet pump with our processFrame() + * as input callback. + */ +/* static */ DECLCALLBACK(int) +VBoxNetLwipNAT::receiveThread(RTTHREAD hThreadSelf, void *pvUser) +{ + HRESULT hrc; + int rc; + + RT_NOREF(hThreadSelf); + + AssertReturn(pvUser != NULL, VERR_INVALID_PARAMETER); + VBoxNetLwipNAT *self = static_cast<VBoxNetLwipNAT *>(pvUser); + + /* do we relaly need to init com on this thread? */ + hrc = com::Initialize(); + if (FAILED(hrc)) + return VERR_GENERAL_FAILURE; + + rc = IntNetR3IfPumpPkts(self->m_hIf, VBoxNetLwipNAT::processFrame, self, + NULL /*pfnInputGso*/, NULL /*pvUserGso*/); + if (rc == VERR_SEM_DESTROYED) + return VINF_SUCCESS; + + LogRel(("receiveThread: IntNetR3IfPumpPkts: unexpected %Rrc\n", rc)); + return VERR_INVALID_STATE; +} + + +/** + * Process an incoming frame received from the intnet. 
+ */ +/* static */ DECLCALLBACK(void) +VBoxNetLwipNAT::processFrame(void *pvUser, void *pvFrame, uint32_t cbFrame) +{ + AssertReturnVoid(pvFrame != NULL); + + /* shouldn't happen, but if it does, don't even bother */ + if (RT_UNLIKELY(cbFrame < sizeof(RTNETETHERHDR))) + return; + + /* we expect normal ethernet frame including .1Q and FCS */ + if (cbFrame > 1522) + return; + + AssertReturnVoid(pvUser != NULL); + VBoxNetLwipNAT *self = static_cast<VBoxNetLwipNAT *>(pvUser); + + struct pbuf *p = pbuf_alloc(PBUF_RAW, (u16_t)cbFrame + ETH_PAD_SIZE, PBUF_POOL); + if (RT_UNLIKELY(p == NULL)) + return; + + /* + * The code below is inlined version of: + * + * pbuf_header(p, -ETH_PAD_SIZE); // hide padding + * pbuf_take(p, pvFrame, cbFrame); + * pbuf_header(p, ETH_PAD_SIZE); // reveal padding + */ + struct pbuf *q = p; + uint8_t *pu8Chunk = (uint8_t *)pvFrame; + do { + uint8_t *payload = (uint8_t *)q->payload; + size_t len = q->len; + +#if ETH_PAD_SIZE + if (RT_LIKELY(q == p)) // single pbuf is large enough + { + payload += ETH_PAD_SIZE; + len -= ETH_PAD_SIZE; + } +#endif + memcpy(payload, pu8Chunk, len); + pu8Chunk += len; + q = q->next; + } while (RT_UNLIKELY(q != NULL)); + + /* pass input to lwIP: netif input funcion tcpip_input() */ + self->m_LwipNetIf.input(p, &self->m_LwipNetIf); +} + + +/** + * Send an outgoing frame from lwIP to intnet. 
+ */ +/* static */ DECLCALLBACK(err_t) +VBoxNetLwipNAT::netifLinkoutput(netif *pNetif, pbuf *pPBuf) RT_NOTHROW_DEF +{ + int rc; + + AssertPtrReturn(pNetif, ERR_ARG); + AssertPtrReturn(pPBuf, ERR_ARG); + + VBoxNetLwipNAT *self = static_cast<VBoxNetLwipNAT *>(pNetif->state); + AssertPtrReturn(self, ERR_IF); + AssertReturn(pNetif == &self->m_LwipNetIf, ERR_IF); + + LogFlowFunc(("ENTER: pNetif[%c%c%d], pPbuf:%p\n", + pNetif->name[0], + pNetif->name[1], + pNetif->num, + pPBuf)); + + if (pPBuf->tot_len < sizeof(struct eth_hdr)) /* includes ETH_PAD_SIZE */ + return ERR_ARG; + + size_t cbFrame = (size_t)pPBuf->tot_len - ETH_PAD_SIZE; + INTNETFRAME Frame; + rc = IntNetR3IfQueryOutputFrame(self->m_hIf, (uint32_t)cbFrame, &Frame); + if (RT_FAILURE(rc)) + return ERR_MEM; + + pbuf_copy_partial(pPBuf, Frame.pvFrame, (u16_t)cbFrame, ETH_PAD_SIZE); + rc = IntNetR3IfOutputFrameCommit(self->m_hIf, &Frame); + if (RT_FAILURE(rc)) + return ERR_IF; + + LogFlowFunc(("LEAVE: %d\n", ERR_OK)); + return ERR_OK; +} + + +/** + * Retrieve network-specific extra data item. 
+ */ +int VBoxNetLwipNAT::getExtraData(com::Utf8Str &strValueOut, const char *pcszKey) +{ + HRESULT hrc; + + AssertReturn(!virtualbox.isNull(), E_FAIL); + AssertReturn(m_strNetworkName.isNotEmpty(), E_FAIL); + AssertReturn(pcszKey != NULL, E_FAIL); + AssertReturn(*pcszKey != '\0', E_FAIL); + + com::BstrFmt bstrKey("NAT/%s/%s", m_strNetworkName.c_str(), pcszKey); + com::Bstr bstrValue; + hrc = virtualbox->GetExtraData(bstrKey.raw(), bstrValue.asOutParam()); + if (FAILED(hrc)) + { + reportComError(virtualbox, "GetExtraData", hrc); + return VERR_GENERAL_FAILURE; + } + + strValueOut = bstrValue; + return VINF_SUCCESS; +} + + +/* static */ +HRESULT VBoxNetLwipNAT::reportComError(ComPtr<IUnknown> iface, + const com::Utf8Str &strContext, + HRESULT hrc) +{ + const com::ErrorInfo info(iface, COM_IIDOF(IUnknown)); + if (info.isFullAvailable() || info.isBasicAvailable()) + { + reportErrorInfoList(info, strContext); + } + else + { + if (strContext.isNotEmpty()) + reportError("%s: %Rhra", strContext.c_str(), hrc); + else + reportError("%Rhra", hrc); + } + + return hrc; +} + + +/* static */ +void VBoxNetLwipNAT::reportErrorInfoList(const com::ErrorInfo &info, + const com::Utf8Str &strContext) +{ + if (strContext.isNotEmpty()) + reportError("%s", strContext.c_str()); + + bool fFirst = true; + for (const com::ErrorInfo *pInfo = &info; + pInfo != NULL; + pInfo = pInfo->getNext()) + { + if (fFirst) + fFirst = false; + else + reportError("--------"); + + reportErrorInfo(*pInfo); + } +} + + +/* static */ +void VBoxNetLwipNAT::reportErrorInfo(const com::ErrorInfo &info) +{ +#if defined (RT_OS_WIN) + bool haveResultCode = info.isFullAvailable(); + bool haveComponent = true; + bool haveInterfaceID = true; +#else /* !RT_OS_WIN */ + bool haveResultCode = true; + bool haveComponent = info.isFullAvailable(); + bool haveInterfaceID = info.isFullAvailable(); +#endif + com::Utf8Str message; + if (info.getText().isNotEmpty()) + message = info.getText(); + + const char *pcszDetails = "Details: "; 
+ const char *pcszComma = ", "; + const char *pcszSeparator = pcszDetails; + + if (haveResultCode) + { + message.appendPrintf("%s" "code %Rhrc (0x%RX32)", + pcszSeparator, info.getResultCode(), info.getResultCode()); + pcszSeparator = pcszComma; + } + + if (haveComponent) + { + message.appendPrintf("%s" "component %ls", + pcszSeparator, info.getComponent().raw()); + pcszSeparator = pcszComma; + } + + if (haveInterfaceID) + { + message.appendPrintf("%s" "interface %ls", + pcszSeparator, info.getInterfaceName().raw()); + pcszSeparator = pcszComma; + } + + if (info.getCalleeName().isNotEmpty()) + { + message.appendPrintf("%s" "callee %ls", + pcszSeparator, info.getCalleeName().raw()); + pcszSeparator = pcszComma; + } + + reportError("%s", message.c_str()); +} + + +/* static */ +void VBoxNetLwipNAT::reportError(const char *a_pcszFormat, ...) +{ + va_list ap; + + va_start(ap, a_pcszFormat); + com::Utf8Str message(a_pcszFormat, ap); + va_end(ap); + + RTMsgError("%s", message.c_str()); + LogRel(("%s", message.c_str())); +} + + + +/** + * Create release logger. + * + * The NAT network name is sanitized so that it can be used in a path + * component. By default the release log is written to the file + * ~/.VirtualBox/${netname}.log but its destiation and content can be + * overridden with VBOXNET_${netname}_RELEASE_LOG family of + * environment variables (also ..._DEST and ..._FLAGS). 
+ */ +/* static */ +int VBoxNetLwipNAT::initLog() +{ + size_t cch; + int rc; + + if (m_strNetworkName.isEmpty()) + return VERR_MISSING; + + char szNetwork[RTPATH_MAX]; + rc = RTStrCopy(szNetwork, sizeof(szNetwork), m_strNetworkName.c_str()); + if (RT_FAILURE(rc)) + return rc; + + // sanitize network name to be usable as a path component + for (char *p = szNetwork; *p != '\0'; ++p) + { + if (RTPATH_IS_SEP(*p)) + *p = '_'; + } + + const char *pcszLogFile = NULL; + char szLogFile[RTPATH_MAX]; + if (m_strHome.isNotEmpty()) + { + cch = RTStrPrintf(szLogFile, sizeof(szLogFile), + "%s%c%s.log", m_strHome.c_str(), RTPATH_DELIMITER, szNetwork); + if (cch < sizeof(szLogFile)) + pcszLogFile = szLogFile; + } + + // sanitize network name some more to be usable as environment variable + for (char *p = szNetwork; *p != '\0'; ++p) + { + if (*p != '_' + && (*p < '0' || '9' < *p) + && (*p < 'a' || 'z' < *p) + && (*p < 'A' || 'Z' < *p)) + { + *p = '_'; + } + } + + char szEnvVarBase[128]; + const char *pcszEnvVarBase = szEnvVarBase; + cch = RTStrPrintf(szEnvVarBase, sizeof(szEnvVarBase), + "VBOXNET_%s_RELEASE_LOG", szNetwork); + if (cch >= sizeof(szEnvVarBase)) + pcszEnvVarBase = NULL; + + rc = com::VBoxLogRelCreate("NAT Network", + pcszLogFile, + RTLOGFLAGS_PREFIX_TIME_PROG, + "all all.restrict -default.restrict", + pcszEnvVarBase, + RTLOGDEST_FILE, + 32768 /* cMaxEntriesPerGroup */, + 0 /* cHistory */, + 0 /* uHistoryFileTime */, + 0 /* uHistoryFileSize */, + NULL /*pErrInfo*/); + + /* + * Provide immediate feedback if corresponding LogRel level is + * enabled. It's frustrating when you chase some rare event and + * discover you didn't actually have the corresponding log level + * enabled because of a typo in the environment variable name or + * its content. 
+ */ +#define LOG_PING(_log) _log((#_log " enabled\n")) + LOG_PING(LogRel2); + LOG_PING(LogRel3); + LOG_PING(LogRel4); + LOG_PING(LogRel5); + LOG_PING(LogRel6); + LOG_PING(LogRel7); + LOG_PING(LogRel8); + LOG_PING(LogRel9); + LOG_PING(LogRel10); + LOG_PING(LogRel11); + LOG_PING(LogRel12); + + return rc; +} + + +/** + * Entry point. + */ +extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv, char **envp) +{ + LogFlowFuncEnter(); + NOREF(envp); + +#ifdef RT_OS_WINDOWS + WSADATA WsaData = {0}; + int err = WSAStartup(MAKEWORD(2,2), &WsaData); + if (err) + { + fprintf(stderr, "wsastartup: failed (%d)\n", err); + return RTEXITCODE_INIT; + } +#endif + + VBoxNetLwipNAT NAT; + + int rcExit = NAT.parseArgs(argc, argv); + if (rcExit != RTEXITCODE_SUCCESS) + { + /* messages are already printed */ + return rcExit == RTEXITCODE_DONE ? RTEXITCODE_SUCCESS : rcExit; + } + + int rc = NAT.init(); + if (RT_FAILURE(rc)) + return RTEXITCODE_INIT; + + NAT.run(); + + LogRel(("Terminating\n")); + return RTEXITCODE_SUCCESS; +} + + +#ifndef VBOX_WITH_HARDENING + +int main(int argc, char **argv, char **envp) +{ + int rc = RTR3InitExe(argc, &argv, RTR3INIT_FLAGS_SUPLIB); + if (RT_SUCCESS(rc)) + return TrustedMain(argc, argv, envp); + return RTMsgInitFailure(rc); +} + +# if defined(RT_OS_WINDOWS) + +# if 0 /* Some copy and paste from DHCP that nobody explained why was diabled. */ +static LRESULT CALLBACK WindowProc(HWND hwnd, + UINT uMsg, + WPARAM wParam, + LPARAM lParam +) +{ + if(uMsg == WM_DESTROY) + { + PostQuitMessage(0); + return 0; + } + return DefWindowProc (hwnd, uMsg, wParam, lParam); +} + +static LPCWSTR g_WndClassName = L"VBoxNetNatLwipClass"; + +static DWORD WINAPI MsgThreadProc(__in LPVOID lpParameter) +{ + HWND hwnd = 0; + HINSTANCE hInstance = (HINSTANCE)GetModuleHandle (NULL); + bool bExit = false; + + /* Register the Window Class. 
*/ + WNDCLASS wc; + wc.style = 0; + wc.lpfnWndProc = WindowProc; + wc.cbClsExtra = 0; + wc.cbWndExtra = sizeof(void *); + wc.hInstance = hInstance; + wc.hIcon = NULL; + wc.hCursor = NULL; + wc.hbrBackground = (HBRUSH)(COLOR_BACKGROUND + 1); + wc.lpszMenuName = NULL; + wc.lpszClassName = g_WndClassName; + + ATOM atomWindowClass = RegisterClass(&wc); + + if (atomWindowClass != 0) + { + /* Create the window. */ + hwnd = CreateWindowEx(WS_EX_TOOLWINDOW | WS_EX_TRANSPARENT | WS_EX_TOPMOST, + g_WndClassName, g_WndClassName, WS_POPUPWINDOW, + -200, -200, 100, 100, NULL, NULL, hInstance, NULL); + + if (hwnd) + { + SetWindowPos(hwnd, HWND_TOPMOST, -200, -200, 0, 0, + SWP_NOACTIVATE | SWP_HIDEWINDOW | SWP_NOCOPYBITS | SWP_NOREDRAW | SWP_NOSIZE); + + MSG msg; + while (GetMessage(&msg, NULL, 0, 0)) + { + TranslateMessage(&msg); + DispatchMessage(&msg); + } + + DestroyWindow (hwnd); + + bExit = true; + } + + UnregisterClass (g_WndClassName, hInstance); + } + + if(bExit) + { + /* no need any accuracy here, in anyway the DHCP server usually gets terminated with TerminateProcess */ + exit(0); + } + + return 0; +} +# endif + + +/** (We don't want a console usually.) */ +int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow) +{ + RT_NOREF(hInstance, hPrevInstance, lpCmdLine, nCmdShow); +# if 0 /* some copy and paste from DHCP that nobody explained why was diabled. 
*/ + NOREF(hInstance); NOREF(hPrevInstance); NOREF(lpCmdLine); NOREF(nCmdShow); + + HANDLE hThread = CreateThread( + NULL, /*__in_opt LPSECURITY_ATTRIBUTES lpThreadAttributes, */ + 0, /*__in SIZE_T dwStackSize, */ + MsgThreadProc, /*__in LPTHREAD_START_ROUTINE lpStartAddress,*/ + NULL, /*__in_opt LPVOID lpParameter,*/ + 0, /*__in DWORD dwCreationFlags,*/ + NULL /*__out_opt LPDWORD lpThreadId*/ + ); + + if(hThread != NULL) + CloseHandle(hThread); + +# endif + return main(__argc, __argv, environ); +} +# endif /* RT_OS_WINDOWS */ + +#endif /* !VBOX_WITH_HARDENING */ diff --git a/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp b/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp new file mode 100644 index 00000000..34a0992b --- /dev/null +++ b/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp @@ -0,0 +1,37 @@ +/* $Id: VBoxNetNATHardened.cpp $ */ +/** @file + * VBoxNetNAT - Hardened main(). + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include <VBox/sup.h> + +#ifndef SERVICE_NAME +# error "Please define SERVICE_NAME" +#endif + +int main(int argc, char **argv, char **envp) +{ + return SUPR3HardenedMain(SERVICE_NAME, 0 /* fFlags */, argc, argv, envp); +} diff --git a/src/VBox/NetworkServices/NAT/dhcp6.h b/src/VBox/NetworkServices/NAT/dhcp6.h new file mode 100644 index 00000000..03b26eae --- /dev/null +++ b/src/VBox/NetworkServices/NAT/dhcp6.h @@ -0,0 +1,61 @@ +/* $Id: dhcp6.h $ */ +/** @file + * NAT Network - DHCPv6 protocol definitions. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_NAT_dhcp6_h +#define VBOX_INCLUDED_SRC_NAT_dhcp6_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +/* UDP ports */ +#define DHCP6_CLIENT_PORT 546 +#define DHCP6_SERVER_PORT 547 + +/* Message types */ +#define DHCP6_REPLY 7 +#define DHCP6_INFORMATION_REQUEST 11 +#define DHCP6_RELAY_FORW 12 +#define DHCP6_RELAY_REPLY 13 + +/* DUID types */ +#define DHCP6_DUID_LLT 1 +#define DHCP6_DUID_EN 2 +#define DHCP6_DUID_LL 3 + +/* Hardware type for DUID-LLT and DUID-LL */ +#define ARES_HRD_ETHERNET 1 /* RFC 826*/ + +/* Options */ +#define DHCP6_OPTION_CLIENTID 1 +#define DHCP6_OPTION_SERVERID 2 +#define DHCP6_OPTION_ORO 6 +#define DHCP6_OPTION_ELAPSED_TIME 8 +#define DHCP6_OPTION_STATUS_CODE 13 +#define DHCP6_OPTION_DNS_SERVERS 23 /* RFC 3646 */ +#define DHCP6_OPTION_DOMAIN_LIST 24 /* RFC 3646 */ + +#endif /* !VBOX_INCLUDED_SRC_NAT_dhcp6_h */ diff --git a/src/VBox/NetworkServices/NAT/fwtcp.c b/src/VBox/NetworkServices/NAT/fwtcp.c new file mode 100644 index 00000000..79cdfc04 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/fwtcp.c @@ -0,0 +1,333 @@ +/* $Id: fwtcp.c $ */ +/** @file + * NAT Network - TCP port-forwarding. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "portfwd.h" +#include "pxtcp.h" + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <stdio.h> +#include <poll.h> + +#include <err.h> /* BSD'ism */ +#else +#include <stdio.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" + + +/** + */ +struct fwtcp { + /** + * Our poll manager handler. + */ + struct pollmgr_handler pmhdl; + + /** + * Forwarding specification. + */ + struct fwspec fwspec; + + /** + * Listening socket. + */ + SOCKET sock; + + /** + * Mailbox for new inbound connections. + * + * XXX: since we have single producer and single consumer we can + * use lockless ringbuf like for pxtcp. + */ + sys_mbox_t connmbox; + + struct tcpip_msg msg_connect; + struct tcpip_msg msg_delete; + + /** + * Linked list entry. + */ + struct fwtcp *next; +}; + + +static struct fwtcp *fwtcp_create(struct fwspec *); + +/* poll manager callback for fwtcp listening socket */ +static int fwtcp_pmgr_listen(struct pollmgr_handler *, SOCKET, int); + +/* lwip thread callbacks called via proxy_lwip_post() */ +static void fwtcp_pcb_connect(void *); +static void fwtcp_pcb_delete(void *); + + +/** + * Linked list of active fwtcp forwarders. 
+ */ +struct fwtcp *fwtcp_list = NULL; + + +void +fwtcp_init(void) +{ + return; +} + + +void +fwtcp_add(struct fwspec *fwspec) +{ + struct fwtcp *fwtcp; + + fwtcp = fwtcp_create(fwspec); + if (fwtcp == NULL) { + DPRINTF0(("%s: failed to add rule for TCP ...\n", __func__)); + return; + } + + DPRINTF0(("%s\n", __func__)); + /* fwtcp_create has put fwtcp on the linked list */ +} + + +void +fwtcp_del(struct fwspec *fwspec) +{ + struct fwtcp *fwtcp; + struct fwtcp **pprev; + + for (pprev = &fwtcp_list; (fwtcp = *pprev) != NULL; pprev = &fwtcp->next) { + if (fwspec_equal(&fwtcp->fwspec, fwspec)) { + *pprev = fwtcp->next; + fwtcp->next = NULL; + break; + } + } + + if (fwtcp == NULL) { + DPRINTF0(("%s: not found\n", __func__)); + return; + } + + DPRINTF0(("%s\n", __func__)); + + pollmgr_del_slot(fwtcp->pmhdl.slot); + fwtcp->pmhdl.slot = -1; + + closesocket(fwtcp->sock); + fwtcp->sock = INVALID_SOCKET; + + /* let pending msg_connect be processed before we delete fwtcp */ + proxy_lwip_post(&fwtcp->msg_delete); +} + + +struct fwtcp * +fwtcp_create(struct fwspec *fwspec) +{ + struct fwtcp *fwtcp; + SOCKET lsock; + int status; + err_t error; + + lsock = proxy_bound_socket(fwspec->sdom, fwspec->stype, &fwspec->src.sa); + if (lsock == INVALID_SOCKET) { + return NULL; + } + + fwtcp = (struct fwtcp *)malloc(sizeof(*fwtcp)); + if (fwtcp == NULL) { + closesocket(lsock); + return NULL; + } + + fwtcp->pmhdl.callback = fwtcp_pmgr_listen; + fwtcp->pmhdl.data = (void *)fwtcp; + fwtcp->pmhdl.slot = -1; + + fwtcp->sock = lsock; + fwtcp->fwspec = *fwspec; /* struct copy */ + + error = sys_mbox_new(&fwtcp->connmbox, 16); + if (error != ERR_OK) { + closesocket(lsock); + free(fwtcp); + return (NULL); + } + +#define CALLBACK_MSG(MSG, FUNC) \ + do { \ + fwtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \ + fwtcp->MSG.sem = NULL; \ + fwtcp->MSG.msg.cb.function = FUNC; \ + fwtcp->MSG.msg.cb.ctx = (void *)fwtcp; \ + } while (0) + + CALLBACK_MSG(msg_connect, fwtcp_pcb_connect); + CALLBACK_MSG(msg_delete, 
fwtcp_pcb_delete); + +#undef CALLBACK_MSG + + status = pollmgr_add(&fwtcp->pmhdl, fwtcp->sock, POLLIN); + if (status < 0) { + sys_mbox_free(&fwtcp->connmbox); + closesocket(lsock); + free(fwtcp); + return NULL; + } + + fwtcp->next = fwtcp_list; + fwtcp_list = fwtcp; + + return fwtcp; +} + + +int +fwtcp_pmgr_listen(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct fwtcp *fwtcp; + struct sockaddr_storage ss; + socklen_t sslen; + struct pxtcp *pxtcp; + SOCKET newsock; + int status; + err_t error; + + fwtcp = (struct fwtcp *)handler->data; + pxtcp = NULL; + + LWIP_ASSERT1(fwtcp != NULL); + LWIP_ASSERT1(fd == fwtcp->sock); + LWIP_ASSERT1(revents == POLLIN); + LWIP_UNUSED_ARG(fd); + LWIP_UNUSED_ARG(revents); + + LWIP_ASSERT1(sys_mbox_valid(&fwtcp->connmbox)); + + sslen = sizeof(ss); + newsock = accept(fwtcp->sock, (struct sockaddr *)&ss, &sslen); + if (newsock == INVALID_SOCKET) { + return POLLIN; + } + +#ifdef RT_OS_LINUX + status = proxy_fixup_accepted_socket(newsock); + if (status < 0) { + proxy_reset_socket(newsock); + return POLLIN; + } +#endif + + if (ss.ss_family == PF_INET) { + struct sockaddr_in *peer4 = (struct sockaddr_in *)&ss; + RT_NOREF(peer4); + DPRINTF(("<--- TCP %RTnaipv4:%d\n", + peer4->sin_addr.s_addr, ntohs(peer4->sin_port))); + } + else { /* PF_INET6 */ + struct sockaddr_in6 *peer6 = (struct sockaddr_in6 *)&ss; + RT_NOREF(peer6); + DPRINTF(("<--- TCP %RTnaipv6:%d\n", + &peer6->sin6_addr, ntohs(peer6->sin6_port))); + } + + pxtcp = pxtcp_create_forwarded(newsock); + if (pxtcp == NULL) { + proxy_reset_socket(newsock); + return POLLIN; + } + + status = pxtcp_pmgr_add(pxtcp); + if (status < 0) { + pxtcp_cancel_forwarded(pxtcp); + return POLLIN; + } + + error = sys_mbox_trypost(&fwtcp->connmbox, (void *)pxtcp); + if (error != ERR_OK) { + pxtcp_pmgr_del(pxtcp); + pxtcp_cancel_forwarded(pxtcp); + return POLLIN; + } + + proxy_lwip_post(&fwtcp->msg_connect); + return POLLIN; +} + + +void +fwtcp_pcb_connect(void *arg) +{ + struct fwtcp *fwtcp 
= (struct fwtcp *)arg; + struct pxtcp *pxtcp; + u32_t timo; + + if (!sys_mbox_valid(&fwtcp->connmbox)) { + return; + } + + pxtcp = NULL; + timo = sys_mbox_tryfetch(&fwtcp->connmbox, (void **)&pxtcp); + if (timo == SYS_MBOX_EMPTY) { + return; + } + + LWIP_ASSERT1(pxtcp != NULL); + + /* hand off to pxtcp */ + pxtcp_pcb_connect(pxtcp, &fwtcp->fwspec); +} + + +static void +fwtcp_pcb_delete(void *arg) +{ + struct fwtcp *fwtcp = (struct fwtcp *)arg; + void *data; + u32_t timo; + + timo = sys_mbox_tryfetch(&fwtcp->connmbox, &data); + LWIP_ASSERT1(timo == SYS_MBOX_EMPTY); + LWIP_UNUSED_ARG(timo); /* only in assert */ + + sys_mbox_free(&fwtcp->connmbox); + free(fwtcp); +} diff --git a/src/VBox/NetworkServices/NAT/fwudp.c b/src/VBox/NetworkServices/NAT/fwudp.c new file mode 100644 index 00000000..7c0b69f9 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/fwudp.c @@ -0,0 +1,555 @@ +/* $Id: fwudp.c $ */ +/** @file + * NAT Network - UDP port-forwarding. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "portfwd.h" +#include "pxremap.h" + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#include <stdio.h> +#include <string.h> +#include <poll.h> + +#include <err.h> /* BSD'ism */ +#else +#include <stdio.h> +#include <string.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" +#include "lwip/memp.h" /* XXX: for bulk delete of pcbs */ + +#include "lwip/sys.h" +#include "lwip/tcpip.h" +#include "lwip/udp.h" + +struct fwudp_dgram { + struct pbuf *p; + ipX_addr_t src_addr; + u16_t src_port; +}; + +/** + * UDP port-forwarding. + * + * Unlike pxudp that uses 1:1 mapping between pcb and socket, for + * port-forwarded UDP the setup is bit more elaborated. + * + * For fwtcp things are simple since incoming TCP connection get a new + * socket that we just hand off to pxtcp. Thus fwtcp only handles + * connection initiation. + * + * For fwudp all proxied UDP conversations share the same socket, so + * single fwudp multiplexes to several UDP pcbs. + * + * XXX: TODO: Currently pcbs point back directly to fwudp. It might + * make sense to introduce a per-pcb structure that points to fwudp + * and carries additional information, like pre-mapped peer address. + */ +struct fwudp { + /** + * Our poll manager handler. + */ + struct pollmgr_handler pmhdl; + + /** + * Forwarding specification. + */ + struct fwspec fwspec; + + /** + * XXX: lwip-format copy of destination + */ + ipX_addr_t dst_addr; + u16_t dst_port; + + /** + * Listening socket. + */ + SOCKET sock; + + /** + * Ring-buffer for inbound datagrams. 
+ */ + struct { + struct fwudp_dgram *buf; + size_t bufsize; + volatile size_t vacant; + volatile size_t unsent; + } inbuf; + + struct tcpip_msg msg_send; + struct tcpip_msg msg_delete; + + struct fwudp *next; +}; + + +struct fwudp *fwudp_create(struct fwspec *); + +/* poll manager callback for fwudp socket */ +static int fwudp_pmgr_pump(struct pollmgr_handler *, SOCKET, int); + +/* lwip thread callbacks called via proxy_lwip_post() */ +static void fwudp_pcb_send(void *); +static void fwudp_pcb_delete(void *); + +static void fwudp_pcb_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t); +static void fwudp_pcb_forward_outbound(struct fwudp *, struct udp_pcb *, struct pbuf *); + + +/** + * Linked list of active fwtcp forwarders. + */ +struct fwudp *fwudp_list = NULL; + + +void +fwudp_init(void) +{ + return; +} + + +void +fwudp_add(struct fwspec *fwspec) +{ + struct fwudp *fwudp; + + fwudp = fwudp_create(fwspec); + if (fwudp == NULL) { + DPRINTF0(("%s: failed to add rule for UDP ...\n", __func__)); + return; + } + + DPRINTF0(("%s\n", __func__)); + /* fwudp_create has put fwudp on the linked list */ +} + + +void +fwudp_del(struct fwspec *fwspec) +{ + struct fwudp *fwudp; + struct fwudp **pprev; + + for (pprev = &fwudp_list; (fwudp = *pprev) != NULL; pprev = &fwudp->next) { + if (fwspec_equal(&fwudp->fwspec, fwspec)) { + *pprev = fwudp->next; + fwudp->next = NULL; + break; + } + } + + if (fwudp == NULL) { + DPRINTF0(("%s: not found\n", __func__)); + return; + } + + DPRINTF0(("%s\n", __func__)); + + pollmgr_del_slot(fwudp->pmhdl.slot); + fwudp->pmhdl.slot = -1; + + /* let pending msg_send be processed before we delete fwudp */ + proxy_lwip_post(&fwudp->msg_delete); +} + + +struct fwudp * +fwudp_create(struct fwspec *fwspec) +{ + struct fwudp *fwudp; + SOCKET sock; + int status; + + sock = proxy_bound_socket(fwspec->sdom, fwspec->stype, &fwspec->src.sa); + if (sock == INVALID_SOCKET) { + return NULL; + } + + fwudp = (struct fwudp *)malloc(sizeof(*fwudp)); + 
if (fwudp == NULL) { + closesocket(sock); + return NULL; + } + + fwudp->pmhdl.callback = fwudp_pmgr_pump; + fwudp->pmhdl.data = (void *)fwudp; + fwudp->pmhdl.slot = -1; + + fwudp->sock = sock; + fwudp->fwspec = *fwspec; /* struct copy */ + + /* XXX */ + if (fwspec->sdom == PF_INET) { + struct sockaddr_in *dst4 = &fwspec->dst.sin; + memcpy(&fwudp->dst_addr.ip4, &dst4->sin_addr, sizeof(ip_addr_t)); + fwudp->dst_port = htons(dst4->sin_port); + } + else { /* PF_INET6 */ + struct sockaddr_in6 *dst6 = &fwspec->dst.sin6; + memcpy(&fwudp->dst_addr.ip6, &dst6->sin6_addr, sizeof(ip6_addr_t)); + fwudp->dst_port = htons(dst6->sin6_port); + } + + fwudp->inbuf.bufsize = 256; /* elements */ + fwudp->inbuf.buf + = (struct fwudp_dgram *)calloc(fwudp->inbuf.bufsize, + sizeof(struct fwudp_dgram)); + if (fwudp->inbuf.buf == NULL) { + closesocket(sock); + free(fwudp); + return (NULL); + } + fwudp->inbuf.vacant = 0; + fwudp->inbuf.unsent = 0; + +#define CALLBACK_MSG(MSG, FUNC) \ + do { \ + fwudp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \ + fwudp->MSG.sem = NULL; \ + fwudp->MSG.msg.cb.function = FUNC; \ + fwudp->MSG.msg.cb.ctx = (void *)fwudp; \ + } while (0) + + CALLBACK_MSG(msg_send, fwudp_pcb_send); + CALLBACK_MSG(msg_delete, fwudp_pcb_delete); + +#undef CALLBACK_MSG + + status = pollmgr_add(&fwudp->pmhdl, fwudp->sock, POLLIN); + if (status < 0) { + closesocket(sock); + free(fwudp->inbuf.buf); + free(fwudp); + return NULL; + } + + fwudp->next = fwudp_list; + fwudp_list = fwudp; + + return fwudp; +} + + +/** + * Poll manager callaback for fwudp::sock + */ +int +fwudp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct fwudp *fwudp; + struct sockaddr_storage ss; + socklen_t sslen = sizeof(ss); + size_t beg, lim; + struct fwudp_dgram *dgram; + struct pbuf *p; + ssize_t nread; + int status; + err_t error; + + fwudp = (struct fwudp *)handler->data; + + LWIP_ASSERT1(fwudp != NULL); + LWIP_ASSERT1(fd == fwudp->sock); + LWIP_ASSERT1(revents == POLLIN); + 
LWIP_UNUSED_ARG(fd); + LWIP_UNUSED_ARG(revents); + +#ifdef RT_OS_WINDOWS + nread = recvfrom(fwudp->sock, (char *)pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0, + (struct sockaddr *)&ss, &sslen); +#else + nread = recvfrom(fwudp->sock, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0, + (struct sockaddr *)&ss, &sslen); +#endif + if (nread < 0) { + DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO())); + return POLLIN; + } + + /* Check that ring buffer is not full */ + lim = fwudp->inbuf.unsent; + if (lim == 0) { + lim = fwudp->inbuf.bufsize - 1; /* guard slot at the end */ + } + else { + --lim; + } + + beg = fwudp->inbuf.vacant; + if (beg == lim) { /* no vacant slot */ + return POLLIN; + } + + + dgram = &fwudp->inbuf.buf[beg]; + + + status = fwany_ipX_addr_set_src(&dgram->src_addr, (struct sockaddr *)&ss); + if (status == PXREMAP_FAILED) { + return POLLIN; + } + + if (ss.ss_family == AF_INET) { + const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss; + dgram->src_port = htons(peer4->sin_port); + } + else { /* PF_INET6 */ + const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss; + dgram->src_port = htons(peer6->sin6_port); + } + + p = pbuf_alloc(PBUF_RAW, nread, PBUF_RAM); + if (p == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)nread)); + return POLLIN; + } + + error = pbuf_take(p, pollmgr_udpbuf, nread); + if (error != ERR_OK) { + DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)nread)); + pbuf_free(p); + return POLLIN; + } + + dgram->p = p; + + ++beg; + if (beg == fwudp->inbuf.bufsize) { + beg = 0; + } + fwudp->inbuf.vacant = beg; + + proxy_lwip_post(&fwudp->msg_send); + + return POLLIN; +} + + +/** + * Lwip thread callback invoked via fwudp::msg_send + */ +void +fwudp_pcb_send(void *arg) +{ + struct fwudp *fwudp = (struct fwudp *)arg; + struct fwudp_dgram dgram; + struct udp_pcb *pcb; + struct udp_pcb **pprev; + int isv6; + size_t idx; + + idx = fwudp->inbuf.unsent; + + if (idx == fwudp->inbuf.vacant) { + /* empty buffer - 
shouldn't happen! */ + DPRINTF(("%s: ring buffer empty!\n", __func__)); + return; + } + + dgram = fwudp->inbuf.buf[idx]; /* struct copy */ +#if 1 /* valgrind hint */ + fwudp->inbuf.buf[idx].p = NULL; +#endif + if (++idx == fwudp->inbuf.bufsize) { + idx = 0; + } + fwudp->inbuf.unsent = idx; + + /* XXX: this is *STUPID* */ + isv6 = (fwudp->fwspec.sdom == PF_INET6); + pprev = &udp_proxy_pcbs; + for (pcb = udp_proxy_pcbs; pcb != NULL; pcb = pcb->next) { + if (PCB_ISIPV6(pcb) == isv6 + && pcb->remote_port == fwudp->dst_port + && ipX_addr_cmp(isv6, &fwudp->dst_addr, &pcb->remote_ip) + && pcb->local_port == dgram.src_port + && ipX_addr_cmp(isv6, &dgram.src_addr, &pcb->local_ip)) + { + break; + } + else { + pprev = &pcb->next; + } + } + + if (pcb != NULL) { + *pprev = pcb->next; + pcb->next = udp_proxy_pcbs; + udp_proxy_pcbs = pcb; + + /* + * XXX: check that its ours and not accidentally created by + * outbound traffic. + * + * ???: Otherwise? Expire it and set pcb = NULL; to create a + * new one below? + */ + } + + if (pcb == NULL) { + pcb = udp_new(); + if (pcb == NULL) { + goto out; + } + + ip_set_v6(pcb, isv6); + + /* equivalent of udp_bind */ + ipX_addr_set(isv6, &pcb->local_ip, &dgram.src_addr); + pcb->local_port = dgram.src_port; + + /* equivalent to udp_connect */ + ipX_addr_set(isv6, &pcb->remote_ip, &fwudp->dst_addr); + pcb->remote_port = fwudp->dst_port; + pcb->flags |= UDP_FLAGS_CONNECTED; + + udp_recv(pcb, fwudp_pcb_recv, fwudp); + + pcb->next = udp_proxy_pcbs; + udp_proxy_pcbs = pcb; + udp_proxy_timer_needed(); + } + + udp_send(pcb, dgram.p); + + out: + pbuf_free(dgram.p); +} + + +/** + * udp_recv() callback. 
+ */ +void +fwudp_pcb_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + struct fwudp *fwudp = (struct fwudp *)arg; + + LWIP_UNUSED_ARG(addr); + LWIP_UNUSED_ARG(port); + + LWIP_ASSERT1(fwudp != NULL); + + if (p == NULL) { + DPRINTF(("%s: pcb %p (fwudp %p); sock %d: expired\n", + __func__, (void *)pcb, (void *)fwudp, fwudp->sock)); + /* NB: fwudp is "global" and not deleted */ + /* XXX: TODO: delete local reference when we will keep one */ + udp_remove(pcb); + return; + } + else { + fwudp_pcb_forward_outbound(fwudp, pcb, p); + } +} + + +/* + * XXX: This is pxudp_pcb_forward_outbound modulo: + * - s/pxudp/fwudp/g + * - addr/port (unused in either) dropped + * - destination is specified since host socket is not connected + */ +static void +fwudp_pcb_forward_outbound(struct fwudp *fwudp, struct udp_pcb *pcb, + struct pbuf *p) +{ + union { + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } peer; + socklen_t namelen; + + memset(&peer, 0, sizeof(peer)); /* XXX: shut up valgrind */ + + if (fwudp->fwspec.sdom == PF_INET) { + peer.sin.sin_family = AF_INET; +#if HAVE_SA_LEN + peer.sin.sin_len = +#endif + namelen = sizeof(peer.sin); + pxremap_outbound_ip4((ip_addr_t *)&peer.sin.sin_addr, &pcb->local_ip.ip4); + peer.sin.sin_port = htons(pcb->local_port); + } + else { + peer.sin6.sin6_family = AF_INET6; +#if HAVE_SA_LEN + peer.sin6.sin6_len = +#endif + namelen = sizeof(peer.sin6); + + pxremap_outbound_ip6((ip6_addr_t *)&peer.sin6.sin6_addr, &pcb->local_ip.ip6); + peer.sin6.sin6_port = htons(pcb->local_port); + } + + proxy_sendto(fwudp->sock, p, &peer, namelen); + pbuf_free(p); +} + + +/** + * Lwip thread callback invoked via fwudp::msg_delete + */ +static void +fwudp_pcb_delete(void *arg) +{ + struct fwudp *fwudp = (struct fwudp *)arg; + struct udp_pcb *pcb; + struct udp_pcb **pprev; + + LWIP_ASSERT1(fwudp->inbuf.unsent == fwudp->inbuf.vacant); + + pprev = &udp_proxy_pcbs; + pcb = udp_proxy_pcbs; + while (pcb != NULL) { + if 
(pcb->recv_arg != fwudp) { + pprev = &pcb->next; + pcb = pcb->next; + } + else { + struct udp_pcb *dead = pcb; + pcb = pcb->next; + *pprev = pcb; + memp_free(MEMP_UDP_PCB, dead); + } + } + + closesocket(fwudp->sock); + free(fwudp->inbuf.buf); + free(fwudp); +} diff --git a/src/VBox/NetworkServices/NAT/getrawsock.c b/src/VBox/NetworkServices/NAT/getrawsock.c new file mode 100644 index 00000000..8b93ff6f --- /dev/null +++ b/src/VBox/NetworkServices/NAT/getrawsock.c @@ -0,0 +1,165 @@ +/* $Id: getrawsock.c $ */ +/** @file + * Obtain raw-sockets from a server when debugging unprivileged. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <netinet/in.h> +#include <errno.h> +#include <pwd.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +/* XXX: this should be in a header, but isn't. naughty me. 
/* XXX: this should be in a header, but isn't. */
int getrawsock(int type);


/**
 * Obtain a raw ICMP socket from the per-user "mkrawsock" helper.
 *
 * Connects to the unix socket /tmp/.vbox-<user>-aux/mkrawsock, sends
 * a one byte request ('4' or '6') and expects the same byte back
 * together with a file descriptor passed as SCM_RIGHTS ancillary
 * data.
 *
 * @param type  AF_INET for an ICMPv4 socket, AF_INET6 for ICMPv6.
 *
 * @returns the received socket descriptor, or -1 on any failure,
 *          including an unsupported type or running with effective
 *          uid 0 (the helper is not consulted in that case).
 */
int
getrawsock(int type)
{
    struct sockaddr_un sux;     /* because solaris */
    struct passwd *pw;
    int pathlen;
    int rawsock, server;
    struct msghdr mh;
    struct iovec iov[1];
    char buf[1];
    struct cmsghdr *cmh;
    /* the union guarantees cmsghdr alignment of the control buffer */
    union {
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(int))];
    } cmsg;
    ssize_t nread, nsent;
    int status;

    server = -1;
    rawsock = -1;

    memset(&sux, 0, sizeof(sux));
    sux.sun_family = AF_UNIX;

    if (geteuid() == 0) {
        return -1;
    }

    if (type == AF_INET) {
        buf[0] = '4';
    }
    else if (type == AF_INET6) {
        buf[0] = '6';
    }
    else {
        return -1;
    }

    errno = 0;
    pw = getpwuid(getuid());
    if (pw == NULL) {
        perror("getpwuid");
        return -1;
    }

    /*
     * snprintf() returns the length the full string would have had,
     * so the output was truncated when that is >= the buffer size
     * (not just >); a negative value is an output error.
     */
    pathlen = snprintf(sux.sun_path, sizeof(sux.sun_path),
                       "/tmp/.vbox-%s-aux/mkrawsock", pw->pw_name);
    if (pathlen < 0 || (size_t)pathlen >= sizeof(sux.sun_path)) {
        fprintf(stderr, "socket pathname truncated\n");
        return -1;
    }

    server = socket(PF_UNIX, SOCK_STREAM, 0);
    if (server < 0) {
        perror("socket");
        return -1;
    }

    status = connect(server, (struct sockaddr *)&sux,
                     (sizeof(sux) - sizeof(sux.sun_path)
                      + strlen(sux.sun_path) + 1));
    if (status < 0) {
        perror(sux.sun_path);
        goto out;
    }

    nsent = send(server, buf, 1, 0);
    if (nsent != 1) {
        if (nsent < 0) {
            perror("send");
        }
        else {
            fprintf(stderr, "failed to contact mkrawsock\n");
        }
        goto out;
    }

    /* The reply echoes the request byte; a NUL byte is a NAK. */
    buf[0] = '\0';

    iov[0].iov_base = buf;
    iov[0].iov_len = 1;

    memset(&mh, 0, sizeof(mh));
    mh.msg_iov = iov;
    mh.msg_iovlen = 1;
    mh.msg_control = cmsg.buf;
    mh.msg_controllen = sizeof(cmsg.buf);

    nread = recvmsg(server, &mh, 0);
    if (nread != 1) {
        if (nread < 0) {
            perror("recvmsg");
        }
        else {
            fprintf(stderr, "EOF from mkrawsock\n");
        }
        goto out;
    }

    if ((type == AF_INET && buf[0] != '4')
        || (type == AF_INET6 && buf[0] != '6')
        || mh.msg_controllen == 0)
    {
        goto out;
    }

    for (cmh = CMSG_FIRSTHDR(&mh); cmh != NULL; cmh = CMSG_NXTHDR(&mh, cmh)) {
        if ((cmh->cmsg_level == SOL_SOCKET)
            && (cmh->cmsg_type == SCM_RIGHTS)
            && (cmh->cmsg_len == CMSG_LEN(sizeof(rawsock))))
        {
            /* CMSG_DATA() is not guaranteed to be int-aligned */
            memcpy(&rawsock, CMSG_DATA(cmh), sizeof(rawsock));
            break;
        }
    }

  out:
    if (server != -1) {
        close(server);
    }
    if (rawsock != -1) {
        printf("%s: got ICMPv%c socket %d\n",
               __func__, type == AF_INET ? '4' : '6', rawsock);
    }
    return rawsock;
}
*/ +#include <iprt/rand.h> /* see LWIP_RAND() definition */ + +/* lwip/sockets.h assumes that if FD_SET is defined (in case of Innotek GCC + * its definition is dragged through iprt/types.h) then struct timeval is + * defined as well, but it's not the case. So include it manually. */ +#ifdef RT_OS_OS2 +# include <sys/time.h> +#endif + +/** Make lwIP use the libc malloc, or more precisely (see below) the IPRT + * memory allocation functions. */ +#define MEM_LIBC_MALLOC 1 + +/** Set proper memory alignment. */ +#if HC_ARCH_BITS == 64 +# define MEM_ALIGNMENT 8 +#else +#define MEM_ALIGNMENT 4 +#endif + +/* Padding before Ethernet header to make IP header aligned */ +#define ETH_PAD_SIZE 2 + +/* IP */ +#define IP_REASSEMBLY 1 +#define IP_REASS_MAX_PBUFS 128 + + + +/** Increase maximum TCP window size. */ +#define TCP_WND 32768 + +/** Increase TCP maximum segment size. */ +#define TCP_MSS 1460 + +/** Enable queueing of out-of-order segments. */ +#define TCP_QUEUE_OOSEQ 1 + +/** TCP sender buffer space (bytes). */ +#define TCP_SND_BUF (32 * TCP_MSS) + +/* TCP sender buffer space (pbufs). This must be at least = 2 * + TCP_SND_BUF/TCP_MSS for things to work. */ +#define TCP_SND_QUEUELEN 128 + +/* MEMP_NUM_PBUF: the number of memp struct pbufs. If the application + sends a lot of data out of ROM (or other static memory), this + should be set high. + + NB: This is for PBUF_ROM and PBUF_REF pbufs only! + + Number of PBUF_POOL pbufs is controlled by PBUF_POOL_SIZE that, + somewhat confusingly, breaks MEMP_NUM_* pattern. + + PBUF_RAM pbufs are allocated with mem_malloc (with MEM_LIBC_MALLOC + set to 1 this is just system malloc), not memp_malloc. */ +#define MEMP_NUM_PBUF (1024 * 4) + + +/* MEMP_NUM_MLD6_GROUP: Maximum number of IPv6 multicast groups that + can be joined. + + We need to be able to join solicited node multicast for each + address (potentially different) and two groups for DHCP6. 
All + routers multicast is hardcoded in ip6.c and does not require + explicit joining. Provide also for a few extra groups just in + case. */ +#define MEMP_NUM_MLD6_GROUP (LWIP_IPV6_NUM_ADDRESSES + /* dhcp6 */ 2 + /* extra */ 8) + + +/* MEMP_NUM_TCP_SEG: the number of simultaneously queued TCP + segments. */ +#define MEMP_NUM_TCP_SEG (MEMP_NUM_TCP_PCB * TCP_SND_QUEUELEN / 2) + +/* MEMP_NUM_TCP_PCB: the number of simulatenously active TCP + connections. */ +#define MEMP_NUM_TCP_PCB 128 + +/* MEMP_NUM_TCPIP_MSG_*: the number of struct tcpip_msg, which is used + for sequential API communication and incoming packets. Used in + src/api/tcpip.c. */ +#define MEMP_NUM_TCPIP_MSG_API 128 +#define MEMP_NUM_TCPIP_MSG_INPKT 1024 + +/* MEMP_NUM_UDP_PCB: the number of UDP protocol control blocks. One + per active UDP "connection". */ +#define MEMP_NUM_UDP_PCB 32 + +/* Pbuf options */ +/* PBUF_POOL_SIZE: the number of buffers in the pbuf pool. + This is only for PBUF_POOL pbufs, primarily used by netif drivers. + + This should have been named with the MEMP_NUM_ prefix (cf. + MEMP_NUM_PBUF for PBUF_ROM and PBUF_REF) as it controls the size of + yet another memp_malloc() pool. */ +#define PBUF_POOL_SIZE (1024 * 4) + +/* PBUF_POOL_BUFSIZE: the size of each pbuf in the pbuf pool. + Use default that is based on TCP_MSS and PBUF_LINK_HLEN. */ +#undef PBUF_POOL_BUFSIZE + +/** Turn on support for lightweight critical region protection. Leaving this + * off uses synchronization code in pbuf.c which is totally polluted with + * races. All the other lwip source files would fall back to semaphore-based + * synchronization, but pbuf.c is just broken, leading to incorrect allocation + * and as a result to assertions due to buffers being double freed. */ +#define SYS_LIGHTWEIGHT_PROT 1 + +/** Attempt to get rid of htons etc. macro issues. 
*/ +#undef LWIP_PREFIX_BYTEORDER_FUNCS + +#define LWIP_TCPIP_CORE_LOCKING_INPUT 0 +#define LWIP_TCPIP_CORE_LOCKING 0 +#define LWIP_TCP 1 +#define LWIP_SOCKET 0 +#define LWIP_ARP 1 +#define ARP_PROXY 1 +#define LWIP_ETHERNET 1 +#define LWIP_COMPAT_SOCKETS 0 +#define LWIP_COMPAT_MUTEX 1 + +#define LWIP_IPV6 1 +#define LWIP_IPV6_FORWARD 1 +#define LWIP_ND6_PROXY 1 + +#define LWIP_ND6_ALLOW_RA_UPDATES (!LWIP_IPV6_FORWARD) +#define LWIP_IPV6_SEND_ROUTER_SOLICIT (!LWIP_IPV6_FORWARD) +/* IPv6 autoconfig we don't need in proxy, but it required for very seldom cases + * iSCSI over intnet with IPv6 + */ +#define LWIP_IPV6_AUTOCONFIG 1 +#if LWIP_IPV6_FORWARD /* otherwise use the default from lwip/opt.h */ +#define LWIP_IPV6_DUP_DETECT_ATTEMPTS 0 +#endif + +#define LWIP_IPV6_FRAG 1 + +/** + * aka Slirp mode. + */ +#define LWIP_CONNECTION_PROXY 1 +#define IP_FORWARD 1 + +/* MEMP_NUM_SYS_TIMEOUT: the number of simultaneously active + timeouts. */ +#define MEMP_NUM_SYS_TIMEOUT 16 + + +/* this is required for IPv6 and IGMP needs */ +#define LWIP_RAND() RTRandU32() + +/* Debugging stuff. */ +#ifdef DEBUG +# define LWIP_DEBUG +# include "lwip-log.h" + +# define LWIP_PROXY_DEBUG LWIP_DBG_OFF +#endif /* DEBUG */ + +/* printf formatter definitions */ +#define U16_F "hu" +#define S16_F "hd" +#define X16_F "hx" +#define U32_F "u" +#define S32_F "d" +#define X32_F "x" + +/* Redirect libc memory alloc functions to IPRT. */ +#define malloc(x) RTMemAlloc(x) +#define realloc(x,y) RTMemRealloc((x), (y)) +#define free(x) RTMemFree(x) + +/* Align VBOX_STRICT and LWIP_NOASSERT. 
*/ +#ifndef VBOX_STRICT +# define LWIP_NOASSERT 1 +#endif + +#endif /* !VBOX_INCLUDED_SRC_NAT_lwipopts_h */ diff --git a/src/VBox/NetworkServices/NAT/mkrawsock.c b/src/VBox/NetworkServices/NAT/mkrawsock.c new file mode 100644 index 00000000..50e9831f --- /dev/null +++ b/src/VBox/NetworkServices/NAT/mkrawsock.c @@ -0,0 +1,349 @@ +/* $Id: mkrawsock.c $ */ +/** @file + * Auxiliary server to create raw-sockets when debugging unprivileged. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifdef __linux__ +#define _GNU_SOURCE +#endif + +#ifdef __sun__ +#if __STDC_VERSION__ - 0 >= 199901L +#define _XOPEN_SOURCE 600 +#else +#define _XOPEN_SOURCE 500 +#endif +#define __EXTENSIONS__ 1 +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <netinet/in.h> +#ifdef __linux__ +#include <linux/icmp.h> /* for ICMP_FILTER */ +#endif +#include <errno.h> +#include <fcntl.h> +#include <pwd.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + + +static void handler(int sig); +static void serve(int s); +static int mkrawsock(int family); + +volatile sig_atomic_t signaled = 0; + +int +main(int argc, char **argv) +{ + struct sigaction sa; + struct sockaddr_un sux; /* because solaris */ + struct passwd *pw; + size_t pathlen; + char *slash; + int s, client; + int status; + + memset(&sux, 0, sizeof(sux)); + sux.sun_family = AF_UNIX; + + if (getuid() == 0) { + if (argc != 2) { + fprintf(stderr, "username required when run as root\n"); + return EXIT_FAILURE; + } + + errno = 0; + pw = getpwnam(argv[1]); + if (pw == NULL) { + perror("getpwnam"); + return EXIT_FAILURE; + } + if (pw->pw_uid == 0) { + fprintf(stderr, "%s is superuser\n", pw->pw_name); + return EXIT_FAILURE; + } + } + else { + errno = 0; + pw = getpwuid(getuid()); + if (pw == NULL) { + perror("getpwuid"); + return EXIT_FAILURE; + } + } + + pathlen = snprintf(sux.sun_path, sizeof(sux.sun_path), + "/tmp/.vbox-%s-aux/mkrawsock", pw->pw_name); + if (pathlen > sizeof(sux.sun_path)) { + fprintf(stderr, "socket pathname truncated\n"); + return EXIT_FAILURE; + } + + slash = strrchr(sux.sun_path, '/'); + if (slash == NULL) { + fprintf(stderr, "%s: no directory separator\n", sux.sun_path); + return EXIT_FAILURE; + } + + *slash = '\0'; + + status = mkdir(sux.sun_path, 0700); + if (status == 0) { + status = chown(sux.sun_path, pw->pw_uid, pw->pw_gid); + if 
(status < 0) { + perror("chown"); + return EXIT_FAILURE; + } + } + else if (errno != EEXIST) { + perror("mkdir"); + return EXIT_FAILURE; + } + else { + int dirfd; + struct stat st; + + dirfd = open(sux.sun_path, O_RDONLY, O_DIRECTORY); + if (dirfd < 0) { + perror(sux.sun_path); + return EXIT_FAILURE; + } + + status = fstat(dirfd, &st); + close(dirfd); + + if (status < 0) { + perror(sux.sun_path); + return EXIT_FAILURE; + } + + if (st.st_uid != pw->pw_uid) { + fprintf(stderr, "%s: exists but not owned by %s\n", + sux.sun_path, pw->pw_name); + return EXIT_FAILURE; + } + + if ((st.st_mode & 0777) != 0700) { + fprintf(stderr, "%s: bad mode %04o\n", + sux.sun_path, (unsigned int)(st.st_mode & 0777)); + return EXIT_FAILURE; + } + } + + *slash = '/'; + +#if 0 + status = unlink(sux.sun_path); + if (status < 0 && errno != ENOENT) { + perror("unlink"); + } +#endif + + s = socket(PF_UNIX, SOCK_STREAM, 0); + if (s < 0) { + perror("socket"); + return EXIT_FAILURE; + } + + status = bind(s, (struct sockaddr *)&sux, + (sizeof(sux) - sizeof(sux.sun_path) + + strlen(sux.sun_path) + 1)); + if (status < 0) { + perror(sux.sun_path); + close(s); + return EXIT_FAILURE; + } + + status = chown(sux.sun_path, pw->pw_uid, pw->pw_gid); + if (status < 0) { + perror("chown"); + close(s); + return EXIT_FAILURE; + } + + status = chmod(sux.sun_path, 0600); + if (status < 0) { + perror("chmod"); + close(s); + return EXIT_FAILURE; + } + + status = listen(s, 1); + if (status < 0) { + perror("listen"); + close(s); + return EXIT_FAILURE; + } + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = handler; + sigemptyset(&sa.sa_mask); + + sigaction(SIGINT, &sa, NULL); + sigaction(SIGTERM, &sa, NULL); + + while (!signaled) { + client = accept(s, NULL, 0); + if (client < 0) { + perror("accept"); + continue; + } + + serve(client); + close(client); + } + + close(s); + status = unlink(sux.sun_path); + if (status < 0) { + perror("unlink"); + } + + return EXIT_SUCCESS; +} + + +static void +handler(int sig) +{ + 
/**
 * Serve a single request on an accepted client connection.
 *
 * Reads a one byte request ('4' for ICMPv4, '6' for ICMPv6), creates
 * the corresponding raw socket and passes it back as SCM_RIGHTS
 * ancillary data, with the request byte echoed as payload.  If the
 * socket cannot be created a single NUL byte is sent as a NAK.
 *
 * The caller keeps ownership of the client descriptor; the raw socket
 * created here is closed after it has been sent.
 *
 * @param client  Connected unix stream socket.
 */
static void
serve(int client)
{
#ifdef SO_PEERCRED
    struct ucred cr;
    socklen_t crlen;
#endif
    ssize_t nread, nsent;
    struct msghdr mh;
    struct iovec iov[1];
    char buf[1];
    struct cmsghdr *cmh;
    /* the union guarantees cmsghdr alignment of the control buffer */
    union {
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(int))];
    } cmsg;
    int fd;
    int status;

#ifdef SO_PEERCRED
    crlen = sizeof(cr);
    status = getsockopt(client, SOL_SOCKET, SO_PEERCRED, &cr, &crlen);
    if (status < 0) {
        perror("SO_PEERCRED");
        return;
    }

    /* no newline: the request type is appended to this line below */
    fprintf(stderr, "request from pid %lu uid %lu ",
            (unsigned long)cr.pid, (unsigned long)cr.uid);
#else
    (void)status;
#endif

    nread = read(client, buf, 1);
    if (nread < 0) {
        perror("read");
        return;
    }
    if (nread == 0) {
        /* client went away without a request; buf[0] is not valid */
        fprintf(stderr, "EOF from client\n");
        return;
    }

    fd = -1;
    switch (buf[0]) {

    case '4':
        fprintf(stderr, "for ICMPv4 socket\n");
        fd = mkrawsock(PF_INET);
        break;

    case '6':
        fprintf(stderr, "for ICMPv6 socket\n");
        fd = mkrawsock(PF_INET6);
        break;

    default:
        /* go through unsigned char so bytes >= 0x80 print as two digits */
        fprintf(stderr, "bad request 0x%02x\n",
                (unsigned int)(unsigned char)buf[0]);
        return;
    }

    if (fd < 0) {
        buf[0] = '\0';  /* NAK */
        nsent = write(client, buf, 1);
        (void)nsent;
        return;
    }

    memset(&mh, 0, sizeof(mh));
    memset(&cmsg, 0, sizeof(cmsg));

    /* payload: echo of the request byte */
    iov[0].iov_base = buf;
    iov[0].iov_len = 1;

    mh.msg_iov = iov;
    mh.msg_iovlen = 1;
    mh.msg_control = cmsg.buf;
    mh.msg_controllen = sizeof(cmsg.buf);

    cmh = CMSG_FIRSTHDR(&mh);
    cmh->cmsg_level = SOL_SOCKET;
    cmh->cmsg_type = SCM_RIGHTS;
    cmh->cmsg_len = CMSG_LEN(sizeof(fd));
    /* CMSG_DATA() is not guaranteed to be int-aligned */
    memcpy(CMSG_DATA(cmh), &fd, sizeof(fd));

    nsent = sendmsg(client, &mh, 0);
    if (nsent < 0) {
        perror("sendmsg");
    }

    /* our copy of the descriptor is not needed once it has been sent */
    close(fd);
}
100644 index 00000000..9e859944 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/portfwd.c @@ -0,0 +1,273 @@ +/* $Id: portfwd.c $ */ +/** @file + * NAT Network - port-forwarding rules. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" +#include "portfwd.h" + +#ifndef RT_OS_WINDOWS +#include <arpa/inet.h> +#include <netdb.h> +#include <poll.h> +#else +# include "winpoll.h" +#endif +#include <stdio.h> +#include <string.h> + +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "pxremap.h" + +#include "lwip/netif.h" + + +struct portfwd_msg { + struct fwspec *fwspec; + int add; +}; + + +static int portfwd_chan_send(struct portfwd_msg *); +static int portfwd_rule_add_del(struct fwspec *, int); +static int portfwd_pmgr_chan(struct pollmgr_handler *, SOCKET, int); + + +static struct pollmgr_handler portfwd_pmgr_chan_hdl; + + +void +portfwd_init(void) +{ + portfwd_pmgr_chan_hdl.callback = portfwd_pmgr_chan; + portfwd_pmgr_chan_hdl.data = NULL; + portfwd_pmgr_chan_hdl.slot = -1; + pollmgr_add_chan(POLLMGR_CHAN_PORTFWD, &portfwd_pmgr_chan_hdl); + + /* add preconfigured forwarders */ + fwtcp_init(); + fwudp_init(); +} + 
/**
 * Resolve a numeric address string into a caller supplied sockaddr
 * buffer.
 *
 * The bound on the resolved address length is checked at run time as
 * well as asserted, because the assertion may be compiled out and the
 * memcpy() would then be unchecked.
 *
 * @param addr_str  Address string to resolve.
 * @param hints     getaddrinfo() hints (family, socket type, flags).
 * @param sa        Destination buffer for the resolved sockaddr.
 * @param salen     Size of the destination buffer in bytes.
 *
 * @returns 0 on success, -1 on failure (sa is left untouched).
 */
static int
fwspec_resolve_addr(const char *addr_str, const struct addrinfo *hints,
                    void *sa, size_t salen)
{
    struct addrinfo *ai = NULL;
    int status;

    status = getaddrinfo(addr_str, NULL, hints, &ai);
    if (status != 0) {
        LogRel(("\"%s\": %s\n", addr_str, gai_strerror(status)));
        return -1;
    }

    LWIP_ASSERT1(ai != NULL);
    LWIP_ASSERT1(ai->ai_addrlen <= salen);
    if (ai == NULL) {
        return -1;
    }
    if (ai->ai_addrlen > salen) {
        freeaddrinfo(ai);
        return -1;
    }

    memcpy(sa, ai->ai_addr, ai->ai_addrlen);
    freeaddrinfo(ai);
    return 0;
}


/**
 * Fill in a port-forwarding rule from its textual description.
 *
 * Both addresses must be numeric (AI_NUMERICHOST) and of the family
 * selected by sdom.  Ports are stored in network byte order.
 *
 * @param fwspec        Rule to fill in.
 * @param sdom          PF_INET or PF_INET6.
 * @param stype         SOCK_STREAM or SOCK_DGRAM.
 * @param src_addr_str  Address to listen on.
 * @param src_port      Port to listen on (host byte order).
 * @param dst_addr_str  Address to forward to.
 * @param dst_port      Port to forward to (host byte order).
 *
 * @returns 0 on success, -1 if either address cannot be resolved;
 *          fwspec may be partially filled in on failure.
 */
int
fwspec_set(struct fwspec *fwspec, int sdom, int stype,
           const char *src_addr_str, uint16_t src_port,
           const char *dst_addr_str, uint16_t dst_port)
{
    struct addrinfo hints;

    LWIP_ASSERT1(sdom == PF_INET || sdom == PF_INET6);
    LWIP_ASSERT1(stype == SOCK_STREAM || stype == SOCK_DGRAM);

    fwspec->sdom = sdom;
    fwspec->stype = stype;

    memset(&hints, 0, sizeof(hints));
    hints.ai_family = (sdom == PF_INET) ? AF_INET : AF_INET6;
    hints.ai_socktype = stype;
    hints.ai_flags = AI_NUMERICHOST;

    if (fwspec_resolve_addr(src_addr_str, &hints,
                            &fwspec->src, sizeof(fwspec->src)) != 0)
    {
        return -1;
    }

    if (fwspec_resolve_addr(dst_addr_str, &hints,
                            &fwspec->dst, sizeof(fwspec->dst)) != 0)
    {
        return -1;
    }

    /* getaddrinfo() was called without a service, so set the ports here */
    if (sdom == PF_INET) {
        fwspec->src.sin.sin_port = htons(src_port);
        fwspec->dst.sin.sin_port = htons(dst_port);
    }
    else { /* PF_INET6 */
        fwspec->src.sin6.sin6_port = htons(src_port);
        fwspec->dst.sin6.sin6_port = htons(dst_port);
    }

    return 0;
}
+ * + * Currently we only have one interface so there's not much logic + * here. In the future we might need to additionally consult fwspec + * and routing table to determine which netif is used for connections + * to the specified guest. + */ +int +fwany_ipX_addr_set_src(ipX_addr_t *fwdsrc, const struct sockaddr *peer) +{ + int mapping; + + if (peer->sa_family == AF_INET) { + const struct sockaddr_in *peer4 = (const struct sockaddr_in *)peer; + ip_addr_t peerip4; + + peerip4.addr = peer4->sin_addr.s_addr; + mapping = pxremap_inbound_ip4(&fwdsrc->ip4, &peerip4); + } + else if (peer->sa_family == AF_INET6) { + const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)peer; + ip6_addr_t peerip6; + + memcpy(&peerip6, &peer6->sin6_addr, sizeof(ip6_addr_t)); + mapping = pxremap_inbound_ip6(&fwdsrc->ip6, &peerip6); + } + else { + mapping = PXREMAP_FAILED; + } + + return mapping; +} diff --git a/src/VBox/NetworkServices/NAT/portfwd.h b/src/VBox/NetworkServices/NAT/portfwd.h new file mode 100644 index 00000000..47c3f817 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/portfwd.h @@ -0,0 +1,84 @@ +/* $Id: portfwd.h $ */ +/** @file + * NAT Network - port-forwarding rules, definitions and declarations. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
/**
 * Port-forwarding rule: the host-side address to listen on and the
 * address to forward to.
 *
 * Which member of the src/dst unions is valid is selected by sdom.
 * fwspec_set() stores the ports in network byte order, as usual for
 * sockaddr structures.
 */
struct fwspec {
    int sdom;                   /* PF_INET, PF_INET6 */
    int stype;                  /* SOCK_STREAM, SOCK_DGRAM */

    /* listen on */
    union {
        struct sockaddr sa;         /* family-agnostic view */
        struct sockaddr_in sin;     /* sdom == PF_INET */
        struct sockaddr_in6 sin6;   /* sdom == PF_INET6 */
    } src;

    /* forward to */
    union {
        struct sockaddr sa;         /* family-agnostic view */
        struct sockaddr_in sin;     /* sdom == PF_INET */
        struct sockaddr_in6 sin6;   /* sdom == PF_INET6 */
    } dst;
};
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" + +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "portfwd.h" + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" + +#ifndef RT_OS_WINDOWS +#include <sys/poll.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> +#include <fcntl.h> +#include <stdio.h> +#include <iprt/string.h> +#include <unistd.h> +#include <err.h> +#else +# include <iprt/string.h> +#endif + +#if defined(SOCK_NONBLOCK) && defined(RT_OS_NETBSD) /* XXX: PR kern/47569 */ +# undef SOCK_NONBLOCK +#endif + +#ifndef __arraycount +# define __arraycount(a) (sizeof(a)/sizeof(a[0])) +#endif + +static FNRTSTRFORMATTYPE proxy_sockerr_rtstrfmt; + +static SOCKET proxy_create_socket(int, int); + +volatile struct proxy_options *g_proxy_options; +static sys_thread_t pollmgr_tid; + +/* XXX: for mapping loopbacks to addresses in our network (ip4) */ +struct netif *g_proxy_netif; + + +/* + * Called on the lwip thread (aka tcpip thread) from tcpip_init() via + * its "tcpip_init_done" callback. Raw API is ok to use here + * (e.g. rtadvd), but netconn API is not. 
/**
 * Initialize the proxy and start the poll manager thread.
 *
 * Called on the lwip thread (aka tcpip thread) from tcpip_init() via
 * its "tcpip_init_done" callback.  Raw API is ok to use here
 * (e.g. rtadvd), but netconn API is not.
 *
 * Registers the "%R[sockerr]" format type, publishes the options and
 * the netif through g_proxy_options / g_proxy_netif and then brings up
 * the individual services.  Failure to initialize the poll manager or
 * to create its thread terminates the process via errx().
 *
 * @param proxy_netif  The lwIP network interface the proxy serves.
 * @param opts         Proxy options; must not be NULL.  The pointer is
 *                     stored, not copied.
 */
void
proxy_init(struct netif *proxy_netif, struct proxy_options *opts)
{
    int status;

    LWIP_ASSERT1(opts != NULL);
    LWIP_UNUSED_ARG(proxy_netif);

    /* must be registered before the first DPRINTF that uses %R[sockerr] */
    status = RTStrFormatTypeRegister("sockerr", proxy_sockerr_rtstrfmt, NULL);
    AssertRC(status);

    g_proxy_options = opts;
    g_proxy_netif = proxy_netif;

#if 1
    proxy_rtadvd_start(proxy_netif);
#endif

    /*
     * XXX: We use stateless DHCPv6 only to report IPv6 address(es) of
     * nameserver(s).  Since we don't yet support IPv6 addresses in
     * HostDnsService, there's no point in running DHCPv6.
     */
#if 0
    dhcp6ds_init(proxy_netif);
#endif

    /* TFTP server is optional: only started when a root is configured */
    if (opts->tftp_root != NULL) {
        tftpd_init(proxy_netif, opts->tftp_root);
    }

    status = pollmgr_init();
    if (status < 0) {
        errx(EXIT_FAILURE, "failed to initialize poll manager");
        /* NOTREACHED */
    }

    /* pollmgr_init() is called before the services below are set up */
    pxtcp_init();
    pxudp_init();

    portfwd_init();

    pxdns_init(proxy_netif);

    pxping_init(proxy_netif, opts->icmpsock4, opts->icmpsock6);

    /* the poll manager thread is created last, after all services */
    pollmgr_tid = sys_thread_new("pollmgr_thread",
                                 pollmgr_thread, NULL,
                                 DEFAULT_THREAD_STACKSIZE,
                                 DEFAULT_THREAD_PRIO);
    if (!pollmgr_tid) {
        errx(EXIT_FAILURE, "failed to create poll manager thread");
        /* NOTREACHED */
    }
}
+ */ +static DECLCALLBACK(size_t) +proxy_sockerr_rtstrfmt(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, const void *pvValue, + int cchWidth, int cchPrecision, unsigned int fFlags, + void *pvUser) +{ + const int error = (int)(intptr_t)pvValue; + + const char *msg; + char buf[128]; + + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + + AssertReturn(strcmp(pszType, "sockerr") == 0, 0); + + /* make sure return type mismatch is caught */ + buf[0] = '\0'; +#if defined(RT_OS_LINUX) && defined(_GNU_SOURCE) + msg = strerror_r(error, buf, sizeof(buf)); +#else + strerror_r(error, buf, sizeof(buf)); + msg = buf; +#endif + return RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, "%s", msg); +} + +#else /* RT_OS_WINDOWS */ + +/** + * Formatter for %R[sockerr] - windows FormatMessage() version. + */ +static DECLCALLBACK(size_t) +proxy_sockerr_rtstrfmt(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, const void *pvValue, + int cchWidth, int cchPrecision, unsigned int fFlags, + void *pvUser) +{ + const int error = (int)(intptr_t)pvValue; + size_t cb = 0; + + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + + AssertReturn(strcmp(pszType, "sockerr") == 0, 0); + + /* + * XXX: Windows strerror() doesn't handle posix error codes, but + * since winsock uses its own, it shouldn't be much of a problem. + * If you see a strange error message, it's probably from + * FormatMessage() for an error from <WinError.h> that has the + * same numeric value. 
+ */ + if (error < _sys_nerr) { + char buf[128] = ""; + int status; + + status = strerror_s(buf, sizeof(buf), error); + if (status == 0) { + if (strcmp(buf, "Unknown error") == 0) { + /* windows strerror() doesn't add the numeric value */ + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + "Unknown error: %d", error); + } + else { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + "%s", buf); + } + } + else { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + "Unknown error: %d", error); + } + } + else { + DWORD nchars; + char *msg = NULL; + + nchars = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, error, LANG_NEUTRAL, + (LPSTR)&msg, 0, + NULL); + if (nchars == 0 || msg == NULL) { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + "Unknown error: %d", error); + } + else { + /* FormatMessage() "helpfully" adds newline; get rid of it */ + char *crpos = strchr(msg, '\r'); + if (crpos != NULL) { + *crpos = '\0'; + } + + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + "%s", msg); + } + + if (msg != NULL) { + LocalFree(msg); + } + } + + return cb; +} +#endif /* RT_OS_WINDOWS */ + + +/** + * Send static callback message from poll manager thread to lwip + * thread, scheduling a function call in lwip thread context. + * + * XXX: Existing lwip api only provides non-blocking version for this. + * It may fail when lwip thread is not running (mbox invalid) or if + * post failed (mbox full). How to handle these? + */ +void +proxy_lwip_post(struct tcpip_msg *msg) +{ + struct tcpip_callback_msg *m; + err_t error; + + LWIP_ASSERT1(msg != NULL); + + /* + * lwip plays games with fake incomplete struct tag to enforce API + */ + m = (struct tcpip_callback_msg *)msg; + error = tcpip_callbackmsg(m); + + if (error == ERR_VAL) { + /* XXX: lwip thread is not running (mbox invalid) */ + LWIP_ASSERT1(error != ERR_VAL); + } + + LWIP_ASSERT1(error == ERR_OK); +} + + +/** + * Create a non-blocking socket. 
/**
 * Create a non-blocking socket.  Disable SIGPIPE for TCP sockets if
 * possible.  On Linux it's not possible and should be disabled for
 * each send(2) individually.
 *
 * For TCP sockets the Nagle algorithm is turned off as well, and on
 * Windows the send buffer is raised to at least 64K.
 *
 * @param sdom   Socket domain, passed to socket() as is.
 * @param stype  Socket type (SOCK_STREAM or SOCK_DGRAM); platform
 *               specific creation flags are or-ed in internally.
 *
 * @returns The new socket, or INVALID_SOCKET if the socket could not be
 *          created or a mandatory option could not be set (in which
 *          case the socket is closed again).
 */
static SOCKET
proxy_create_socket(int sdom, int stype)
{
    SOCKET s;
    int stype_and_flags;
    int status;

    LWIP_UNUSED_ARG(status);    /* depends on ifdefs */


    stype_and_flags = stype;

    /* where supported, ask for a non-blocking socket right at creation */
#if defined(SOCK_NONBLOCK)
    stype_and_flags |= SOCK_NONBLOCK;
#endif

    /*
     * Disable SIGPIPE on disconnected socket.  It might be easier to
     * forgo it and just use MSG_NOSIGNAL on each send*(2), since we
     * have to do it for Linux anyway, but Darwin does NOT have that
     * flag (but has SO_NOSIGPIPE socket option).
     */
#if !defined(SOCK_NOSIGPIPE) && !defined(SO_NOSIGPIPE) && !defined(MSG_NOSIGNAL)
#if 0 /* XXX: Solaris has neither, the program should ignore SIGPIPE globally */
#error Need a way to disable SIGPIPE on connection oriented sockets!
#endif
#endif

#if defined(SOCK_NOSIGPIPE)
    if (stype == SOCK_STREAM) {
        stype_and_flags |= SOCK_NOSIGPIPE;
    }
#endif

    s = socket(sdom, stype_and_flags, 0);
    if (s == INVALID_SOCKET) {
        DPRINTF(("socket: %R[sockerr]\n", SOCKERRNO()));
        return INVALID_SOCKET;
    }

    /* make the socket non-blocking if that could not be done at creation */
#if defined(RT_OS_WINDOWS)
    {
        u_long mode = 1;
        status = ioctlsocket(s, FIONBIO, &mode);
        if (status == SOCKET_ERROR) {
            DPRINTF(("FIONBIO: %R[sockerr]\n", SOCKERRNO()));
            closesocket(s);
            return INVALID_SOCKET;
        }
    }
#elif !defined(SOCK_NONBLOCK)
    {
        int sflags;

        sflags = fcntl(s, F_GETFL, 0);
        if (sflags < 0) {
            DPRINTF(("F_GETFL: %R[sockerr]\n", SOCKERRNO()));
            closesocket(s);
            return INVALID_SOCKET;
        }

        status = fcntl(s, F_SETFL, sflags | O_NONBLOCK);
        if (status < 0) {
            DPRINTF(("O_NONBLOCK: %R[sockerr]\n", SOCKERRNO()));
            closesocket(s);
            return INVALID_SOCKET;
        }
    }
#endif

    /* no creation flag for it, but there is a socket option: use that */
#if !defined(SOCK_NOSIGPIPE) && defined(SO_NOSIGPIPE)
    if (stype == SOCK_STREAM) {
        int on = 1;
        const socklen_t onlen = sizeof(on);

        status = setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &on, onlen);
        if (status < 0) {
            DPRINTF(("SO_NOSIGPIPE: %R[sockerr]\n", SOCKERRNO()));
            closesocket(s);
            return INVALID_SOCKET;
        }
    }
#endif

    /*
     * Disable the Nagle algorithm.  Otherwise the host may hold back
     * packets that the guest wants to go out, causing potentially
     * horrible performance.  The guest is already applying the Nagle
     * algorithm (or not) the way it wants.
     */
    if (stype == SOCK_STREAM) {
        int on = 1;
        const socklen_t onlen = sizeof(on);

        status = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *)&on, onlen);
        if (status < 0) {
            /* not fatal: the socket is still returned to the caller */
            DPRINTF(("TCP_NODELAY: %R[sockerr]\n", SOCKERRNO()));
        }
    }

#if defined(RT_OS_WINDOWS)
    /*
     * lwIP only holds one packet of "refused data" for us.  Proxy
     * relies on OS socket send buffer and doesn't do its own
     * buffering.  Unfortunately on Windows send buffer is very small
     * (8K by default) and is not dynamically adapted by the OS it
     * seems.  So a single large write will fill it up and that will
     * make lwIP drop segments, causing guest TCP into pathologic
     * resend patterns.  As a quick and dirty fix just bump it up.
     */
    if (stype == SOCK_STREAM) {
        int sndbuf;
        socklen_t optlen = sizeof(sndbuf);

        status = getsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &optlen);
        if (status == 0) {
            /* only ever grow the buffer, never shrink it */
            if (sndbuf < 64 * 1024) {
                sndbuf = 64 * 1024;
                status = setsockopt(s, SOL_SOCKET, SO_SNDBUF,
                                    (char *)&sndbuf, optlen);
                if (status != 0) {
                    DPRINTF(("SO_SNDBUF: setsockopt: %R[sockerr]\n", SOCKERRNO()));
                }
            }
        }
        else {
            DPRINTF(("SO_SNDBUF: getsockopt: %R[sockerr]\n", SOCKERRNO()));
        }
    }
#endif

    return s;
}
+ */ +int +proxy_fixup_accepted_socket(SOCKET s) +{ + int sflags; + int status; + + sflags = fcntl(s, F_GETFL, 0); + if (sflags < 0) { + DPRINTF(("F_GETFL: %R[sockerr]\n", SOCKERRNO())); + return -1; + } + + status = fcntl(s, F_SETFL, sflags | O_NONBLOCK); + if (status < 0) { + DPRINTF(("O_NONBLOCK: %R[sockerr]\n", SOCKERRNO())); + return -1; + } + + return 0; +} +#endif /* RT_OS_LINUX */ + + +/** + * Create a socket for outbound connection to dst_addr:dst_port. + * + * The socket is non-blocking and TCP sockets has SIGPIPE disabled if + * possible. On Linux it's not possible and should be disabled for + * each send(2) individually. + */ +SOCKET +proxy_connected_socket(int sdom, int stype, + ipX_addr_t *dst_addr, u16_t dst_port) +{ + struct sockaddr_in6 dst_sin6; + struct sockaddr_in dst_sin; + struct sockaddr *pdst_sa; + socklen_t dst_sa_len; + void *pdst_addr; + const struct sockaddr *psrc_sa; + socklen_t src_sa_len; + int status; + int sockerr; + SOCKET s; + + LWIP_ASSERT1(sdom == PF_INET || sdom == PF_INET6); + LWIP_ASSERT1(stype == SOCK_STREAM || stype == SOCK_DGRAM); + + DPRINTF(("---> %s ", stype == SOCK_STREAM ? "TCP" : "UDP")); + if (sdom == PF_INET6) { + pdst_sa = (struct sockaddr *)&dst_sin6; + pdst_addr = (void *)&dst_sin6.sin6_addr; + + memset(&dst_sin6, 0, sizeof(dst_sin6)); +#if HAVE_SA_LEN + dst_sin6.sin6_len = +#endif + dst_sa_len = sizeof(dst_sin6); + dst_sin6.sin6_family = AF_INET6; + memcpy(&dst_sin6.sin6_addr, &dst_addr->ip6, sizeof(ip6_addr_t)); + dst_sin6.sin6_port = htons(dst_port); + + DPRINTF(("[%RTnaipv6]:%d ", &dst_sin6.sin6_addr, dst_port)); + } + else { /* sdom = PF_INET */ + pdst_sa = (struct sockaddr *)&dst_sin; + pdst_addr = (void *)&dst_sin.sin_addr; + + memset(&dst_sin, 0, sizeof(dst_sin)); +#if HAVE_SA_LEN + dst_sin.sin_len = +#endif + dst_sa_len = sizeof(dst_sin); + dst_sin.sin_family = AF_INET; + dst_sin.sin_addr.s_addr = dst_addr->ip4.addr; /* byte-order? 
*/ + dst_sin.sin_port = htons(dst_port); + + DPRINTF(("%RTnaipv4:%d ", dst_sin.sin_addr.s_addr, dst_port)); + } + + s = proxy_create_socket(sdom, stype); + if (s == INVALID_SOCKET) { + return INVALID_SOCKET; + } + DPRINTF(("socket %d\n", s)); + + /** @todo needs locking if dynamic modifyvm is allowed */ + if (sdom == PF_INET6) { + psrc_sa = (const struct sockaddr *)g_proxy_options->src6; + src_sa_len = sizeof(struct sockaddr_in6); + } + else { + psrc_sa = (const struct sockaddr *)g_proxy_options->src4; + src_sa_len = sizeof(struct sockaddr_in); + } + if (psrc_sa != NULL) { + status = bind(s, psrc_sa, src_sa_len); + if (status == SOCKET_ERROR) { + sockerr = SOCKERRNO(); + DPRINTF(("socket %d: bind: %R[sockerr]\n", s, sockerr)); + closesocket(s); + SET_SOCKERRNO(sockerr); + return INVALID_SOCKET; + } + } + + status = connect(s, pdst_sa, dst_sa_len); + if (status == SOCKET_ERROR +#if !defined(RT_OS_WINDOWS) + && SOCKERRNO() != EINPROGRESS +#else + && SOCKERRNO() != EWOULDBLOCK +#endif + ) + { + sockerr = SOCKERRNO(); + DPRINTF(("socket %d: connect: %R[sockerr]\n", s, sockerr)); + closesocket(s); + SET_SOCKERRNO(sockerr); + return INVALID_SOCKET; + } + + return s; +} + + +/** + * Create a socket for inbound (port-forwarded) connections to + * src_addr (port is part of sockaddr, so not a separate argument). + * + * The socket is non-blocking and TCP sockets has SIGPIPE disabled if + * possible. On Linux it's not possible and should be disabled for + * each send(2) individually. + * + * TODO?: Support v6-mapped v4 so that user can specify she wants + * "udp" and get both versions? 
+ */ +SOCKET +proxy_bound_socket(int sdom, int stype, struct sockaddr *src_addr) +{ + SOCKET s; + int on; + const socklen_t onlen = sizeof(on); + int status; + int sockerr; + + s = proxy_create_socket(sdom, stype); + if (s == INVALID_SOCKET) { + return INVALID_SOCKET; + } + DPRINTF(("socket %d\n", s)); + + on = 1; + status = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&on, onlen); + if (status < 0) { /* not good, but not fatal */ + DPRINTF(("SO_REUSEADDR: %R[sockerr]\n", SOCKERRNO())); + } + + status = bind(s, src_addr, + sdom == PF_INET ? + sizeof(struct sockaddr_in) + : sizeof(struct sockaddr_in6)); + if (status == SOCKET_ERROR) { + sockerr = SOCKERRNO(); + DPRINTF(("bind: %R[sockerr]\n", sockerr)); + closesocket(s); + SET_SOCKERRNO(sockerr); + return INVALID_SOCKET; + } + + if (stype == SOCK_STREAM) { + status = listen(s, 5); + if (status == SOCKET_ERROR) { + sockerr = SOCKERRNO(); + DPRINTF(("listen: %R[sockerr]\n", sockerr)); + closesocket(s); + SET_SOCKERRNO(sockerr); + return INVALID_SOCKET; + } + } + + return s; +} + + +void +proxy_reset_socket(SOCKET s) +{ + struct linger linger; + + linger.l_onoff = 1; + linger.l_linger = 0; + + /* On Windows we can run into issue here, perhaps SO_LINGER isn't enough, and + * we should use WSA{Send,Recv}Disconnect instead. 
+ * + * Links for the reference: + * http://msdn.microsoft.com/en-us/library/windows/desktop/ms738547%28v=vs.85%29.aspx + * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4468997 + */ + setsockopt(s, SOL_SOCKET, SO_LINGER, (char *)&linger, sizeof(linger)); + + closesocket(s); +} + + +int +proxy_sendto(SOCKET sock, struct pbuf *p, void *name, size_t namelen) +{ + struct pbuf *q; + size_t i, clen; +#ifndef RT_OS_WINDOWS + struct msghdr mh; + ssize_t nsent; +#else + DWORD nsent; +#endif + int rc; + IOVEC fixiov[8]; /* fixed size (typical case) */ + const size_t fixiovsize = sizeof(fixiov)/sizeof(fixiov[0]); + IOVEC *dyniov; /* dynamically sized */ + IOVEC *iov; + int error = 0; + + /* + * Static iov[] is usually enough since UDP protocols use small + * datagrams to avoid fragmentation, but be prepared. + */ + clen = pbuf_clen(p); + if (clen > fixiovsize) { + /* + * XXX: TODO: check that clen is shorter than IOV_MAX + */ + dyniov = (IOVEC *)malloc(clen * sizeof(*dyniov)); + if (dyniov == NULL) { + error = -errno; /* sic: not a socket error */ + goto out; + } + iov = dyniov; + } + else { + dyniov = NULL; + iov = fixiov; + } + + + for (q = p, i = 0; i < clen; q = q->next, ++i) { + LWIP_ASSERT1(q != NULL); + + IOVEC_SET_BASE(iov[i], q->payload); + IOVEC_SET_LEN(iov[i], q->len); + } + +#ifndef RT_OS_WINDOWS + memset(&mh, 0, sizeof(mh)); + mh.msg_name = name; + mh.msg_namelen = namelen; + mh.msg_iov = iov; + mh.msg_iovlen = clen; + + nsent = sendmsg(sock, &mh, 0); + rc = (nsent >= 0) ? 
0 : SOCKET_ERROR; +#else + rc = WSASendTo(sock, iov, (DWORD)clen, &nsent, 0, + name, (int)namelen, NULL, NULL); +#endif + if (rc == SOCKET_ERROR) { + error = SOCKERRNO(); + DPRINTF(("%s: socket %d: sendmsg: %R[sockerr]\n", + __func__, sock, error)); + error = -error; + } + + out: + if (dyniov != NULL) { + free(dyniov); + } + return error; +} + + +static const char *lwiperr[] = { + "ERR_OK", + "ERR_MEM", + "ERR_BUF", + "ERR_TIMEOUT", + "ERR_RTE", + "ERR_INPROGRESS", + "ERR_VAL", + "ERR_WOULDBLOCK", + "ERR_USE", + "ERR_ISCONN", + "ERR_ABRT", + "ERR_RST", + "ERR_CLSD", + "ERR_CONN", + "ERR_ARG", + "ERR_IF" +}; + + +const char * +proxy_lwip_strerr(err_t error) +{ + static char buf[32]; + int e = -error; + + if (0 <= e && e < (int)__arraycount(lwiperr)) { + return lwiperr[e]; + } + else { + RTStrPrintf(buf, sizeof(buf), "unknown error %d", error); + return buf; + } +} diff --git a/src/VBox/NetworkServices/NAT/proxy.h b/src/VBox/NetworkServices/NAT/proxy.h new file mode 100644 index 00000000..7da34e30 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/proxy.h @@ -0,0 +1,137 @@ +/* $Id: proxy.h $ */ +/** @file + * NAT Network - common definitions and declarations. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
/**
 * Configuration handed to proxy_init().  The proxy keeps the pointer
 * (see g_proxy_options), it does not copy the structure.
 */
struct proxy_options {
    ip_addr_t ipv4_addr;        /* presumably the proxy's own IPv4 address - TODO confirm */
    ip_addr_t ipv4_mask;        /* presumably the netmask for ipv4_addr - TODO confirm */
    ip6_addr_t ipv6_addr;       /* presumably the proxy's own IPv6 address - TODO confirm */
    int ipv6_enabled;
    int ipv6_defroute;
    SOCKET icmpsock4;           /* passed on to pxping_init() by proxy_init() */
    SOCKET icmpsock6;           /* passed on to pxping_init() by proxy_init() */
    const char *tftp_root;      /* TFTP server is started only if this is not NULL */
    const struct sockaddr_in *src4;   /* if not NULL, outbound IPv4 sockets are bound to it */
    const struct sockaddr_in6 *src6;  /* if not NULL, outbound IPv6 sockets are bound to it */
    const struct ip4_lomap_desc *lomap_desc; /* table of ip4_lomap entries, see above */
    const char **nameservers;
};
defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) +# define HAVE_SA_LEN 0 +#else +# define HAVE_SA_LEN 1 +#endif + +#define LWIP_ASSERT1(condition) LWIP_ASSERT(#condition, condition) + +/* + * TODO: DPRINTF0 should probably become LogRel but its usage needs to + * be cleaned up a bit before. + */ +#define DPRINTF0(a) Log(a) + +#define DPRINTF(a) DPRINTF1(a) +#define DPRINTF1(a) Log2(a) +#define DPRINTF2(a) Log3(a) + +#endif /* !VBOX_INCLUDED_SRC_NAT_proxy_h */ + diff --git a/src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c b/src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c new file mode 100644 index 00000000..a42629f7 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c @@ -0,0 +1,327 @@ +/* $Id: proxy_dhcp6ds.c $ */ +/** @file + * NAT Network - Simple stateless DHCPv6 (RFC 3736) server. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" +#include "dhcp6.h" +#include "proxy.h" + +#include <string.h> + +#include "lwip/opt.h" +#include "lwip/mld6.h" +#include "lwip/udp.h" + + +static void dhcp6ds_recv(void *, struct udp_pcb *, struct pbuf *, ip6_addr_t *, u16_t); + + +/* ff02::1:2 - "All_DHCP_Relay_Agents_and_Servers" link-scoped multicast */ +static /* const */ ip6_addr_t all_dhcp_relays_and_servers = { + { PP_HTONL(0xff020000UL), 0, 0, PP_HTONL(0x00010002UL) } +}; + +/* ff05::1:3 - "All_DHCP_Servers" site-scoped multicast */ +static /* const */ ip6_addr_t all_dhcp_servers = { + { PP_HTONL(0xff050000UL), 0, 0, PP_HTONL(0x00010003UL) } +}; + + +static struct udp_pcb *dhcp6ds_pcb; + +/* prebuilt Server ID option */ +#define DUID_LL_LEN (/* duid type */ 2 + /* hw type */ 2 + /* ether addr */ 6) +static u8_t dhcp6ds_serverid[/* opt */ 2 + /* optlen */ 2 + DUID_LL_LEN]; + +/* prebuilt DNS Servers option */ +static u8_t dhcp6ds_dns[/* opt */ 2 + /* optlen */ 2 + /* IPv6 addr */ 16]; + + +/** + * Initialize DHCP6 server. + * + * Join DHCP6 multicast groups. + * Create and bind server pcb. + * Prebuild fixed parts of reply. 
+ */ +err_t +dhcp6ds_init(struct netif *proxy_netif) +{ + ip6_addr_t *pxaddr, *pxaddr_nonlocal; + int i; + err_t error; + + LWIP_ASSERT1(proxy_netif != NULL); + LWIP_ASSERT1(proxy_netif->hwaddr_len == 6); /* ethernet */ + + pxaddr = netif_ip6_addr(proxy_netif, 0); /* link local */ + + /* + * XXX: TODO: This is a leftover from testing with IPv6 mapped + * loopback with a special IPv6->IPv4 mapping hack in pxudp.c + */ + /* advertise ourself as DNS resolver - will be proxied to host */ + pxaddr_nonlocal = NULL; + for (i = 1; i < LWIP_IPV6_NUM_ADDRESSES; ++i) { + if (ip6_addr_ispreferred(netif_ip6_addr_state(proxy_netif, i)) + && !ip6_addr_islinklocal(netif_ip6_addr(proxy_netif, i))) + { + pxaddr_nonlocal = netif_ip6_addr(proxy_netif, i); + break; + } + } + LWIP_ASSERT1(pxaddr_nonlocal != NULL); /* must be configured on the netif */ + + + error = mld6_joingroup(pxaddr, &all_dhcp_relays_and_servers); + if (error != ERR_OK) { + DPRINTF0(("%s: failed to join All_DHCP_Relay_Agents_and_Servers: %s\n", + __func__, proxy_lwip_strerr(error))); + goto err; + } + + error = mld6_joingroup(pxaddr, &all_dhcp_servers); + if (error != ERR_OK) { + DPRINTF0(("%s: failed to join All_DHCP_Servers: %s\n", + __func__, proxy_lwip_strerr(error))); + goto err1; + } + + + dhcp6ds_pcb = udp_new_ip6(); + if (dhcp6ds_pcb == NULL) { + DPRINTF0(("%s: failed to allocate PCB\n", __func__)); + error = ERR_MEM; + goto err2; + } + + udp_recv_ip6(dhcp6ds_pcb, dhcp6ds_recv, NULL); + + error = udp_bind_ip6(dhcp6ds_pcb, pxaddr, DHCP6_SERVER_PORT); + if (error != ERR_OK) { + DPRINTF0(("%s: failed to bind PCB\n", __func__)); + goto err3; + } + + +#define OPT_SET(buf, off, c) do { \ + u16_t _s = PP_HTONS(c); \ + memcpy(&(buf)[off], &_s, sizeof(u16_t)); \ + } while (0) + +#define SERVERID_SET(off, c) OPT_SET(dhcp6ds_serverid, (off), (c)) +#define DNSSRV_SET(off, c) OPT_SET(dhcp6ds_dns, (off), (c)) + + SERVERID_SET(0, DHCP6_OPTION_SERVERID); + SERVERID_SET(2, DUID_LL_LEN); + SERVERID_SET(4, DHCP6_DUID_LL); + 
SERVERID_SET(6, ARES_HRD_ETHERNET); + memcpy(&dhcp6ds_serverid[8], proxy_netif->hwaddr, 6); + + DNSSRV_SET(0, DHCP6_OPTION_DNS_SERVERS); + DNSSRV_SET(2, 16); /* one IPv6 address */ + /* + * XXX: TODO: This is a leftover from testing with IPv6 mapped + * loopback with a special IPv6->IPv4 mapping hack in pxudp.c + */ + memcpy(&dhcp6ds_dns[4], pxaddr_nonlocal, sizeof(ip6_addr_t)); + +#undef SERVERID_SET +#undef DNSSRV_SET + + return ERR_OK; + + + err3: + udp_remove(dhcp6ds_pcb); + dhcp6ds_pcb = NULL; + err2: + mld6_leavegroup(pxaddr, &all_dhcp_servers); + err1: + mld6_leavegroup(pxaddr, &all_dhcp_relays_and_servers); + err: + return error; +} + + +static u8_t dhcp6ds_reply_buf[1024]; + +static void +dhcp6ds_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip6_addr_t *addr, u16_t port) +{ + u8_t msg_header[4]; + unsigned int msg_type, msg_tid; + int copied; + size_t roff; + struct pbuf *q; + err_t error; + + LWIP_UNUSED_ARG(arg); + LWIP_ASSERT1(p != NULL); + + copied = pbuf_copy_partial(p, msg_header, sizeof(msg_header), 0); + if (copied != sizeof(msg_header)) { + DPRINTF(("%s: message header truncated\n", __func__)); + pbuf_free(p); + return; + } + pbuf_header(p, -(s16_t)sizeof(msg_header)); + + msg_type = msg_header[0]; + msg_tid = (msg_header[1] << 16) | (msg_header[2] << 8) | msg_header[3]; + DPRINTF(("%s: type %u, tid 0x%6x\n", __func__, msg_type, msg_tid)); + if (msg_type != DHCP6_INFORMATION_REQUEST) { /** @todo ? 
RELAY_FORW */ + pbuf_free(p); + return; + } + + roff = 0; + + msg_header[0] = DHCP6_REPLY; + memcpy(dhcp6ds_reply_buf + roff, msg_header, sizeof(msg_header)); + roff += sizeof(msg_header); + + + /* loop over options */ + while (p->tot_len > 0) { + u16_t opt, optlen; + + /* fetch option code */ + copied = pbuf_copy_partial(p, &opt, sizeof(opt), 0); + if (copied != sizeof(opt)) { + DPRINTF(("%s: option header truncated\n", __func__)); + pbuf_free(p); + return; + } + pbuf_header(p, -(s16_t)sizeof(opt)); + opt = ntohs(opt); + + /* fetch option length */ + copied = pbuf_copy_partial(p, &optlen, sizeof(optlen), 0); + if (copied != sizeof(optlen)) { + DPRINTF(("%s: option %u length truncated\n", __func__, opt)); + pbuf_free(p); + return; + } + pbuf_header(p, -(s16_t)sizeof(optlen)); + optlen = ntohs(optlen); + + /* enough data? */ + if (optlen > p->tot_len) { + DPRINTF(("%s: option %u truncated: expect %u, got %u\n", + __func__, opt, optlen, p->tot_len)); + pbuf_free(p); + return; + } + + DPRINTF2(("%s: option %u length %u\n", __func__, opt, optlen)); + + if (opt == DHCP6_OPTION_CLIENTID) { + u16_t s; + + /* "A DUID can be no more than 128 octets long (not + including the type code)." 
*/ + if (optlen > 130) { + DPRINTF(("%s: client DUID too long: %u\n", __func__, optlen)); + pbuf_free(p); + return; + } + + s = PP_HTONS(DHCP6_OPTION_CLIENTID); + memcpy(dhcp6ds_reply_buf + roff, &s, sizeof(s)); + roff += sizeof(s); + + s = ntohs(optlen); + memcpy(dhcp6ds_reply_buf + roff, &s, sizeof(s)); + roff += sizeof(s); + + pbuf_copy_partial(p, dhcp6ds_reply_buf + roff, optlen, 0); + roff += optlen; + } + else if (opt == DHCP6_OPTION_ORO) { + u16_t *opts; + int i, nopts; + + if (optlen % 2 != 0) { + DPRINTF2(("%s: Option Request of odd length\n", __func__)); + goto bad_oro; + } + nopts = optlen / 2; + + opts = (u16_t *)malloc(optlen); + if (opts == NULL) { + DPRINTF2(("%s: failed to allocate space for Option Request\n", + __func__)); + goto bad_oro; + } + + pbuf_copy_partial(p, opts, optlen, 0); + for (i = 0; i < nopts; ++i) { + opt = ntohs(opts[i]); + DPRINTF2(("> request option %u\n", opt)); + }; + free(opts); + + bad_oro: /* empty */; + } + + pbuf_header(p, -optlen); /* go to next option */ + } + pbuf_free(p); /* done */ + + + memcpy(dhcp6ds_reply_buf + roff, dhcp6ds_serverid, sizeof(dhcp6ds_serverid)); + roff += sizeof(dhcp6ds_serverid); + + memcpy(dhcp6ds_reply_buf + roff, dhcp6ds_dns, sizeof(dhcp6ds_dns)); + roff += sizeof(dhcp6ds_dns); + + Assert(roff == (u16_t)roff); + q = pbuf_alloc(PBUF_RAW, (u16_t)roff, PBUF_RAM); + if (q == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)roff)); + return; + } + + error = pbuf_take(q, dhcp6ds_reply_buf, (u16_t)roff); + if (error != ERR_OK) { + DPRINTF(("%s: pbuf_take(%d) failed: %s\n", + __func__, (int)roff, proxy_lwip_strerr(error))); + pbuf_free(q); + return; + } + + error = udp_sendto_ip6(pcb, q, addr, port); + if (error != ERR_OK) { + DPRINTF(("%s: udp_sendto failed: %s\n", + __func__, proxy_lwip_strerr(error))); + } + + pbuf_free(q); +} diff --git a/src/VBox/NetworkServices/NAT/proxy_pollmgr.c b/src/VBox/NetworkServices/NAT/proxy_pollmgr.c new file mode 100644 index 00000000..a62c1949 --- 
/**
 * State of the poll manager; there is a single file scope instance,
 * also called pollmgr.
 *
 * fds and handlers are two arrays of the same allocated size.
 * NOTE(review): presumably handlers[i] belongs to fds[i] - confirm
 * against pollmgr_add_at().
 */
struct pollmgr {
    struct pollfd *fds;
    struct pollmgr_handler **handlers;
    nfds_t capacity;            /* allocated size of the arrays */
    nfds_t nfds;                /* part of the arrays in use */

    /* channels (socketpair) for static slots */
    SOCKET chan[POLLMGR_SLOT_STATIC_COUNT][2];
    /* indices into the second dimension of chan[][] */
#define POLLMGR_CHFD_RD 0       /* - pollmgr side */
#define POLLMGR_CHFD_WR 1       /* - client side */


    /* emulate channels with request queue */
    RTREQQUEUE queue;           /* created by pollmgr_init() */
    struct pollmgr_handler queue_handler;
    struct pollmgr_chan chan_handlers[POLLMGR_CHAN_COUNT];
} pollmgr;
+ */ +u8_t pollmgr_udpbuf[64 * 1024]; + + +int +pollmgr_init(void) +{ + struct pollfd *newfds; + struct pollmgr_handler **newhdls; + nfds_t newcap; + int rc, status; + nfds_t i; + + rc = RTReqQueueCreate(&pollmgr.queue); + if (RT_FAILURE(rc)) + return -1; + + pollmgr.fds = NULL; + pollmgr.handlers = NULL; + pollmgr.capacity = 0; + pollmgr.nfds = 0; + + for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) { + pollmgr.chan[i][POLLMGR_CHFD_RD] = INVALID_SOCKET; + pollmgr.chan[i][POLLMGR_CHFD_WR] = INVALID_SOCKET; + } + + for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) { +#ifndef RT_OS_WINDOWS + int j; + + status = socketpair(PF_LOCAL, SOCK_DGRAM, 0, pollmgr.chan[i]); + if (status < 0) { + DPRINTF(("socketpair: %R[sockerr]\n", SOCKERRNO())); + goto cleanup_close; + } + + /* now manually make them O_NONBLOCK */ + for (j = 0; j < 2; ++j) { + int s = pollmgr.chan[i][j]; + int sflags; + + sflags = fcntl(s, F_GETFL, 0); + if (sflags < 0) { + DPRINTF0(("F_GETFL: %R[sockerr]\n", errno)); + goto cleanup_close; + } + + status = fcntl(s, F_SETFL, sflags | O_NONBLOCK); + if (status < 0) { + DPRINTF0(("O_NONBLOCK: %R[sockerr]\n", errno)); + goto cleanup_close; + } + } +#else + status = RTWinSocketPair(PF_INET, SOCK_DGRAM, 0, pollmgr.chan[i]); + if (RT_FAILURE(status)) { + goto cleanup_close; + } +#endif + } + + + newcap = 16; /* XXX: magic */ + LWIP_ASSERT1(newcap >= POLLMGR_SLOT_STATIC_COUNT); + + newfds = (struct pollfd *) + malloc(newcap * sizeof(*pollmgr.fds)); + if (newfds == NULL) { + DPRINTF(("%s: Failed to allocate fds array\n", __func__)); + goto cleanup_close; + } + + newhdls = (struct pollmgr_handler **) + malloc(newcap * sizeof(*pollmgr.handlers)); + if (newhdls == NULL) { + DPRINTF(("%s: Failed to allocate handlers array\n", __func__)); + free(newfds); + goto cleanup_close; + } + + pollmgr.capacity = newcap; + pollmgr.fds = newfds; + pollmgr.handlers = newhdls; + + pollmgr.nfds = POLLMGR_SLOT_STATIC_COUNT; + + for (i = 0; i < pollmgr.capacity; ++i) { + pollmgr.fds[i].fd = 
INVALID_SOCKET; + pollmgr.fds[i].events = 0; + pollmgr.fds[i].revents = 0; + } + + /* add request queue notification */ + pollmgr.queue_handler.callback = pollmgr_queue_callback; + pollmgr.queue_handler.data = NULL; + pollmgr.queue_handler.slot = -1; + + pollmgr_add_at(POLLMGR_QUEUE, &pollmgr.queue_handler, + pollmgr.chan[POLLMGR_QUEUE][POLLMGR_CHFD_RD], + POLLIN); + + return 0; + + cleanup_close: + for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) { + SOCKET *chan = pollmgr.chan[i]; + if (chan[POLLMGR_CHFD_RD] != INVALID_SOCKET) { + closesocket(chan[POLLMGR_CHFD_RD]); + closesocket(chan[POLLMGR_CHFD_WR]); + } + } + + return -1; +} + + +/* + * Add new channel. We now implement channels with request queue, so + * all channels get the same socket that triggers queue processing. + * + * Must be called before pollmgr loop is started, so no locking. + */ +SOCKET +pollmgr_add_chan(int slot, struct pollmgr_handler *handler) +{ + AssertReturn(0 <= slot && slot < POLLMGR_CHAN_COUNT, INVALID_SOCKET); + AssertReturn(handler != NULL && handler->callback != NULL, INVALID_SOCKET); + + handler->slot = slot; + pollmgr.chan_handlers[slot].handler = handler; + return pollmgr.chan[POLLMGR_QUEUE][POLLMGR_CHFD_WR]; +} + + +/* + * This used to actually send data over the channel's socket. Now we + * queue a request and send single byte notification over shared + * POLLMGR_QUEUE socket. + */ +ssize_t +pollmgr_chan_send(int slot, void *buf, size_t nbytes) +{ + static const char notification = 0x5a; + + void *ptr; + SOCKET fd; + ssize_t nsent; + int rc; + + AssertReturn(0 <= slot && slot < POLLMGR_CHAN_COUNT, -1); + + /* + * XXX: Hack alert. We only ever "sent" single pointer which was + * simultaneously both the wakeup event for the poll and the + * argument for the channel handler that it read from the channel. + * So now we pass this pointer to the request and arrange for the + * handler to "read" it when it asks for it. 
+ */ + if (nbytes != sizeof(void *)) { + return -1; + } + + ptr = *(void **)buf; + + rc = RTReqQueueCallEx(pollmgr.queue, NULL, 0, + RTREQFLAGS_VOID | RTREQFLAGS_NO_WAIT, + (PFNRT)pollmgr_chan_call_handler, 2, + slot, ptr); + + fd = pollmgr.chan[POLLMGR_QUEUE][POLLMGR_CHFD_WR]; + nsent = send(fd, ¬ification, 1, 0); + if (nsent == SOCKET_ERROR) { + DPRINTF(("send on chan %d: %R[sockerr]\n", slot, SOCKERRNO())); + return -1; + } + else if ((size_t)nsent != 1) { + DPRINTF(("send on chan %d: datagram truncated to %u bytes", + slot, (unsigned int)nsent)); + return -1; + } + + /* caller thinks it's sending the pointer */ + return sizeof(void *); +} + + +/* + * pollmgr_chan_send() sent us a notification, process the queue. + */ +static int +pollmgr_queue_callback(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + ssize_t nread; + int sockerr; + int rc; + + RT_NOREF(handler, revents); + Assert(pollmgr.queue != NIL_RTREQQUEUE); + + nread = recv(fd, (char *)pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0); + sockerr = SOCKERRNO(); /* save now, may be clobbered */ + + if (nread == SOCKET_ERROR) { + DPRINTF0(("%s: recv: %R[sockerr]\n", __func__, sockerr)); + return POLLIN; + } + + DPRINTF2(("%s: read %zd\n", __func__, nread)); + if (nread == 0) { + return POLLIN; + } + + rc = RTReqQueueProcess(pollmgr.queue, 0); + if (RT_UNLIKELY(rc != VERR_TIMEOUT && RT_FAILURE_NP(rc))) { + DPRINTF0(("%s: RTReqQueueProcess: %Rrc\n", __func__, rc)); + } + + return POLLIN; +} + + +/* + * Queued requests use this function to emulate the call to the + * handler's callback. 
+ */ +static void +pollmgr_chan_call_handler(int slot, void *arg) +{ + struct pollmgr_handler *handler; + int nevents; + + AssertReturnVoid(0 <= slot && slot < POLLMGR_CHAN_COUNT); + + handler = pollmgr.chan_handlers[slot].handler; + AssertReturnVoid(handler != NULL && handler->callback != NULL); + + /* arrange for pollmgr_chan_recv_ptr() to "receive" the arg */ + pollmgr.chan_handlers[slot].arg = arg; + pollmgr.chan_handlers[slot].arg_valid = true; + + nevents = handler->callback(handler, INVALID_SOCKET, POLLIN); + if (nevents != POLLIN) { + DPRINTF2(("%s: nevents=0x%x!\n", __func__, nevents)); + } +} + + +/* + * "Receive" a pointer "sent" over poll manager channel. + */ +void * +pollmgr_chan_recv_ptr(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + int slot; + void *ptr; + + RT_NOREF(fd); + + slot = handler->slot; + Assert(0 <= slot && slot < POLLMGR_CHAN_COUNT); + + if (revents & POLLNVAL) { + errx(EXIT_FAILURE, "chan %d: fd invalid", (int)handler->slot); + /* NOTREACHED */ + } + + if (revents & (POLLERR | POLLHUP)) { + errx(EXIT_FAILURE, "chan %d: fd error", (int)handler->slot); + /* NOTREACHED */ + } + + LWIP_ASSERT1(revents & POLLIN); + + if (!pollmgr.chan_handlers[slot].arg_valid) { + err(EXIT_FAILURE, "chan %d: recv", (int)handler->slot); + /* NOTREACHED */ + } + + ptr = pollmgr.chan_handlers[slot].arg; + pollmgr.chan_handlers[slot].arg_valid = false; + + return ptr; +} + + +/* + * Must be called from pollmgr loop (via callbacks), so no locking. 
+ */ +int +pollmgr_add(struct pollmgr_handler *handler, SOCKET fd, int events) +{ + int slot; + + DPRINTF2(("%s: new fd %d\n", __func__, fd)); + + if (pollmgr.nfds == pollmgr.capacity) { + struct pollfd *newfds; + struct pollmgr_handler **newhdls; + nfds_t newcap; + nfds_t i; + + newcap = pollmgr.capacity * 2; + + newfds = (struct pollfd *) + realloc(pollmgr.fds, newcap * sizeof(*pollmgr.fds)); + if (newfds == NULL) { + DPRINTF(("%s: Failed to reallocate fds array\n", __func__)); + handler->slot = -1; + return -1; + } + + pollmgr.fds = newfds; /* don't crash/leak if realloc(handlers) fails */ + /* but don't update capacity yet! */ + + newhdls = (struct pollmgr_handler **) + realloc(pollmgr.handlers, newcap * sizeof(*pollmgr.handlers)); + if (newhdls == NULL) { + DPRINTF(("%s: Failed to reallocate handlers array\n", __func__)); + /* if we failed to realloc here, then fds points to the + * new array, but we pretend we still has old capacity */ + handler->slot = -1; + return -1; + } + + pollmgr.handlers = newhdls; + pollmgr.capacity = newcap; + + for (i = pollmgr.nfds; i < newcap; ++i) { + newfds[i].fd = INVALID_SOCKET; + newfds[i].events = 0; + newfds[i].revents = 0; + newhdls[i] = NULL; + } + } + + slot = pollmgr.nfds; + ++pollmgr.nfds; + + pollmgr_add_at(slot, handler, fd, events); + return slot; +} + + +static void +pollmgr_add_at(int slot, struct pollmgr_handler *handler, SOCKET fd, int events) +{ + pollmgr.fds[slot].fd = fd; + pollmgr.fds[slot].events = events; + pollmgr.fds[slot].revents = 0; + pollmgr.handlers[slot] = handler; + + handler->slot = slot; +} + + +void +pollmgr_update_events(int slot, int events) +{ + LWIP_ASSERT1(slot >= POLLMGR_SLOT_FIRST_DYNAMIC); + LWIP_ASSERT1((nfds_t)slot < pollmgr.nfds); + + pollmgr.fds[slot].events = events; +} + + +void +pollmgr_del_slot(int slot) +{ + LWIP_ASSERT1(slot >= POLLMGR_SLOT_FIRST_DYNAMIC); + + DPRINTF2(("%s(%d): fd %d ! 
/*
 * Thread entry point: runs the poll loop.  The argument is ignored.
 */
void
pollmgr_thread(void *ignored)
{
    LWIP_UNUSED_ARG(ignored);
    pollmgr_loop();
}


/*
 * The poll loop proper.  Never returns; terminates the process if
 * polling fails with anything but EINTR.
 *
 * Each iteration has two phases:
 *
 * 1. Processing: walk the slots in use, call the handler of every
 *    slot with pending events and store the handler's return value
 *    as the new events mask.  A negative return value, a missing
 *    handler/callback, or an fd clobbered by pollmgr_del_slot() marks
 *    the slot for deletion.  Static slots are just disabled in place,
 *    dynamic slots are linked onto a garbage list.
 *
 * 2. Garbage collection: compact the arrays by moving live entries
 *    from the end into the freed slots.
 */
static void
pollmgr_loop(void)
{
    int nready;
    SOCKET delfirst;            /* index of the first garbage entry */
    SOCKET *pdelprev;           /* where to store the index of the next one */
    int i;

    for (;;) {
#ifndef RT_OS_WINDOWS
        nready = poll(pollmgr.fds, pollmgr.nfds, -1);
#else
        int rc = RTWinPoll(pollmgr.fds, pollmgr.nfds,RT_INDEFINITE_WAIT, &nready);
        if (RT_FAILURE(rc)) {
            err(EXIT_FAILURE, "poll"); /* XXX: what to do on error? */
            /* NOTREACHED*/
        }
#endif

        DPRINTF2(("%s: ready %d fd%s\n",
                  __func__, nready, (nready == 1 ? "" : "s")));

        if (nready < 0) {
            if (errno == EINTR) {
                continue;
            }

            err(EXIT_FAILURE, "poll"); /* XXX: what to do on error? */
            /* NOTREACHED*/
        }
        else if (nready == 0) { /* cannot happen, we wait forever (-1) */
            continue;           /* - but be defensive */
        }


        /* start with an empty garbage list */
        delfirst = INVALID_SOCKET;
        pdelprev = &delfirst;

        /* stop early once all slots with pending events were handled */
        for (i = 0; (nfds_t)i < pollmgr.nfds && nready > 0; ++i) {
            struct pollmgr_handler *handler;
            SOCKET fd;
            int revents, nevents;

            fd = pollmgr.fds[i].fd;
            revents = pollmgr.fds[i].revents;

            /*
             * Channel handlers can request deletion of dynamic slots
             * by calling pollmgr_del_slot() that clobbers slot's fd.
             */
            if (fd == INVALID_SOCKET && i >= POLLMGR_SLOT_FIRST_DYNAMIC) {
                /* adjust count if events were pending for that slot */
                if (revents != 0) {
                    --nready;
                }

                /* pretend that slot handler requested deletion */
                nevents = -1;
                goto update_events;
            }

            if (revents == 0) {
                continue; /* next fd */
            }
            --nready;

            handler = pollmgr.handlers[i];

            if (handler != NULL && handler->callback != NULL) {
#ifdef LWIP_PROXY_DEBUG
# if LWIP_PROXY_DEBUG /* DEBUG */
                if (i < POLLMGR_SLOT_FIRST_DYNAMIC) {
                    if (revents == POLLIN) {
                        DPRINTF2(("%s: ch %d\n", __func__, i));
                    }
                    else {
                        DPRINTF2(("%s: ch %d @ revents 0x%x!\n",
                                  __func__, i, revents));
                    }
                }
                else {
                    DPRINTF2(("%s: fd %d @ revents 0x%x\n",
                              __func__, fd, revents));
                }
# endif /* LWIP_PROXY_DEBUG / DEBUG */
#endif
                /* the return value is the new events mask, < 0 to delete */
                nevents = (*handler->callback)(handler, fd, revents);
            }
            else {
                DPRINTF0(("%s: invalid handler for fd %d: ", __func__, fd));
                if (handler == NULL) {
                    DPRINTF0(("NULL\n"));
                }
                else {
                    DPRINTF0(("%p (callback = NULL)\n", (void *)handler));
                }
                nevents = -1; /* delete it */
            }

          update_events:
            if (nevents >= 0) {
                if (nevents != pollmgr.fds[i].events) {
                    DPRINTF2(("%s: fd %d ! nevents 0x%x\n",
                              __func__, fd, nevents));
                }
                pollmgr.fds[i].events = nevents;
            }
            else if (i < POLLMGR_SLOT_FIRST_DYNAMIC) {
                /* Don't garbage-collect channels. */
                DPRINTF2(("%s: fd %d ! DELETED (channel %d)\n",
                          __func__, fd, i));
                pollmgr.fds[i].fd = INVALID_SOCKET;
                pollmgr.fds[i].events = 0;
                pollmgr.fds[i].revents = 0;
                pollmgr.handlers[i] = NULL;
            }
            else {
                DPRINTF2(("%s: fd %d ! DELETED\n", __func__, fd));

                /* schedule for deletion (see g/c loop for details) */
                *pdelprev = i;  /* make previous entry point to us */
                pdelprev = &pollmgr.fds[i].fd;

                pollmgr.fds[i].fd = INVALID_SOCKET; /* end of list (for now) */
                pollmgr.fds[i].events = POLLMGR_GARBAGE;
                pollmgr.fds[i].revents = 0;
                pollmgr.handlers[i] = NULL;
            }
        } /* processing loop */


        /*
         * Garbage collect and compact the array.
         *
         * We overload pollfd::fd of garbage entries to store the
         * index of the next garbage entry.  The garbage list is
         * co-directional with the fds array.  The index of the first
         * entry is in "delfirst", the last entry "points to"
         * INVALID_SOCKET.
         *
         * See update_events code for nevents < 0 at the end of the
         * processing loop above.
         */
        while (delfirst != INVALID_SOCKET) {
            const int last = pollmgr.nfds - 1;

            /*
             * We want a live entry in the last slot to swap into the
             * freed slot, so make sure we have one.
             */
            if (pollmgr.fds[last].events == POLLMGR_GARBAGE /* garbage */
                || pollmgr.fds[last].fd == INVALID_SOCKET)  /* or killed */
            {
                /* drop garbage entry at the end of the array */
                --pollmgr.nfds;

                if (delfirst == (SOCKET)last) {
                    /* congruent to delnext >= pollmgr.nfds test below */
                    delfirst = INVALID_SOCKET; /* done */
                }
            }
            else {
                /* read the link before the struct copy overwrites it */
                const SOCKET delnext = pollmgr.fds[delfirst].fd;

                /* copy live entry at the end to the first slot being freed */
                pollmgr.fds[delfirst] = pollmgr.fds[last]; /* struct copy */
                pollmgr.handlers[delfirst] = pollmgr.handlers[last];
                /* the moved handler must learn its new slot number */
                pollmgr.handlers[delfirst]->slot = (int)delfirst;
                --pollmgr.nfds;

                if ((nfds_t)delnext >= pollmgr.nfds) {
                    delfirst = INVALID_SOCKET; /* done */
                }
                else {
                    delfirst = delnext;
                }
            }

            /* in either case the old last slot is now unused */
            pollmgr.fds[last].fd = INVALID_SOCKET;
            pollmgr.fds[last].events = 0;
            pollmgr.fds[last].revents = 0;
            pollmgr.handlers[last] = NULL;
        }
    } /* poll loop */
}
+ */ +struct pollmgr_refptr * +pollmgr_refptr_create(struct pollmgr_handler *ptr) +{ + struct pollmgr_refptr *rp; + + LWIP_ASSERT1(ptr != NULL); + + rp = (struct pollmgr_refptr *)malloc(sizeof (*rp)); + if (rp == NULL) { + return NULL; + } + + sys_mutex_new(&rp->lock); + rp->ptr = ptr; + rp->strong = 1; + rp->weak = 0; + + return rp; +} + + +static void +pollmgr_refptr_delete(struct pollmgr_refptr *rp) +{ + if (rp == NULL) { + return; + } + + LWIP_ASSERT1(rp->strong == 0); + LWIP_ASSERT1(rp->weak == 0); + + sys_mutex_free(&rp->lock); + free(rp); +} + + +/** + * Add weak reference before "rp" is sent over a poll manager channel. + */ +void +pollmgr_refptr_weak_ref(struct pollmgr_refptr *rp) +{ + sys_mutex_lock(&rp->lock); + + LWIP_ASSERT1(rp->ptr != NULL); + LWIP_ASSERT1(rp->strong > 0); + + ++rp->weak; + + sys_mutex_unlock(&rp->lock); +} + + +/** + * Try to get the pointer from implicitely weak reference we've got + * from a channel. + * + * If we detect that the object is still strongly referenced, but no + * longer registered with the poll manager we abort strengthening + * conversion here b/c lwip thread callback is already scheduled to + * destruct the object. + */ +struct pollmgr_handler * +pollmgr_refptr_get(struct pollmgr_refptr *rp) +{ + struct pollmgr_handler *handler; + size_t weak; + + sys_mutex_lock(&rp->lock); + + LWIP_ASSERT1(rp->weak > 0); + weak = --rp->weak; + + handler = rp->ptr; + if (handler == NULL) { + LWIP_ASSERT1(rp->strong == 0); + sys_mutex_unlock(&rp->lock); + if (weak == 0) { + pollmgr_refptr_delete(rp); + } + return NULL; + } + + LWIP_ASSERT1(rp->strong == 1); + + /* + * Here we woild do: + * + * ++rp->strong; + * + * and then, after channel handler is done, we would decrement it + * back. + * + * Instead we check that the object is still registered with poll + * manager. 
If it is, there's no race with lwip thread trying to + * drop its strong reference, as lwip thread callback to destruct + * the object is always scheduled by its poll manager callback. + * + * Conversly, if we detect that the object is no longer registered + * with poll manager, we immediately abort. Since channel handler + * can't do anything useful anyway and would have to return + * immediately. + * + * Since channel handler would always find rp->strong as it had + * left it, just elide extra strong reference creation to avoid + * the whole back-and-forth. + */ + + if (handler->slot < 0) { /* no longer polling */ + sys_mutex_unlock(&rp->lock); + return NULL; + } + + sys_mutex_unlock(&rp->lock); + return handler; +} + + +/** + * Remove (the only) strong reference. + * + * If it were real strong/weak pointers, we should also call + * destructor for the referenced object, but + */ +void +pollmgr_refptr_unref(struct pollmgr_refptr *rp) +{ + sys_mutex_lock(&rp->lock); + + LWIP_ASSERT1(rp->strong == 1); + --rp->strong; + + if (rp->strong > 0) { + sys_mutex_unlock(&rp->lock); + } + else { + size_t weak; + + /* void *ptr = rp->ptr; */ + rp->ptr = NULL; + + /* delete ptr; // see doc comment */ + + weak = rp->weak; + sys_mutex_unlock(&rp->lock); + if (weak == 0) { + pollmgr_refptr_delete(rp); + } + } +} diff --git a/src/VBox/NetworkServices/NAT/proxy_pollmgr.h b/src/VBox/NetworkServices/NAT/proxy_pollmgr.h new file mode 100644 index 00000000..9ab6bf19 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/proxy_pollmgr.h @@ -0,0 +1,95 @@ +/* $Id: proxy_pollmgr.h $ */ +/** @file + * NAT Network - poll manager, definitions and declarations. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
#ifndef VBOX_INCLUDED_SRC_NAT_proxy_pollmgr_h
#define VBOX_INCLUDED_SRC_NAT_proxy_pollmgr_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#ifndef RT_OS_WINDOWS
# include <unistd.h>            /* for ssize_t */
#endif
#include "lwip/sys.h"

/*
 * Named channels.  The value is the "slot" argument of
 * pollmgr_add_chan() and pollmgr_chan_send().
 */
enum pollmgr_slot_t {
    POLLMGR_CHAN_PXTCP_ADD,     /* new proxy tcp connection from guest */
    POLLMGR_CHAN_PXTCP_POLLIN,  /* free space in ringbuf, may POLLIN */
    POLLMGR_CHAN_PXTCP_POLLOUT, /* schedule one-shot POLLOUT callback */
    POLLMGR_CHAN_PXTCP_DEL,     /* delete pxtcp */
    POLLMGR_CHAN_PXTCP_RESET,   /* send RST and delete pxtcp */

    POLLMGR_CHAN_PXUDP_ADD,     /* new proxy udp conversation from guest */
    POLLMGR_CHAN_PXUDP_DEL,     /* delete pxudp from pollmgr */

    POLLMGR_CHAN_PORTFWD,       /* add/remove port forwarding rules */

    POLLMGR_CHAN_COUNT          /* number of channels, not a channel */
};


struct pollmgr_handler;         /* forward */

/*
 * Handler callback: gets the handler, the fd and the revents reported
 * for it.  The poll loop stores a non-negative return value as the
 * slot's new events mask and treats a negative one as a request to
 * delete the slot.
 */
typedef int (*pollmgr_callback)(struct pollmgr_handler *, SOCKET, int);

struct pollmgr_handler {
    pollmgr_callback callback;
    void *data;                 /* not interpreted by the poll manager */
    int slot;                   /* slot the handler occupies, maintained
                                 * by the poll manager; negative if none */
};

/*
 * Special-purpose reference to a handler with separate strong and
 * weak counts, both protected by "lock".
 */
struct pollmgr_refptr {
    struct pollmgr_handler *ptr; /* cleared when the strong ref is dropped */
    sys_mutex_t lock;
    size_t strong;
    size_t weak;
};

/* returns 0 on success, -1 on failure */
int pollmgr_init(void);

/* static named slots (aka "channels") */
SOCKET pollmgr_add_chan(int, struct pollmgr_handler *);
ssize_t pollmgr_chan_send(int, void *buf, size_t nbytes);
void *pollmgr_chan_recv_ptr(struct pollmgr_handler *, SOCKET, int);

/* dynamic slots */
int pollmgr_add(struct pollmgr_handler *, SOCKET, int);

/* special-purpose strong/weak references */
struct pollmgr_refptr *pollmgr_refptr_create(struct pollmgr_handler *);
void pollmgr_refptr_weak_ref(struct pollmgr_refptr *);
struct pollmgr_handler *pollmgr_refptr_get(struct pollmgr_refptr *);
void pollmgr_refptr_unref(struct pollmgr_refptr *);

void pollmgr_update_events(int, int);
void pollmgr_del_slot(int);

/* thread entry point that runs the poll loop */
void pollmgr_thread(void *);

/* buffer for callbacks to receive udp without worrying about truncation */
extern u8_t pollmgr_udpbuf[64 * 1024];

#endif /* !VBOX_INCLUDED_SRC_NAT_proxy_pollmgr_h */
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" + +#include "proxy.h" + +#include "lwip/opt.h" +#include "lwip/sys.h" +#include "lwip/stats.h" +#include "lwip/timers.h" + +#include "lwip/inet_chksum.h" +#include "lwip/icmp6.h" +#include "lwip/nd6.h" + +#include "lwip/raw.h" + +#include <string.h> + + +static void proxy_rtadvd_timer(void *); +static void proxy_rtadvd_send_multicast(struct netif *); +static void proxy_rtadvd_fill_payload(struct netif *, int); + +static u8_t rtadvd_recv(void *, struct raw_pcb *, struct pbuf *, ip6_addr_t *); + + +/* ff02::1 - link-local all nodes multicast address */ +static ip6_addr_t allnodes_linklocal = { + { PP_HTONL(0xff020000UL), 0, 0, PP_HTONL(0x00000001UL) } +}; + + +/* + * Unsolicited Router Advertisement payload. + * + * NB: Since ICMP checksum covers pseudo-header with destination + * address (link-local allnodes multicast in this case) this payload + * cannot be used for solicited replies to unicast addresses. + */ +static unsigned int unsolicited_ra_payload_length; +static u8_t unsolicited_ra_payload[ + sizeof(struct ra_header) + /* reserves enough space for NETIF_MAX_HWADDR_LEN */ + + sizeof(struct lladdr_option) + /* we only announce one prefix */ + + sizeof(struct prefix_option) * 1 +]; + + +static int ndefaults = 0; + +static struct raw_pcb *rtadvd_pcb; + + +void +proxy_rtadvd_start(struct netif *proxy_netif) +{ +#if 0 /* XXX */ + ndefaults = rtmon_get_defaults(); +#else + ndefaults = g_proxy_options->ipv6_defroute; +#endif + if (ndefaults < 0) { + DPRINTF0(("rtadvd: failed to read IPv6 routing table, aborting\n")); + return; + } + + proxy_rtadvd_fill_payload(proxy_netif, ndefaults > 0); + + rtadvd_pcb = raw_new_ip6(IP6_NEXTH_ICMP6); + if (rtadvd_pcb == NULL) { + DPRINTF0(("rtadvd: failed to allocate pcb, aborting\n")); + return; + } + + /* + * We cannot use raw_bind_ip6() since raw_input() doesn't grok + * multicasts. 
We are going to use ip6_output_if() directly. + */ + raw_recv_ip6(rtadvd_pcb, rtadvd_recv, proxy_netif); + + sys_timeout(3 * 1000, proxy_rtadvd_timer, proxy_netif); +} + + +static int quick_ras = 2; + + +/** + * lwIP thread callback invoked when we start/stop advertising default + * route. + */ +void +proxy_rtadvd_do_quick(void *arg) +{ + struct netif *proxy_netif = (struct netif *)arg; + + quick_ras = 2; + sys_untimeout(proxy_rtadvd_timer, proxy_netif); + proxy_rtadvd_timer(proxy_netif); /* sends and re-arms */ +} + + +static void +proxy_rtadvd_timer(void *arg) +{ + struct netif *proxy_netif = (struct netif *)arg; + int newdefs; + u32_t delay; + +#if 0 /* XXX */ + newdefs = rtmon_get_defaults(); +#else + newdefs = g_proxy_options->ipv6_defroute; +#endif + if (newdefs != ndefaults && newdefs != -1) { + ndefaults = newdefs; + proxy_rtadvd_fill_payload(proxy_netif, ndefaults > 0); + } + + proxy_rtadvd_send_multicast(proxy_netif); + + if (quick_ras > 0) { + --quick_ras; + delay = 16 * 1000; + } + else { + delay = 600 * 1000; + } + + sys_timeout(delay, proxy_rtadvd_timer, proxy_netif); +} + + +/* + * This should be folded into icmp6/nd6 input, but I don't want to + * solve this in general, making it configurable, etc. + * + * Cf. RFC 4861: + * 6.1.1. 
/*
 * This should be folded into icmp6/nd6 input, but I don't want to
 * solve this in general, making it configurable, etc.
 *
 * Cf. RFC 4861:
 *   6.1.1. Validation of Router Solicitation Messages
 *
 * Raw pcb receive callback.  ICMPv6 packets that pass the length and
 * checksum tests but are not Router Solicitations are left alone
 * (RAW_RECV_CONTINUE).  Everything else is consumed: a valid
 * solicitation triggers an immediate multicast advertisement, an
 * invalid packet is counted and dropped.
 */
static u8_t
rtadvd_recv(void *arg, struct raw_pcb *pcb, struct pbuf *p, ip6_addr_t *addr)
{
    enum raw_recv_status { RAW_RECV_CONTINUE = 0, RAW_RECV_CONSUMED = 1 };

    struct netif *proxy_netif = (struct netif *)arg;
    struct ip6_hdr *ip6_hdr;
    struct icmp6_hdr *icmp6_hdr;
    struct lladdr_option *lladdr_opt;
    void *option;
    u8_t opttype, optlen8;

    LWIP_UNUSED_ARG(pcb);
    LWIP_UNUSED_ARG(addr);

    /* save a pointer to IP6 header and skip to ICMP6 payload */
    ip6_hdr = (struct ip6_hdr *)p->payload;
    pbuf_header(p, -ip_current_header_tot_len());

    if (p->len < sizeof(struct icmp6_hdr)) {
        ICMP6_STATS_INC(icmp6.lenerr);
        goto drop;
    }

    if (ip6_chksum_pseudo(p, IP6_NEXTH_ICMP6, p->tot_len,
                          ip6_current_src_addr(),
                          ip6_current_dest_addr()) != 0)
    {
        ICMP6_STATS_INC(icmp6.chkerr);
        goto drop;
    }

    icmp6_hdr = (struct icmp6_hdr *)p->payload;
    if (icmp6_hdr->type != ICMP6_TYPE_RS) {
        pbuf_header(p, ip_current_header_tot_len()); /* restore payload ptr */
        return RAW_RECV_CONTINUE; /* not interested */
    }

    /* only now that we know it's ICMP6_TYPE_RS we can check IP6 hop limit */
    if (IP6H_HOPLIM(ip6_hdr) != 255) {
        ICMP6_STATS_INC(icmp6.proterr);
        goto drop;
    }

    /* future, backward-incompatible changes may use different Code values. */
    if (icmp6_hdr->code != 0) {
        ICMP6_STATS_INC(icmp6.proterr);
        goto drop;
    }

    /* skip past rs_header, nothing interesting in it */
    if (p->len < sizeof(struct rs_header)) {
        ICMP6_STATS_INC(icmp6.lenerr);
        goto drop;
    }
    pbuf_header(p, -(s16_t)sizeof(struct rs_header));

    /* walk the options, each one is advanced over with pbuf_header() */
    lladdr_opt = NULL;
    while (p->len > 0) {
        int optlen;

        /* options come in units of 8 octets */
        if (p->len < 8) {
            ICMP6_STATS_INC(icmp6.lenerr);
            goto drop;
        }

        option = p->payload;
        opttype = ((u8_t *)option)[0];
        optlen8 = ((u8_t *)option)[1]; /* in units of 8 octets */

        /* zero length is invalid, and we would loop forever on it */
        if (optlen8 == 0) {
            ICMP6_STATS_INC(icmp6.proterr);
            goto drop;
        }

        optlen = (unsigned int)optlen8 << 3;
        if (p->len < optlen) {
            ICMP6_STATS_INC(icmp6.lenerr);
            goto drop;
        }

        if (opttype == ND6_OPTION_TYPE_SOURCE_LLADDR) {
            if (lladdr_opt != NULL) { /* duplicate */
                ICMP6_STATS_INC(icmp6.proterr);
                goto drop;
            }
            lladdr_opt = (struct lladdr_option *)option;
        }

        pbuf_header(p, -optlen);
    }

    /* the source link-layer address option must not come with :: source */
    if (ip6_addr_isany(ip6_current_src_addr())) {
        if (lladdr_opt != NULL) {
            ICMP6_STATS_INC(icmp6.proterr);
            goto drop;
        }

        /* reply with multicast RA */
    }
    else {
        /*
         * XXX: Router is supposed to update its Neighbor Cache (6.2.6),
         * but it's hidden inside nd6.c.
         */

        /* may reply with either unicast or multicast RA */
    }
    /* we just always reply with multicast RA */

    pbuf_free(p); /* NB: this invalidates lladdr_option */

    sys_untimeout(proxy_rtadvd_timer, proxy_netif);
    proxy_rtadvd_timer(proxy_netif); /* sends and re-arms */

    return RAW_RECV_CONSUMED;

  drop:
    pbuf_free(p);
    ICMP6_STATS_INC(icmp6.drop);
    return RAW_RECV_CONSUMED;
}


/*
 * Send the prepared unsolicited RA payload to the link-local all
 * nodes multicast address via "proxy_netif".  Failures are logged
 * and otherwise ignored.
 */
static void
proxy_rtadvd_send_multicast(struct netif *proxy_netif)
{
    struct pbuf *ph, *pp;
    err_t error;

    /* empty pbuf for ip6_output_if() to prepend the headers to */
    ph = pbuf_alloc(PBUF_IP, 0, PBUF_RAM);
    if (ph == NULL) {
        DPRINTF0(("%s: failed to allocate RA header pbuf\n", __func__));
        return;
    }

    /* PBUF_ROM pbuf that refers to the static payload, no copying */
    pp = pbuf_alloc(PBUF_RAW, unsolicited_ra_payload_length, PBUF_ROM);
    if (pp == NULL) {
        DPRINTF0(("%s: failed to allocate RA payload pbuf\n", __func__));
        pbuf_free(ph);
        return;
    }
    pp->payload = unsolicited_ra_payload;
    pbuf_chain(ph, pp);

    error = ip6_output_if(ph,
                          netif_ip6_addr(proxy_netif, 0), /* src: link-local */
                          &allnodes_linklocal,            /* dst */
                          255,                            /* hop limit */
                          0,                              /* traffic class */
                          IP6_NEXTH_ICMP6,
                          proxy_netif);
    if (error != ERR_OK) {
        DPRINTF0(("%s: failed to send RA (err=%d)\n", __func__, error));
    }

    pbuf_free(pp);
    pbuf_free(ph);
}
+ */ +static void +proxy_rtadvd_fill_payload(struct netif *proxy_netif, int is_default) +{ + struct pbuf *p; + struct ra_header *ra_hdr; + struct lladdr_option *lladdr_opt; + struct prefix_option *pfx_opt; + unsigned int lladdr_optlen; + + LWIP_ASSERT("netif hwaddr too long", + proxy_netif->hwaddr_len <= NETIF_MAX_HWADDR_LEN); + + /* type + length + ll addr + round up to 8 octets */ + lladdr_optlen = (2 + proxy_netif->hwaddr_len + 7) & ~0x7; + + /* actual payload length */ + unsolicited_ra_payload_length = + sizeof(struct ra_header) + + lladdr_optlen + + sizeof(struct prefix_option) * 1; + + /* Set fields. */ + ra_hdr = (struct ra_header *)unsolicited_ra_payload; + lladdr_opt = (struct lladdr_option *)((u8_t *)ra_hdr + sizeof(struct ra_header)); + pfx_opt = (struct prefix_option *)((u8_t *)lladdr_opt + lladdr_optlen); + + memset(unsolicited_ra_payload, 0, sizeof(unsolicited_ra_payload)); + + ra_hdr->type = ICMP6_TYPE_RA; + +#if 0 + /* + * "M" flag. Tell guests to use stateful DHCP6. Disabled here + * since we don't provide stateful server. + */ + ra_hdr->flags |= ND6_RA_FLAG_MANAGED_ADDR_CONFIG; +#endif + /* + * XXX: TODO: Disable "O" flag for now to match disabled stateless + * server. We don't yet get IPv6 nameserver addresses from + * HostDnsService, so we have nothing to say, don't tell guests to + * come asking. + */ +#if 0 + /* + * "O" flag. Tell guests to use DHCP6 for DNS and the like. This + * is served by simple stateless server (RFC 3736). + * + * XXX: "STATEFUL" in the flag name was probably a bug in RFC2461. + * It's present in the text, but not in the router configuration + * variable name. It's dropped in the text in RFC4861. 
+ */ + ra_hdr->flags |= ND6_RA_FLAG_OTHER_STATEFUL_CONFIG; +#endif + + if (is_default) { + ra_hdr->router_lifetime = PP_HTONS(1200); /* seconds */ + } + else { + ra_hdr->router_lifetime = 0; + } + + lladdr_opt->type = ND6_OPTION_TYPE_SOURCE_LLADDR; + lladdr_opt->length = lladdr_optlen >> 3; /* in units of 8 octets */ + memcpy(lladdr_opt->addr, proxy_netif->hwaddr, proxy_netif->hwaddr_len); + + pfx_opt->type = ND6_OPTION_TYPE_PREFIX_INFO; + pfx_opt->length = 4; + pfx_opt->prefix_length = 64; + pfx_opt->flags = ND6_PREFIX_FLAG_ON_LINK + | ND6_PREFIX_FLAG_AUTONOMOUS; + pfx_opt->valid_lifetime = ~0U; /* infinite */ + pfx_opt->preferred_lifetime = ~0U; /* infinite */ + pfx_opt->prefix.addr[0] = netif_ip6_addr(proxy_netif, 1)->addr[0]; + pfx_opt->prefix.addr[1] = netif_ip6_addr(proxy_netif, 1)->addr[1]; + + + /* we need a temp pbuf to calculate the checksum */ + p = pbuf_alloc(PBUF_IP, unsolicited_ra_payload_length, PBUF_ROM); + if (p == NULL) { + DPRINTF0(("rtadvd: failed to allocate RA pbuf\n")); + return; + } + p->payload = unsolicited_ra_payload; + + ra_hdr->chksum = ip6_chksum_pseudo(p, IP6_NEXTH_ICMP6, p->len, + /* src addr: netif's link-local */ + netif_ip6_addr(proxy_netif, 0), + /* dst addr */ + &allnodes_linklocal); + pbuf_free(p); +} diff --git a/src/VBox/NetworkServices/NAT/proxy_tftpd.c b/src/VBox/NetworkServices/NAT/proxy_tftpd.c new file mode 100644 index 00000000..6baa2cae --- /dev/null +++ b/src/VBox/NetworkServices/NAT/proxy_tftpd.c @@ -0,0 +1,991 @@ +/* $Id: proxy_tftpd.c $ */ +/** @file + * NAT Network - TFTP server. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" + +#include "proxy.h" +#include "tftp.h" + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#else +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <io.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> + +# define O_RDONLY _O_RDONLY +# define S_ISREG(x) ((x) & _S_IFREG) +#endif + +#include "lwip/timers.h" +#include "lwip/udp.h" + +#include <iprt/string.h> + +struct xfer { + struct udp_pcb *pcb; + int fd; + unsigned int ack; + struct pbuf *pbuf; + + struct pbuf *oack; + + int rexmit; + + ipX_addr_t peer_ip; + u16_t peer_port; + + char *filename; + int octet; + + /* options */ + unsigned int blksize; + int blksize_from_opt; + + unsigned int timeout; + int timeout_from_opt; + + off_t tsize; + int tsize_from_opt; +}; + +struct tftpd { + struct udp_pcb *pcb; + char *root; + +#define TFTP_MAX_XFERS 3 + struct xfer xfers[TFTP_MAX_XFERS]; +}; + +struct tftp_option { + const char *name; + int (*getopt)(struct xfer *, const char *); + int (*ackopt)(struct xfer *, char **, size_t *); +}; + + +static void tftpd_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t); + +static void tftpd_rrq(struct pbuf *, ip_addr_t *, u16_t); + +static void tftp_xfer_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t); + +static void tftp_recv_ack(struct 
xfer *, u16_t); +static void tftp_fillbuf(struct xfer *); +static void tftp_send(struct xfer *); +static void tftp_timeout(void *); + +static struct xfer *tftp_xfer_alloc(ip_addr_t *, u16_t); +static int tftp_xfer_create_pcb(struct xfer *); +static void tftp_xfer_free(struct xfer *); + +static int tftp_parse_filename(struct xfer *, char **, size_t *); +static int tftp_parse_mode(struct xfer *, char **, size_t *); +static int tftp_parse_option(struct xfer *, char **, size_t *); + +static int tftp_opt_blksize(struct xfer *, const char *); +static int tftp_opt_timeout(struct xfer *, const char *); +static int tftp_opt_tsize(struct xfer *, const char *); + +static char *tftp_getstr(struct xfer *, const char *, char **, size_t *); + +static int tftp_ack_blksize(struct xfer *, char **, size_t *); +static int tftp_ack_timeout(struct xfer *, char **, size_t *); +static int tftp_ack_tsize(struct xfer *, char **, size_t *); + +static int tftp_add_oack(char **, size_t *, const char *, const char *, ...) __attribute__((format(printf, 4, 5))); + +static ssize_t tftp_strnlen(char *, size_t); + +static int tftp_internal_error(struct xfer *); +static int tftp_error(struct xfer *, u16_t, const char *, ...) __attribute__((format(printf, 3, 4))); +static void tftpd_error(ip_addr_t *, u16_t, u16_t, const char *, ...) 
__attribute__((format(printf, 4, 5))); +static struct pbuf *tftp_verror(u16_t, const char *, va_list); + + +/* const */ int report_transient_errors = 1; +static struct tftpd tftpd; + +static struct tftp_option tftp_options[] = { + { "blksize", tftp_opt_blksize, tftp_ack_blksize }, /* RFC 2348 */ + { "timeout", tftp_opt_timeout, tftp_ack_timeout }, /* RFC 2349 */ + { "tsize", tftp_opt_tsize, tftp_ack_tsize }, /* RFC 2349 */ + { NULL, NULL, NULL } +}; + + +err_t +tftpd_init(struct netif *proxy_netif, const char *tftproot) +{ + size_t len; + err_t error; + + tftpd.root = strdup(tftproot); + if (tftpd.root == NULL) { + DPRINTF0(("%s: failed to allocate tftpd.root\n", __func__)); + return ERR_MEM; + } + + len = strlen(tftproot); + if (tftpd.root[len - 1] == '/') { + tftpd.root[len - 1] = '\0'; + } + + tftpd.pcb = udp_new(); + if (tftpd.pcb == NULL) { + DPRINTF0(("%s: failed to allocate PCB\n", __func__)); + return ERR_MEM; + } + + udp_recv(tftpd.pcb, tftpd_recv, NULL); + + error = udp_bind(tftpd.pcb, &proxy_netif->ip_addr, TFTP_SERVER_PORT); + if (error != ERR_OK) { + DPRINTF0(("%s: failed to bind PCB\n", __func__)); + return error; + } + + return ERR_OK; +} + + +static void +tftpd_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + u16_t op; + + LWIP_ASSERT1(pcb == tftpd.pcb); + + LWIP_UNUSED_ARG(pcb); /* only in assert */ + LWIP_UNUSED_ARG(arg); + + if (pbuf_clen(p) > 1) { /* this code assumes contiguous aligned payload */ + pbuf_free(p); + return; + } + + op = ntohs(*(u16_t *)p->payload); + switch (op) { + case TFTP_RRQ: + tftpd_rrq(p, addr, port); + break; + + case TFTP_WRQ: + tftpd_error(addr, port, TFTP_EACCESS, "Permission denied"); + break; + + default: + tftpd_error(addr, port, TFTP_ENOSYS, "Bad opcode %d", op); + break; + } + + pbuf_free(p); +} + + +/** + * Parse Read Request packet and start new transfer. 
+ */ +static void +tftpd_rrq(struct pbuf *p, ip_addr_t *addr, u16_t port) +{ + struct xfer *xfer; + char *s; + size_t len; + int has_options; + int status; + + xfer = tftp_xfer_alloc(addr, port); + if (xfer == NULL) { + return; + } + + /* skip opcode */ + s = (char *)p->payload + sizeof(u16_t); + len = p->len - sizeof(u16_t); + + + /* + * Parse RRQ: + * filename, mode, [opt1, value1, [...] ] + */ + status = tftp_parse_filename(xfer, &s, &len); + if (status < 0) { + goto terminate; + } + + status = tftp_parse_mode(xfer, &s, &len); + if (status < 0) { + goto terminate; + } + + has_options = 0; + while (len > 0) { + status = tftp_parse_option(xfer, &s, &len); + if (status < 0) { + goto terminate; + } + has_options += status; + } + + + /* + * Create OACK packet if necessary. + */ + if (has_options) { + xfer->oack = pbuf_alloc(PBUF_RAW, 128, PBUF_RAM); + if (xfer->oack != NULL) { + struct tftp_option *o; + + ((u16_t *)xfer->oack->payload)[0] = PP_HTONS(TFTP_OACK); + + s = (char *)xfer->oack->payload + sizeof(u16_t); + len = xfer->oack->len - sizeof(u16_t); + + for (o = &tftp_options[0]; o->name != NULL; ++o) { + status = (*o->ackopt)(xfer, &s, &len); + if (status < 0) { + pbuf_free(xfer->oack); + xfer->oack = NULL; + break; + } + } + + if (xfer->oack != NULL) { + Assert((u16_t)(xfer->oack->len - len) == xfer->oack->len - len); + pbuf_realloc(xfer->oack, (u16_t)(xfer->oack->len - len)); + } + } + } + + + /* + * Create static pbuf that will be used for all data packets. + */ + xfer->pbuf = pbuf_alloc(PBUF_RAW, xfer->blksize + 4, PBUF_RAM); + if (xfer->pbuf == NULL) { + tftp_internal_error(xfer); + goto terminate; + } + ((u16_t *)xfer->pbuf->payload)[0] = PP_HTONS(TFTP_DATA); + + + /* + * Finally, create PCB. Before this point any error was reported + * from the server port (see tftp_error() for the reason). 
+ */ + status = tftp_xfer_create_pcb(xfer); + if (status < 0) { + goto terminate; + } + + if (xfer->oack) { + tftp_send(xfer); + } + else { + /* trigger send of the first data packet */ + tftp_recv_ack(xfer, 0); + } + + return; + + terminate: + DPRINTF(("%s: terminated", __func__)); + tftp_xfer_free(xfer); +} + + +static void +tftp_xfer_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + struct xfer *xfer = (struct xfer *)arg; + u16_t op; + + LWIP_UNUSED_ARG(pcb); /* assert only */ + LWIP_UNUSED_ARG(addr); + LWIP_UNUSED_ARG(port); + + LWIP_ASSERT1(xfer->pcb == pcb); + + if (p->len < 2) { + tftp_error(xfer, TFTP_ENOSYS, "Short packet"); + tftp_xfer_free(xfer); + pbuf_free(p); + return; + } + + op = ntohs(*(u16_t *)p->payload); + if (op == TFTP_ACK) { + u16_t ack; + + if (p->len < 4) { + tftp_error(xfer, TFTP_ENOSYS, "Short packet"); + tftp_xfer_free(xfer); + pbuf_free(p); + return; + } + + ack = ntohs(((u16_t *)p->payload)[1]); + tftp_recv_ack(xfer, ack); + } + else if (op == TFTP_ERROR) { + tftp_xfer_free(xfer); + } + else { + tftp_error(xfer, TFTP_ENOSYS, "Unexpected opcode %d", op); + tftp_xfer_free(xfer); + } + + pbuf_free(p); +} + + +static void +tftp_recv_ack(struct xfer *xfer, u16_t ack) +{ + if (ack != (u16_t)xfer->ack) { + DPRINTF2(("%s: expect %u (%u), got %u\n", + __func__, (u16_t)xfer->ack, xfer->ack, ack)); + return; + } + + sys_untimeout(tftp_timeout, xfer); + xfer->rexmit = 0; + + if (xfer->pbuf->len < xfer->blksize) { + DPRINTF(("%s: got final ack %u (%u)\n", + __func__, (u16_t)xfer->ack, xfer->ack)); + tftp_xfer_free(xfer); + return; + } + + if (xfer->oack != NULL) { + pbuf_free(xfer->oack); + xfer->oack = NULL; + } + + ++xfer->ack; + tftp_fillbuf(xfer); + tftp_send(xfer); +} + + +static void +tftp_send(struct xfer *xfer) +{ + struct pbuf *pbuf; + + pbuf = xfer->oack ? 
xfer->oack : xfer->pbuf; + udp_send(xfer->pcb, pbuf); + sys_timeout(xfer->timeout * 1000, tftp_timeout, xfer); +} + + +static void +tftp_timeout(void *arg) +{ + struct xfer *xfer = (struct xfer *)arg; + int maxrexmit; + + maxrexmit = xfer->timeout < 60 ? 5 : 3; + if (++xfer->rexmit < maxrexmit) { + tftp_send(xfer); + } + else { + tftp_xfer_free(xfer); + } +} + + +static void +tftp_fillbuf(struct xfer *xfer) +{ + ssize_t nread; + + DPRINTF2(("%s: reading block %u\n", __func__, xfer->ack)); + + ((u16_t *)xfer->pbuf->payload)[1] = htons(xfer->ack); + nread = read(xfer->fd, (char *)xfer->pbuf->payload + 4, xfer->blksize); + + if (nread < 0) { + tftp_error(xfer, TFTP_EUNDEF, "Read failed"); + return; + } + + pbuf_realloc(xfer->pbuf, nread + 4); +} + + +/** + * Find a free transfer slot (without a pcb). Record peer's IP + * address and port, but don't allocate a pcb yet. + * + * We delay creation of the pcb in response to the original request + * until the request is verified and accepted. This makes using + * tcpdump(8) easier, since tcpdump does not track TFTP transfers, so + * an error reply from a new pcb is not recognized as such and is not + * decoded as TFTP (see tftp_error()). + * + * If the request is rejected, the pcb remains NULL and the transfer + * slot remains unallocated. Since all TFTP processing happens on the + * lwIP thread, there's no concurrent processing, so we don't need to + * "lock" the transfer slot until the pcb is allocated. 
+ */ +static struct xfer * +tftp_xfer_alloc(ip_addr_t *addr, u16_t port) +{ + struct xfer *xfer; + int i; + + /* Find free xfer slot */ + xfer = NULL; + for (i = 0; i < TFTP_MAX_XFERS; ++i) { + if (tftpd.xfers[i].pcb == NULL) { + xfer = &tftpd.xfers[i]; + break; + } + } + + if (xfer == NULL) { + if (report_transient_errors) { + tftpd_error(addr, port, TFTP_EUNDEF, + "Maximum number of simultaneous connections exceeded"); + } + return NULL; + } + + ipX_addr_copy(0, xfer->peer_ip, *ip_2_ipX(addr)); + xfer->peer_port = port; + + xfer->ack = 0; + + xfer->pbuf = NULL; + xfer->oack = NULL; + xfer->rexmit = 0; + + xfer->blksize = 512; + xfer->blksize_from_opt = 0; + + xfer->timeout = 1; + xfer->timeout_from_opt = 0; + + xfer->tsize = -1; + xfer->tsize_from_opt = 0; + + return xfer; +} + + +static int +tftp_xfer_create_pcb(struct xfer *xfer) +{ + struct udp_pcb *pcb; + err_t error; + + pcb = udp_new(); + + /* Bind */ + if (pcb != NULL) { + error = udp_bind(pcb, ipX_2_ip(&tftpd.pcb->local_ip), 0); + if (error != ERR_OK) { + udp_remove(pcb); + pcb = NULL; + } + } + + /* Connect */ + if (pcb != NULL) { + error = udp_connect(pcb, ipX_2_ip(&xfer->peer_ip), xfer->peer_port); + if (error != ERR_OK) { + udp_remove(pcb); + pcb = NULL; + } + } + + if (pcb == NULL) { + if (report_transient_errors) { + tftp_error(xfer, TFTP_EUNDEF, "Failed to create connection"); + } + return -1; + } + + xfer->pcb = pcb; + udp_recv(xfer->pcb, tftp_xfer_recv, xfer); + + return 0; +} + + +static void +tftp_xfer_free(struct xfer *xfer) +{ + sys_untimeout(tftp_timeout, xfer); + + if (xfer->pcb != NULL) { + udp_remove(xfer->pcb); + xfer->pcb = NULL; + } + + if (xfer->fd > 0) { + close(xfer->fd); + xfer->fd = -1; + } + + if (xfer->oack != NULL) { + pbuf_free(xfer->oack); + xfer->oack = NULL; + } + + if (xfer->pbuf != NULL) { + pbuf_free(xfer->pbuf); + xfer->pbuf = NULL; + } + + if (xfer->filename != NULL) { + free(xfer->filename); + xfer->filename = NULL; + } +} + + +static int +tftp_parse_filename(struct 
xfer *xfer, char **ps, size_t *plen) +{ + const char *filename; + struct stat st; + char *pathname; + char *s; + size_t len; + int status; + + filename = tftp_getstr(xfer, "filename", ps, plen); + if (filename == NULL) { + return -1; + } + + DPRINTF(("%s: requested file name: %s\n", __func__, filename)); + xfer->filename = strdup(filename); + if (xfer->filename == NULL) { + return tftp_internal_error(xfer); + } + + /* replace backslashes with forward slashes */ + s = xfer->filename; + while ((s = strchr(s, '\\')) != NULL) { + *s++ = '/'; + } + + /* deny attempts to break out of tftp dir */ + if (strncmp(xfer->filename, "../", 3) == 0 + || strstr(xfer->filename, "/../") != NULL) + { + return tftp_error(xfer, TFTP_ENOENT, "Permission denied"); + } + + len = strlen(tftpd.root) + 1 /*slash*/ + strlen(xfer->filename) + 1 /*nul*/; + pathname = (char *)malloc(len); + if (pathname == NULL) { + return tftp_internal_error(xfer); + } + + RTStrPrintf(pathname, len, "%s/%s", tftpd.root, xfer->filename); +/** @todo fix RTStrPrintf because this does not currently work: + * status = RTStrPrintf(pathname, len, "%s/%s", tftpd.root, xfer->filename); + * if (status < 0) { + * return tftp_internal_error(xfer); + * } + */ + + DPRINTF(("%s: full pathname: %s\n", __func__, pathname)); + xfer->fd = open(pathname, O_RDONLY); + if (xfer->fd < 0) { + if (errno == EPERM) { + return tftp_error(xfer, TFTP_ENOENT, "Permission denied"); + } + else { + return tftp_error(xfer, TFTP_ENOENT, "File not found"); + } + } + + status = fstat(xfer->fd, &st); + if (status < 0) { + return tftp_internal_error(xfer); + } + + if (!S_ISREG(st.st_mode)) { + return tftp_error(xfer, TFTP_ENOENT, "File not found"); + } + + xfer->tsize = st.st_size; + return 0; +} + + +static int +tftp_parse_mode(struct xfer *xfer, char **ps, size_t *plen) +{ + const char *modename; + + modename = tftp_getstr(xfer, "mode", ps, plen); + if (modename == NULL) { + return -1; + } + + if (RTStrICmp(modename, "octet") == 0) { + xfer->octet 
= 1; + } + else if (RTStrICmp(modename, "netascii") == 0) { + xfer->octet = 0; + /* XXX: not (yet?) */ + return tftp_error(xfer, TFTP_ENOSYS, "Mode \"netascii\" not supported"); + } + else if (RTStrICmp(modename, "mail") == 0) { + return tftp_error(xfer, TFTP_ENOSYS, "Mode \"mail\" not supported"); + } + else { + return tftp_error(xfer, TFTP_ENOSYS, "Unknown mode \"%s\"", modename); + } + + return 0; +} + + +static int +tftp_parse_option(struct xfer *xfer, char **ps, size_t *plen) +{ + const char *opt; + const char *val; + struct tftp_option *o; + + opt = tftp_getstr(xfer, "option name", ps, plen); + if (opt == NULL) { + return -1; + } + + if (*plen == 0) { + return tftp_error(xfer, TFTP_EUNDEF, "Missing option value"); + } + + val = tftp_getstr(xfer, "option value", ps, plen); + if (val == NULL) { + return -1; + } + + /* handle option if known, ignore otherwise */ + for (o = &tftp_options[0]; o->name != NULL; ++o) { + if (RTStrICmp(o->name, opt) == 0) { + return (*o->getopt)(xfer, val); + } + } + + return 0; /* unknown option */ +} + + +static int +tftp_opt_blksize(struct xfer *xfer, const char *optval) +{ + char *end; + long blksize; + + errno = 0; + blksize = strtol(optval, &end, 10); + if (errno != 0 || *end != '\0') { + return 0; + } + + if (blksize < 8) { + return 0; + } + + if (blksize > 1428) { /* exceeds ethernet mtu */ + blksize = 1428; + } + + xfer->blksize = blksize; + xfer->blksize_from_opt = 1; + return 1; +} + + +static int +tftp_opt_timeout(struct xfer *xfer, const char *optval) +{ + LWIP_UNUSED_ARG(xfer); + LWIP_UNUSED_ARG(optval); + return 0; +} + + +static int +tftp_opt_tsize(struct xfer *xfer, const char *optval) +{ + LWIP_UNUSED_ARG(optval); /* must be "0", but we don't check it */ + + if (xfer->tsize < 0) { + return 0; + } + + xfer->tsize_from_opt = 1; + return 1; +} + + +static char * +tftp_getstr(struct xfer *xfer, const char *msg, char **ps, size_t *plen) +{ + char *s; + ssize_t slen; + + s = *ps; + slen = tftp_strnlen(s, *plen); + if (slen 
< 0) { + tftp_error(xfer, TFTP_EUNDEF, "Unterminated %s", msg); + return NULL; + } + + *ps += slen + 1; + *plen -= slen + 1; + + return s; +} + + +static int +tftp_ack_blksize(struct xfer *xfer, char **ps, size_t *plen) +{ + if (!xfer->blksize_from_opt) { + return 0; + } + + return tftp_add_oack(ps, plen, "blksize", "%u", xfer->blksize); +} + + +static int +tftp_ack_timeout(struct xfer *xfer, char **ps, size_t *plen) +{ + if (!xfer->timeout_from_opt) { + return 0; + } + + return tftp_add_oack(ps, plen, "timeout", "%u", xfer->timeout); +} + + +static int +tftp_ack_tsize(struct xfer *xfer, char **ps, size_t *plen) +{ + if (!xfer->tsize_from_opt) { + return 0; + } + + LWIP_ASSERT1(xfer->tsize >= 0); + return tftp_add_oack(ps, plen, "tsize", + /* XXX: FIXME: want 64 bit */ + "%lu", (unsigned long)xfer->tsize); +} + + +static int +tftp_add_oack(char **ps, size_t *plen, + const char *optname, const char *fmt, ...) +{ + va_list ap; + int sz; + +/** @todo Fix RTStrPrintf because this doesn't really work. + * sz = RTStrPrintf(*ps, *plen, "%s", optname); + * if (sz < 0 || (size_t)sz >= *plen) { + * return -1; + * } */ + sz = (int)RTStrPrintf(*ps, *plen, "%s", optname); + if (/*sz < 0 ||*/ (size_t)sz >= *plen) { + return -1; + } + + ++sz; /* for nul byte */ + *ps += sz; + *plen -= sz; + + va_start(ap, fmt); + sz = vsnprintf(*ps, *plen, fmt, ap); + va_end(ap); + if (sz < 0 || (size_t)sz >= *plen) { + return -1; + } + + ++sz; /* for nul byte */ + *ps += sz; + *plen -= sz; + + return 0; +} + + +static ssize_t +tftp_strnlen(char *buf, size_t bufsize) +{ + void *end; + + end = memchr(buf, '\0', bufsize); + if (end == NULL) { + return -1; + } + + return (char *)end - buf; +} + + +static int +tftp_internal_error(struct xfer *xfer) +{ + if (report_transient_errors) { + tftp_error(xfer, TFTP_EUNDEF, "Internal error"); + } + return -1; +} + + +/** + * Send an error packet to the peer. 
+ * + * PCB may not be created yet in which case send the error packet from + * the TFTP server port (*). + * + * (*) We delay creation of the PCB in response to the original + * request until the request is verified and accepted. This makes + * using tcpdump(8) easier, since tcpdump does not track TFTP + * transfers, so an error reply from a new PCB is not recognized as + * such and is not decoded as TFTP. + * + * Always returns -1 for callers to reuse. + */ +static int +tftp_error(struct xfer *xfer, u16_t error, const char *fmt, ...) +{ + va_list ap; + struct pbuf *q; + + LWIP_ASSERT1(xfer != NULL); + + va_start(ap, fmt); + q = tftp_verror(error, fmt, ap); + va_end(ap); + + if (q == NULL) { + return -1; + } + + if (xfer->pcb != NULL) { + udp_send(xfer->pcb, q); + } + else { + udp_sendto(tftpd.pcb, q, ipX_2_ip(&xfer->peer_ip), xfer->peer_port); + } + + pbuf_free(q); + return -1; +} + + +/** + * Send an error packet from TFTP server port to the specified peer. + */ +static void +tftpd_error(ip_addr_t *addr, u16_t port, u16_t error, const char *fmt, ...) +{ + va_list ap; + struct pbuf *q; + + va_start(ap, fmt); + q = tftp_verror(error, fmt, ap); + va_end(ap); + + if (q != NULL) { + udp_sendto(tftpd.pcb, q, addr, port); + pbuf_free(q); + } +} + + +/** + * Create ERROR pbuf with formatted error message. 
+ */ +static struct pbuf * +tftp_verror(u16_t error, const char *fmt, va_list ap) +{ + struct tftp_error { + u16_t opcode; /* TFTP_ERROR */ + u16_t errcode; + char errmsg[512]; + }; + + struct pbuf *p; + struct tftp_error *errpkt; + int msgsz; + + p = pbuf_alloc(PBUF_TRANSPORT, sizeof(*errpkt), PBUF_RAM); + if (p == NULL) { + return NULL; + } + + errpkt = (struct tftp_error *)p->payload; + errpkt->opcode = PP_HTONS(TFTP_ERROR); + errpkt->errcode = htons(error); + + msgsz = vsnprintf(errpkt->errmsg, sizeof(errpkt->errmsg), fmt, ap); + if (msgsz < 0) { + errpkt->errmsg[0] = '\0'; + msgsz = 1; + } + else if ((size_t)msgsz < sizeof(errpkt->errmsg)) { + ++msgsz; /* for nul byte */ + } + else { + msgsz = sizeof(errpkt->errmsg); /* truncated, includes nul byte */ + } + + pbuf_realloc(p, sizeof(*errpkt) - sizeof(errpkt->errmsg) + msgsz); + return p; +} diff --git a/src/VBox/NetworkServices/NAT/pxdns.c b/src/VBox/NetworkServices/NAT/pxdns.c new file mode 100644 index 00000000..5e98824a --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxdns.c @@ -0,0 +1,942 @@ +/* $Id: pxdns.c $ */ +/** @file + * NAT Network - DNS proxy. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * Copyright (c) 2003,2004,2005 Armin Wolfermann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
 */
#define LOG_GROUP LOG_GROUP_NAT_SERVICE

#include "winutils.h"

#include "proxy.h"
#include "proxy_pollmgr.h"
#include "pxtcp.h"

#include "lwip/sys.h"
#include "lwip/tcpip.h"
#include "lwip/ip_addr.h"
#include "lwip/udp.h"
#include "lwip/tcp.h"

#ifndef RT_OS_WINDOWS
#include <sys/poll.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#else
#include "winpoll.h"
#endif

#include <stdio.h>
#include <string.h>


/**
 * Socket address that can hold either an IPv4 or an IPv6 address.
 */
union sockaddr_inet {
    struct sockaddr sa;
    struct sockaddr_in sin;
    struct sockaddr_in6 sin6;
};


struct request;


/**
 * DNS Proxy
 */
struct pxdns {
    /* host UDP sockets created in pxdns_init() (AF_INET and AF_INET6) */
    SOCKET sock4;
    SOCKET sock6;

    /* poll manager handlers for the two sockets, see pxdns_pmgr_pump() */
    struct pollmgr_handler pmhdl4;
    struct pollmgr_handler pmhdl6;

    /* lwIP UDP pcbs bound to port 53, see pxdns_recv4()/pxdns_recv6() */
    struct udp_pcb *pcb4;
    struct udp_pcb *pcb6;

    /* lwIP TCP listener on port 53, see pxdns_accept_syn() */
    struct tcp_pcb *ltcp;

    /* resolver list; presumably generation identifies the current
     * list (see request::generation) - confirm in
     * pxdns_create_resolver_sockaddrs() */
    size_t generation;
    size_t nresolvers;
    union sockaddr_inet *resolvers;

    u16_t id;

    /* mutex created in pxdns_init() */
    sys_mutex_t lock;

    /* counters */
    size_t active_queries;
    size_t expired_queries;
    size_t late_answers;
    size_t hash_collisions;

    /* number of slots in timeout_list */
#define TIMEOUT 5
    size_t timeout_slot;
    u32_t timeout_mask;
    struct request *timeout_list[TIMEOUT];

    /* request_hash has 2^HASHSIZE buckets, HASH() keeps the low
     * HASHSIZE bits of the id */
#define HASHSIZE 10
#define HASH(id) ((id) & ((1 << HASHSIZE) - 1))
    struct request *request_hash[1 << HASHSIZE];
} g_pxdns;


/**
 * A request relayed on behalf of a guest.
 */
struct request {
    /**
     * Request ID that we use in relayed request.
     */
    u16_t id;

    /**
     * pxdns::generation used for this request
     */
    size_t generation;

    /**
     * Current index into pxdns::resolvers
     */
    size_t residx;

    /**
     * PCB from which we have received this request.  lwIP doesn't
     * support listening for both IPv4 and IPv6 on the same pcb, so we
     * use two and need to keep track.
     */
    struct udp_pcb *pcb;

    /**
     * Client this request is from and its original request ID.
     */
    ipX_addr_t client_addr;
    u16_t client_port;
    u16_t client_id;

    /**
     * Chaining for pxdns::request_hash
     */
    struct request **pprev_hash;
    struct request *next_hash;

    /**
     * Chaining for pxdns::timeout_list
     */
    struct request **pprev_timeout;
    struct request *next_timeout;

    /**
     * Slot in pxdns::timeout_list
     */
    size_t timeout_slot;

    /**
     * Pbuf with reply received on pollmgr thread.
     */
    struct pbuf *reply;

    /**
     * Preallocated lwIP message to send reply from the lwIP thread.
     */
    struct tcpip_msg msg_reply;

    /**
     * Client request.  ID is replaced with ours, original saved in
     * client_id.  Use a copy since we might need to resend and we
     * don't want to hold onto pbuf of the request.
     */
    size_t size;
    u8_t data[1];
};


static void pxdns_create_resolver_sockaddrs(struct pxdns *pxdns,
                                            const char **nameservers);

static err_t pxdns_accept_syn(void *arg, struct tcp_pcb *newpcb, struct pbuf *syn);

static void pxdns_recv4(void *arg, struct udp_pcb *pcb, struct pbuf *p,
                        ip_addr_t *addr, u16_t port);
static void pxdns_recv6(void *arg, struct udp_pcb *pcb, struct pbuf *p,
                        ip6_addr_t *addr, u16_t port);
static void pxdns_query(struct pxdns *pxdns, struct udp_pcb *pcb, struct pbuf *p,
                        ipX_addr_t *addr, u16_t port);
static void pxdns_timer(void *arg);
static int pxdns_rexmit(struct pxdns *pxdns, struct request *req);
static int pxdns_forward_outbound(struct pxdns *pxdns, struct request *req);

static int pxdns_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents);
static void pxdns_pcb_reply(void *ctx);

static void pxdns_request_register(struct pxdns *pxdns, struct request *req);
static void pxdns_request_deregister(struct pxdns *pxdns, struct request *req);
static struct request *pxdns_request_find(struct pxdns *pxdns, u16_t id);

static void pxdns_hash_add(struct pxdns *pxdns, struct request *req);
static void pxdns_hash_del(struct pxdns *pxdns, struct request *req);
static
void pxdns_timeout_add(struct pxdns *pxdns, struct request *req); +static void pxdns_timeout_del(struct pxdns *pxdns, struct request *req); + +static void pxdns_request_free(struct request *req); + + +err_t +pxdns_init(struct netif *proxy_netif) +{ + struct pxdns *pxdns = &g_pxdns; + err_t error; + + LWIP_UNUSED_ARG(proxy_netif); + + pxdns->ltcp = tcp_new(); + if (pxdns->ltcp != NULL) { + tcp_bind_ip6(pxdns->ltcp, IP6_ADDR_ANY, 53); + pxdns->ltcp = tcp_listen_dual(pxdns->ltcp); + if (pxdns->ltcp != NULL) { + tcp_arg(pxdns->ltcp, pxdns); + tcp_accept_syn(pxdns->ltcp, pxdns_accept_syn); + } + } + + pxdns->pmhdl4.callback = pxdns_pmgr_pump; + pxdns->pmhdl4.data = (void *)pxdns; + pxdns->pmhdl4.slot = -1; + + pxdns->pmhdl6.callback = pxdns_pmgr_pump; + pxdns->pmhdl6.data = (void *)pxdns; + pxdns->pmhdl6.slot = -1; + + pxdns->pcb4 = udp_new(); + if (pxdns->pcb4 == NULL) { + error = ERR_MEM; + goto err_cleanup_pcb; + } + + pxdns->pcb6 = udp_new_ip6(); + if (pxdns->pcb6 == NULL) { + error = ERR_MEM; + goto err_cleanup_pcb; + } + + error = udp_bind(pxdns->pcb4, IP_ADDR_ANY, 53); + if (error != ERR_OK) { + goto err_cleanup_pcb; + } + + error = udp_bind_ip6(pxdns->pcb6, IP6_ADDR_ANY, 53); + if (error != ERR_OK) { + goto err_cleanup_pcb; + } + + udp_recv(pxdns->pcb4, pxdns_recv4, pxdns); + udp_recv_ip6(pxdns->pcb6, pxdns_recv6, pxdns); + + pxdns->sock4 = socket(AF_INET, SOCK_DGRAM, 0); + if (pxdns->sock4 == INVALID_SOCKET) { + goto err_cleanup_pcb; + } + + pxdns->sock6 = socket(AF_INET6, SOCK_DGRAM, 0); + if (pxdns->sock6 == INVALID_SOCKET) { + /* it's ok if the host doesn't support IPv6 */ + /* XXX: TODO: log */ + } + + pxdns->generation = 0; + pxdns->nresolvers = 0; + pxdns->resolvers = NULL; + pxdns_create_resolver_sockaddrs(pxdns, g_proxy_options->nameservers); + + sys_mutex_new(&pxdns->lock); + + pxdns->timeout_slot = 0; + pxdns->timeout_mask = 0; + + /* NB: assumes pollmgr thread is not running yet */ + pollmgr_add(&pxdns->pmhdl4, pxdns->sock4, POLLIN); + if 
(pxdns->sock6 != INVALID_SOCKET) { + pollmgr_add(&pxdns->pmhdl6, pxdns->sock6, POLLIN); + } + + return ERR_OK; + + err_cleanup_pcb: + if (pxdns->pcb4 != NULL) { + udp_remove(pxdns->pcb4); + pxdns->pcb4 = NULL; + } + if (pxdns->pcb6 != NULL) { + udp_remove(pxdns->pcb6); + pxdns->pcb4 = NULL; + } + + return error; +} + + +/** + * lwIP thread callback to set the new list of nameservers. + */ +void +pxdns_set_nameservers(void *arg) +{ + const char **nameservers = (const char **)arg; + + if (g_proxy_options->nameservers != NULL) { + RTMemFree((void *)g_proxy_options->nameservers); + } + g_proxy_options->nameservers = nameservers; + + pxdns_create_resolver_sockaddrs(&g_pxdns, nameservers); +} + + +/** + * Use this list of nameservers to resolve guest requests. + * + * Runs on lwIP thread, so no new queries or retramsmits compete with + * it for the use of the existing list of resolvers (to be replaced). + */ +static void +pxdns_create_resolver_sockaddrs(struct pxdns *pxdns, const char **nameservers) +{ + struct addrinfo hints; + union sockaddr_inet *resolvers; + size_t nnames, nresolvers; + const char **p; + int status; + + resolvers = NULL; + nresolvers = 0; + + if (nameservers == NULL) { + goto update_resolvers; + } + + nnames = 0; + for (p = nameservers; *p != NULL; ++p) { + ++nnames; + } + + if (nnames == 0) { + goto update_resolvers; + } + + resolvers = (union sockaddr_inet *)calloc(sizeof(resolvers[0]), nnames); + if (resolvers == NULL) { + nresolvers = 0; + goto update_resolvers; + } + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_DGRAM; + hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV; + + for (p = nameservers; *p != NULL; ++p) { + const char *name = *p; + struct addrinfo *ai; + status = getaddrinfo(name, /* "domain" */ "53", &hints, &ai); + if (status != 0) { + /* XXX: log failed resolution */ + continue; + } + + if (ai->ai_family != AF_INET && ai->ai_family != AF_INET6) { + /* XXX: log unsupported address family 
*/ + freeaddrinfo(ai); + continue; + } + + if (ai->ai_addrlen > sizeof(resolvers[nresolvers])) { + /* XXX: log */ + freeaddrinfo(ai); + continue; + } + + if (ai->ai_family == AF_INET6 && pxdns->sock6 == INVALID_SOCKET) { + /* no IPv6 support on the host, can't use this resolver */ + freeaddrinfo(ai); + continue; + } + + memcpy(&resolvers[nresolvers], ai->ai_addr, ai->ai_addrlen); + freeaddrinfo(ai); + ++nresolvers; + } + + if (nresolvers == 0) { + if (resolvers != NULL) { + free(resolvers); + } + resolvers = NULL; + } + + update_resolvers: + ++pxdns->generation; + if (pxdns->resolvers != NULL) { + free(pxdns->resolvers); + } + pxdns->resolvers = resolvers; + pxdns->nresolvers = nresolvers; +} + + +static void +pxdns_request_free(struct request *req) +{ + LWIP_ASSERT1(req->pprev_hash == NULL); + LWIP_ASSERT1(req->pprev_timeout == NULL); + + if (req->reply != NULL) { + pbuf_free(req->reply); + } + free(req); +} + + +static void +pxdns_hash_add(struct pxdns *pxdns, struct request *req) +{ + struct request **chain; + + LWIP_ASSERT1(req->pprev_hash == NULL); + ++pxdns->active_queries; + + chain = &pxdns->request_hash[HASH(req->id)]; + if ((req->next_hash = *chain) != NULL) { + (*chain)->pprev_hash = &req->next_hash; + ++pxdns->hash_collisions; + } + *chain = req; + req->pprev_hash = chain; +} + + +static void +pxdns_timeout_add(struct pxdns *pxdns, struct request *req) +{ + struct request **chain; + u32_t omask; + + LWIP_ASSERT1(req->pprev_timeout == NULL); + + req->timeout_slot = pxdns->timeout_slot; + chain = &pxdns->timeout_list[req->timeout_slot]; + if ((req->next_timeout = *chain) != NULL) { + (*chain)->pprev_timeout = &req->next_timeout; + } + *chain = req; + req->pprev_timeout = chain; + + omask = pxdns->timeout_mask; + pxdns->timeout_mask |= 1U << req->timeout_slot; + if (omask == 0) { + sys_untimeout(pxdns_timer, pxdns); + sys_timeout(1 * 1000, pxdns_timer, pxdns); + } +} + + +static void +pxdns_hash_del(struct pxdns *pxdns, struct request *req) +{ + 
LWIP_ASSERT1(req->pprev_hash != NULL); + --pxdns->active_queries; + + if (req->next_hash != NULL) { + req->next_hash->pprev_hash = req->pprev_hash; + } + *req->pprev_hash = req->next_hash; + req->pprev_hash = NULL; + req->next_hash = NULL; +} + + +static void +pxdns_timeout_del(struct pxdns *pxdns, struct request *req) +{ + LWIP_ASSERT1(req->pprev_timeout != NULL); + LWIP_ASSERT1(req->timeout_slot < TIMEOUT); + + if (req->next_timeout != NULL) { + req->next_timeout->pprev_timeout = req->pprev_timeout; + } + *req->pprev_timeout = req->next_timeout; + req->pprev_timeout = NULL; + req->next_timeout = NULL; + + if (pxdns->timeout_list[req->timeout_slot] == NULL) { + pxdns->timeout_mask &= ~(1U << req->timeout_slot); + /* may be on pollmgr thread so no sys_untimeout */ + } +} + + + +/** + * Do bookkeeping on new request. Called from pxdns_query(). + */ +static void +pxdns_request_register(struct pxdns *pxdns, struct request *req) +{ + sys_mutex_lock(&pxdns->lock); + + pxdns_hash_add(pxdns, req); + pxdns_timeout_add(pxdns, req); + + sys_mutex_unlock(&pxdns->lock); +} + + +static void +pxdns_request_deregister(struct pxdns *pxdns, struct request *req) +{ + sys_mutex_lock(&pxdns->lock); + + pxdns_hash_del(pxdns, req); + pxdns_timeout_del(pxdns, req); + + sys_mutex_unlock(&pxdns->lock); +} + + +/** + * Find request by the id we used when relaying it and remove it from + * id hash and timeout list. Called from pxdns_pmgr_pump() when reply + * comes. + */ +static struct request * +pxdns_request_find(struct pxdns *pxdns, u16_t id) +{ + struct request *req = NULL; + + sys_mutex_lock(&pxdns->lock); + + /* find request in the id->req hash */ + for (req = pxdns->request_hash[HASH(id)]; req != NULL; req = req->next_hash) { + if (req->id == id) { + break; + } + } + + if (req != NULL) { + pxdns_hash_del(pxdns, req); + pxdns_timeout_del(pxdns, req); + } + + sys_mutex_unlock(&pxdns->lock); + return req; +} + + +/** + * Retransmit of g/c expired requests and move timeout slot forward. 
+ */ +static void +pxdns_timer(void *arg) +{ + struct pxdns *pxdns = (struct pxdns *)arg; + struct request **chain, *req; + u32_t mask; + + sys_mutex_lock(&pxdns->lock); + + /* + * Move timeout slot first. New slot points to the list of + * expired requests. If any expired request is retransmitted, we + * keep it on the list (that is now current), effectively + * resetting the timeout. + */ + LWIP_ASSERT1(pxdns->timeout_slot < TIMEOUT); + if (++pxdns->timeout_slot == TIMEOUT) { + pxdns->timeout_slot = 0; + } + + chain = &pxdns->timeout_list[pxdns->timeout_slot]; + req = *chain; + while (req != NULL) { + struct request *expired = req; + req = req->next_timeout; + + if (pxdns_rexmit(pxdns, expired)) { + continue; + } + + pxdns_hash_del(pxdns, expired); + pxdns_timeout_del(pxdns, expired); + ++pxdns->expired_queries; + + pxdns_request_free(expired); + } + + if (pxdns->timeout_list[pxdns->timeout_slot] == NULL) { + pxdns->timeout_mask &= ~(1U << pxdns->timeout_slot); + } + else { + pxdns->timeout_mask |= 1U << pxdns->timeout_slot; + } + mask = pxdns->timeout_mask; + + sys_mutex_unlock(&pxdns->lock); + + if (mask != 0) { + sys_timeout(1 * 1000, pxdns_timer, pxdns); + } +} + + +static void +pxdns_recv4(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + struct pxdns *pxdns = (struct pxdns *)arg; + pxdns_query(pxdns, pcb, p, ip_2_ipX(addr), port); +} + +static void +pxdns_recv6(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip6_addr_t *addr, u16_t port) +{ + struct pxdns *pxdns = (struct pxdns *)arg; + pxdns_query(pxdns, pcb, p, ip6_2_ipX(addr), port); +} + + +static void +pxdns_query(struct pxdns *pxdns, struct udp_pcb *pcb, struct pbuf *p, + ipX_addr_t *addr, u16_t port) +{ + struct request *req; + int sent; + + if (pxdns->nresolvers == 0) { + /* nothing we can do */ + pbuf_free(p); + return; + } + + req = calloc(1, sizeof(struct request) - 1 + p->tot_len); + if (req == NULL) { + pbuf_free(p); + return; + } + + /* copy request data */ 
+ req->size = p->tot_len; + pbuf_copy_partial(p, req->data, p->tot_len, 0); + + /* save client identity and client's request id */ + req->pcb = pcb; + ipX_addr_copy(PCB_ISIPV6(pcb), req->client_addr, *addr); + req->client_port = port; + memcpy(&req->client_id, req->data, sizeof(req->client_id)); + + /* slap our request id onto it */ + req->id = pxdns->id++; + memcpy(req->data, &req->id, sizeof(u16_t)); + + /* resolver to forward to */ + req->generation = pxdns->generation; + req->residx = 0; + + /* prepare for relaying the reply back to guest */ + req->msg_reply.type = TCPIP_MSG_CALLBACK_STATIC; + req->msg_reply.sem = NULL; + req->msg_reply.msg.cb.function = pxdns_pcb_reply; + req->msg_reply.msg.cb.ctx = (void *)req; + + DPRINTF2(("%s: req=%p: client id %d -> id %d\n", + __func__, (void *)req, req->client_id, req->id)); + + pxdns_request_register(pxdns, req); + + sent = pxdns_forward_outbound(pxdns, req); + if (!sent) { + sent = pxdns_rexmit(pxdns, req); + } + if (!sent) { + pxdns_request_deregister(pxdns, req); + pxdns_request_free(req); + } +} + + +/** + * Forward request to the req::residx resolver in the pxdns::resolvers + * array of upstream resolvers. + * + * Returns 1 on success, 0 on failure. 
+ */ +static int +pxdns_forward_outbound(struct pxdns *pxdns, struct request *req) +{ + union sockaddr_inet *resolver; + ssize_t nsent; +#ifdef RT_OS_WINDOWS + const char *pSendData = (const char *)&req->data[0]; + int cbSendData = (int)req->size; + Assert((size_t)cbSendData == req->size); +#else + const void *pSendData = &req->data[0]; + size_t cbSendData = req->size; +#endif + + DPRINTF2(("%s: req %p: sending to resolver #%lu\n", + __func__, (void *)req, (unsigned long)req->residx)); + + LWIP_ASSERT1(req->generation == pxdns->generation); + LWIP_ASSERT1(req->residx < pxdns->nresolvers); + resolver = &pxdns->resolvers[req->residx]; + + if (resolver->sa.sa_family == AF_INET) { + nsent = sendto(pxdns->sock4, pSendData, cbSendData, 0, + &resolver->sa, sizeof(resolver->sin)); + + } + else if (resolver->sa.sa_family == AF_INET6) { + if (pxdns->sock6 != INVALID_SOCKET) { + nsent = sendto(pxdns->sock6, pSendData, cbSendData, 0, + &resolver->sa, sizeof(resolver->sin6)); + } + else { + /* shouldn't happen, we should have weeded out IPv6 resolvers */ + return 0; + } + } + else { + /* shouldn't happen, we should have weeded out unsupported families */ + return 0; + } + + if ((size_t)nsent == req->size) { + return 1; /* sent */ + } + + if (nsent < 0) { + DPRINTF2(("%s: send: %R[sockerr]\n", __func__, SOCKERRNO())); + } + else { + DPRINTF2(("%s: sent only %lu of %lu\n", + __func__, (unsigned long)nsent, (unsigned long)req->size)); + } + return 0; /* not sent, caller will retry as necessary */ +} + + +/** + * Forward request to the next resolver in the pxdns::resolvers array + * of upstream resolvers if there are any left. 
+ */ +static int +pxdns_rexmit(struct pxdns *pxdns, struct request *req) +{ + int sent; + + if (/* __predict_false */ req->generation != pxdns->generation) { + DPRINTF2(("%s: req %p: generation %lu != pxdns generation %lu\n", + __func__, (void *)req, + (unsigned long)req->generation, + (unsigned long)pxdns->generation)); + return 0; + } + + LWIP_ASSERT1(req->residx < pxdns->nresolvers); + do { + if (++req->residx == pxdns->nresolvers) { + return 0; + } + + sent = pxdns_forward_outbound(pxdns, req); + } while (!sent); + + return 1; +} + + +static int +pxdns_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxdns *pxdns; + struct request *req; + ssize_t nread; + err_t error; + u16_t id; + + pxdns = (struct pxdns *)handler->data; + LWIP_ASSERT1(handler == &pxdns->pmhdl4 || handler == &pxdns->pmhdl6); + LWIP_ASSERT1(fd == (handler == &pxdns->pmhdl4 ? pxdns->sock4 : pxdns->sock6)); + + if (revents & ~(POLLIN|POLLERR)) { + DPRINTF0(("%s: unexpected revents 0x%x\n", __func__, revents)); + return POLLIN; + } + + if (revents & POLLERR) { + int sockerr = -1; + socklen_t optlen = (socklen_t)sizeof(sockerr); + int status; + + status = getsockopt(fd, SOL_SOCKET, + SO_ERROR, (char *)&sockerr, &optlen); + if (status < 0) { + DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n", + __func__, fd, SOCKERRNO())); + } + else { + DPRINTF(("%s: sock %d: %R[sockerr]\n", + __func__, fd, sockerr)); + } + } + + if ((revents & POLLIN) == 0) { + return POLLIN; + } + + +#ifdef RT_OS_WINDOWS + nread = recv(fd, (char *)pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0); +#else + nread = recv(fd, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0); +#endif + if (nread < 0) { + DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO())); + return POLLIN; + } + + /* check for minimum dns packet length */ + if (nread < 12) { + DPRINTF2(("%s: short reply %lu bytes\n", + __func__, (unsigned long)nread)); + return POLLIN; + } + + /* XXX: shall we proxy back RCODE=Refused responses? 
*/ + + memcpy(&id, pollmgr_udpbuf, sizeof(id)); + req = pxdns_request_find(pxdns, id); + if (req == NULL) { + DPRINTF2(("%s: orphaned reply for %d\n", __func__, id)); + ++pxdns->late_answers; + return POLLIN; + } + + DPRINTF2(("%s: reply for req=%p: id %d -> client id %d\n", + __func__, (void *)req, req->id, req->client_id)); + + req->reply = pbuf_alloc(PBUF_RAW, nread, PBUF_RAM); + if (req->reply == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)nread)); + pxdns_request_free(req); + return POLLIN; + } + + memcpy(pollmgr_udpbuf, &req->client_id, sizeof(req->client_id)); + error = pbuf_take(req->reply, pollmgr_udpbuf, nread); + if (error != ERR_OK) { + DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)nread)); + pxdns_request_free(req); + return POLLIN; + } + + proxy_lwip_post(&req->msg_reply); + return POLLIN; +} + + +/** + * Called on lwIP thread via request::msg_reply callback. + */ +static void +pxdns_pcb_reply(void *ctx) +{ + struct request *req = (struct request *)ctx; + err_t error; + + error = udp_sendto(req->pcb, req->reply, + ipX_2_ip(&req->client_addr), req->client_port); + if (error != ERR_OK) { + DPRINTF(("%s: udp_sendto err %s\n", + __func__, proxy_lwip_strerr(error))); + } + + pxdns_request_free(req); +} + + +/** + * TCP DNS proxy. This kicks in for large replies that don't fit into + * 512 bytes of UDP payload. Client will retry with TCP to get + * complete reply. 
+ */ +static err_t +pxdns_accept_syn(void *arg, struct tcp_pcb *newpcb, struct pbuf *syn) +{ + struct pxdns *pxdns = (struct pxdns *)arg; + union sockaddr_inet *si; + ipX_addr_t *dst; + u16_t dst_port; + + tcp_accepted(pxdns->ltcp); + + /* no upstream resolver configured: refuse the connection */ + if (pxdns->nresolvers == 0) { + return ERR_CONN; + } + + /* TCP is always relayed to the first resolver on the list */ + si = &pxdns->resolvers[0]; + + /* pick address and port (converted with ntohs) matching the resolver's family */ + if (si->sa.sa_family == AF_INET6) { + dst = (ipX_addr_t *)&si->sin6.sin6_addr; + dst_port = ntohs(si->sin6.sin6_port); + } + else { + dst = (ipX_addr_t *)&si->sin.sin_addr; + dst_port = ntohs(si->sin.sin_port); + } + + /* + * XXX: TODO: need to implement protocol hooks. E.g. here if + * connect fails, we should try connecting to a different server. + */ + return pxtcp_pcb_accept_outbound(newpcb, syn, + si->sa.sa_family == AF_INET6, dst, dst_port); +} diff --git a/src/VBox/NetworkServices/NAT/pxping.c b/src/VBox/NetworkServices/NAT/pxping.c new file mode 100644 index 00000000..9acee074 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxping.c @@ -0,0 +1,2020 @@ +/* $Id: pxping.c $ */ +/** @file + * NAT Network - ping proxy, raw sockets version. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>.
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "pxremap.h" + +#include <iprt/string.h> + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#ifdef RT_OS_DARWIN +# define __APPLE_USE_RFC_3542 +#endif +#include <netinet/in.h> +#include <poll.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#else +#include <iprt/stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" +#include "lwip/inet_chksum.h" +#include "lwip/ip.h" +#include "lwip/icmp.h" + +#if defined(RT_OS_LINUX) && !defined(__USE_GNU) +#if __GLIBC_PREREQ(2, 8) +/* + * XXX: This is gross. in6_pktinfo is now hidden behind _GNU_SOURCE + * https://sourceware.org/bugzilla/show_bug.cgi?id=6775 + * + * But in older glibc versions, e.g. RHEL5, it is not! I don't want + * to deal with _GNU_SOURCE now, so as a kludge check for glibc + * version. It seems the __USE_GNU guard was introduced in 2.8. + */ +struct in6_pktinfo { + struct in6_addr ipi6_addr; + unsigned int ipi6_ifindex; +}; +#endif /* __GLIBC_PREREQ */ +#endif /* RT_OS_LINUX && !__USE_GNU */ + + +/* forward */ +struct ping_pcb; + + +/** + * Global state for ping proxy collected in one entity to minimize + * globals. There's only one instance of this structure. + * + * Raw ICMP sockets are promiscuous, so it doesn't make sense to have + * multiple. If this code ever needs to support multiple netifs, the + * netif member should be exiled into "pcb". 
+ */ +struct pxping { + /* raw ICMP socket for IPv4, INVALID_SOCKET if not available */ + SOCKET sock4; + + /* last values successfully applied to sock4 with setsockopt(), see pxping_recv4() */ +#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) +# define DF_WITH_IP_HDRINCL + int hdrincl; +#else + int df; +#endif + int ttl; + int tos; + + /* raw ICMPv6 socket, INVALID_SOCKET if not available */ + SOCKET sock6; +#ifdef RT_OS_WINDOWS + LPFN_WSARECVMSG pfWSARecvMsg6; +#endif + /* last hop limit successfully applied to sock6, see pxping_recv6() */ + int hopl; + + struct pollmgr_handler pmhdl4; + struct pollmgr_handler pmhdl6; + + struct netif *netif; + + /** + * Protect lwIP and pmgr accesses to the list of pcbs. + */ + sys_mutex_t lock; + + /* + * We need to find pcbs both from the guest side and from the host + * side. If we need to support industrial grade ping throughput, + * we will need two pcb hashes. For now, a short linked list + * should be enough. Cf. pxping_pcb_for_request() and + * pxping_pcb_for_reply(). + */ +#define PXPING_MAX_PCBS 8 + size_t npcbs; + struct ping_pcb *pcbs; + +#define TIMEOUT 5 + int timer_active; + size_t timeout_slot; + struct ping_pcb *timeout_list[TIMEOUT]; +}; + + +/** + * Quasi PCB for ping. + */ +struct ping_pcb { + ipX_addr_t src; + ipX_addr_t dst; + + u8_t is_ipv6; + /* when set, requests go to the "peer" address and TTL/hop limit is not decremented */ + u8_t is_mapped; + + /* echo id as sent by the guest and the id substituted for it on the host side */ + u16_t guest_id; + u16_t host_id; + + /** + * Desired slot in pxping::timeout_list. See pxping_timer(). + */ + size_t timeout_slot; + + /** + * Chaining for pxping::timeout_list + */ + struct ping_pcb **pprev_timeout; + struct ping_pcb *next_timeout; + + /** + * Chaining for pxping::pcbs + */ + struct ping_pcb *next; + + /* destination socket address handed to proxy_sendto() */ + union { + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } peer; +}; + + +/** + * lwIP thread callback message for IPv4 ping. + * + * We pass raw IP datagram for ip_output_if() so we only need pbuf and + * netif (from pxping). + */ +struct ping_msg { + struct tcpip_msg msg; + struct pxping *pxping; + struct pbuf *p; +}; + + +/** + * lwIP thread callback message for IPv6 ping. + * + * We cannot obtain raw IPv6 datagram from host without extra trouble, + * so we pass ICMPv6 payload in pbuf and also other parameters to + * ip6_output_if().
+ */ +struct ping6_msg { + struct tcpip_msg msg; + struct pxping *pxping; + struct pbuf *p; + ip6_addr_t src, dst; + int hopl, tclass; +}; + + +#ifdef RT_OS_WINDOWS +static int pxping_init_windows(struct pxping *pxping); +#endif +static void pxping_recv4(void *arg, struct pbuf *p); +static void pxping_recv6(void *arg, struct pbuf *p); + +static void pxping_timer(void *arg); +static void pxping_timer_needed(struct pxping *pxping); + +static struct ping_pcb *pxping_pcb_for_request(struct pxping *pxping, + int is_ipv6, + ipX_addr_t *src, ipX_addr_t *dst, + u16_t guest_id); +static struct ping_pcb *pxping_pcb_for_reply(struct pxping *pxping, int is_ipv6, + ipX_addr_t *dst, u16_t host_id); + +static FNRTSTRFORMATTYPE pxping_pcb_rtstrfmt; +static struct ping_pcb *pxping_pcb_allocate(struct pxping *pxping); +static void pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb); +static void pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb); +static void pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb); +static void pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb); +static void pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb); + +static int pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents); + +static void pxping_pmgr_icmp4(struct pxping *pxping); +static void pxping_pmgr_icmp4_echo(struct pxping *pxping, + u16_t iplen, struct sockaddr_in *peer); +static void pxping_pmgr_icmp4_error(struct pxping *pxping, + u16_t iplen, struct sockaddr_in *peer); +static void pxping_pmgr_icmp6(struct pxping *pxping); +static void pxping_pmgr_icmp6_echo(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + int hopl, int tclass, u16_t icmplen); +static void pxping_pmgr_icmp6_error(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + int hopl, int tclass, u16_t icmplen); + +static void pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen); +static void 
pxping_pcb_forward_inbound(void *arg); + +static void pxping_pmgr_forward_inbound6(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + u8_t hopl, u8_t tclass, + u16_t icmplen); +static void pxping_pcb_forward_inbound6(void *arg); + +/* + * NB: This is not documented except in RTFS. + * + * If ip_output_if() is passed dest == NULL then it treats p as + * complete IP packet with payload pointing to the IP header. It does + * not build IP header, ignores all header-related arguments, fetches + * real destination from the header in the pbuf and outputs pbuf to + * the specified netif. + */ +#define ip_raw_output_if(p, netif) \ + (ip_output_if((p), NULL, NULL, 0, 0, 0, (netif))) + + + +static struct pxping g_pxping; + + +err_t +pxping_init(struct netif *netif, SOCKET sock4, SOCKET sock6) +{ + const int on = 1; + int status; + + if (sock4 == INVALID_SOCKET && sock6 == INVALID_SOCKET) { + return ERR_VAL; + } + + g_pxping.netif = netif; + sys_mutex_new(&g_pxping.lock); + + g_pxping.sock4 = sock4; + if (g_pxping.sock4 != INVALID_SOCKET) { +#ifdef DF_WITH_IP_HDRINCL + g_pxping.hdrincl = 0; +#else + g_pxping.df = -1; +#endif + g_pxping.ttl = -1; + g_pxping.tos = 0; + +#ifdef RT_OS_LINUX + { + const int dont = IP_PMTUDISC_DONT; + status = setsockopt(sock4, IPPROTO_IP, IP_MTU_DISCOVER, + &dont, sizeof(dont)); + if (status != 0) { + DPRINTF(("IP_MTU_DISCOVER: %R[sockerr]\n", SOCKERRNO())); + } + } +#endif /* RT_OS_LINUX */ + + g_pxping.pmhdl4.callback = pxping_pmgr_pump; + g_pxping.pmhdl4.data = (void *)&g_pxping; + g_pxping.pmhdl4.slot = -1; + pollmgr_add(&g_pxping.pmhdl4, g_pxping.sock4, POLLIN); + + ping_proxy_accept(pxping_recv4, &g_pxping); + } + + g_pxping.sock6 = sock6; +#ifdef RT_OS_WINDOWS + /* we need recvmsg */ + if (g_pxping.sock6 != INVALID_SOCKET) { + status = pxping_init_windows(&g_pxping); + if (status == SOCKET_ERROR) { + g_pxping.sock6 = INVALID_SOCKET; + /* close(sock6); */ + } + } +#endif + if (g_pxping.sock6 != INVALID_SOCKET) { + g_pxping.hopl = 
-1; + +#if !defined(IPV6_RECVPKTINFO) +#define IPV6_RECVPKTINFO (IPV6_PKTINFO) +#endif + status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVPKTINFO, + (const char *)&on, sizeof(on)); + if (status < 0) { + DPRINTF(("IPV6_RECVPKTINFO: %R[sockerr]\n", SOCKERRNO())); + /* XXX: for now this is fatal */ + } + +#if !defined(IPV6_RECVHOPLIMIT) +#define IPV6_RECVHOPLIMIT (IPV6_HOPLIMIT) +#endif + status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, + (const char *)&on, sizeof(on)); + if (status < 0) { + DPRINTF(("IPV6_RECVHOPLIMIT: %R[sockerr]\n", SOCKERRNO())); + } + +#ifdef IPV6_RECVTCLASS /* new in RFC 3542, there's no RFC 2292 counterpart */ + /** @todo IPV6_RECVTCLASS */ +#endif + + g_pxping.pmhdl6.callback = pxping_pmgr_pump; + g_pxping.pmhdl6.data = (void *)&g_pxping; + g_pxping.pmhdl6.slot = -1; + pollmgr_add(&g_pxping.pmhdl6, g_pxping.sock6, POLLIN); + + ping6_proxy_accept(pxping_recv6, &g_pxping); + } + + status = RTStrFormatTypeRegister("ping_pcb", pxping_pcb_rtstrfmt, NULL); + AssertRC(status); + + return ERR_OK; +} + + +#ifdef RT_OS_WINDOWS +static int +pxping_init_windows(struct pxping *pxping) +{ + GUID WSARecvMsgGUID = WSAID_WSARECVMSG; + DWORD nread; + int status; + + pxping->pfWSARecvMsg6 = NULL; + status = WSAIoctl(pxping->sock6, + SIO_GET_EXTENSION_FUNCTION_POINTER, + &WSARecvMsgGUID, sizeof(WSARecvMsgGUID), + &pxping->pfWSARecvMsg6, sizeof(pxping->pfWSARecvMsg6), + &nread, + NULL, NULL); + return status; +} +#endif /* RT_OS_WINDOWS */ + + +static u32_t +chksum_delta_16(u16_t oval, u16_t nval) +{ + u32_t sum = (u16_t)~oval; + sum += nval; + return sum; +} + + +static u32_t +chksum_update_16(u16_t *oldp, u16_t nval) +{ + u32_t sum = chksum_delta_16(*oldp, nval); + *oldp = nval; + return sum; +} + + +static u32_t +chksum_delta_32(u32_t oval, u32_t nval) +{ + u32_t sum = ~oval; + sum = FOLD_U32T(sum); + sum += FOLD_U32T(nval); + return sum; +} + + +static u32_t +chksum_update_32(u32_t *oldp, u32_t nval) +{ + u32_t sum = chksum_delta_32(*oldp, nval); + 
*oldp = nval; + return sum; +} + + +static u32_t +chksum_delta_ipv6(const ip6_addr_t *oldp, const ip6_addr_t *newp) +{ + u32_t sum; + + sum = chksum_delta_32(oldp->addr[0], newp->addr[0]); + sum += chksum_delta_32(oldp->addr[1], newp->addr[1]); + sum += chksum_delta_32(oldp->addr[2], newp->addr[2]); + sum += chksum_delta_32(oldp->addr[3], newp->addr[3]); + + return sum; +} + + +static u32_t +chksum_update_ipv6(ip6_addr_t *oldp, const ip6_addr_t *newp) +{ + u32_t sum; + + sum = chksum_update_32(&oldp->addr[0], newp->addr[0]); + sum += chksum_update_32(&oldp->addr[1], newp->addr[1]); + sum += chksum_update_32(&oldp->addr[2], newp->addr[2]); + sum += chksum_update_32(&oldp->addr[3], newp->addr[3]); + + return sum; +} + + +/** + * ICMP Echo Request in pbuf "p" is to be proxied. + */ +static void +pxping_recv4(void *arg, struct pbuf *p) +{ + struct pxping *pxping = (struct pxping *)arg; + struct ping_pcb *pcb; +#ifdef DF_WITH_IP_HDRINCL + struct ip_hdr iph_orig; +#endif + struct icmp_echo_hdr icmph_orig; + struct ip_hdr *iph; + struct icmp_echo_hdr *icmph; + int df, ttl, tos; + u32_t sum; + u16_t iphlen; + int status; + + iphlen = ip_current_header_tot_len(); + if (iphlen != IP_HLEN) { /* we don't do options */ + pbuf_free(p); + return; + } + + iph = (/* UNCONST */ struct ip_hdr *)ip_current_header(); + icmph = (struct icmp_echo_hdr *)p->payload; + + pcb = pxping_pcb_for_request(pxping, 0, + ipX_current_src_addr(), + ipX_current_dest_addr(), + icmph->id); + if (pcb == NULL) { + pbuf_free(p); + return; + } + + DPRINTF(("ping %p: %R[ping_pcb] seq %d len %u ttl %d\n", + pcb, pcb, + ntohs(icmph->seqno), (unsigned int)p->tot_len, + IPH_TTL(iph))); + + ttl = IPH_TTL(iph); + if (!pcb->is_mapped) { + if (RT_UNLIKELY(ttl == 1)) { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_LIKELY(status == 0)) { + icmp_time_exceeded(p, ICMP_TE_TTL); + } + pbuf_free(p); + return; + } + --ttl; + } + + /* + * OS X doesn't provide a socket option to control fragmentation. 
+ * Solaris doesn't provide IP_DONTFRAG on all releases we support. + * In this case we have to use IP_HDRINCL. We don't want to use + * it always since it doesn't handle fragmentation (but that's ok + * for DF) and Windows doesn't do automatic source address + * selection with IP_HDRINCL. + */ + df = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0; + +#ifdef DF_WITH_IP_HDRINCL + if (df != pxping->hdrincl) { + status = setsockopt(pxping->sock4, IPPROTO_IP, IP_HDRINCL, + &df, sizeof(df)); + if (RT_LIKELY(status == 0)) { + pxping->hdrincl = df; + } + else { + DPRINTF(("IP_HDRINCL: %R[sockerr]\n", SOCKERRNO())); + } + } + + if (pxping->hdrincl) { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_UNLIKELY(status != 0)) { + pbuf_free(p); + return; + } + + /* we will overwrite IP header, save original for ICMP errors */ + memcpy(&iph_orig, iph, iphlen); + + if (pcb->is_mapped) { + ip4_addr_set_u32(&iph->dest, pcb->peer.sin.sin_addr.s_addr); + } + + if (g_proxy_options->src4 != NULL) { + ip4_addr_set_u32(&iph->src, g_proxy_options->src4->sin_addr.s_addr); + } + else { + /* let the kernel select suitable source address */ + ip_addr_set_any(&iph->src); + } + + IPH_TTL_SET(iph, ttl); /* already decremented */ + IPH_ID_SET(iph, 0); /* kernel will set one */ +#ifdef RT_OS_DARWIN + /* wants ip_offset and ip_len fields in host order */ + IPH_OFFSET_SET(iph, ntohs(IPH_OFFSET(iph))); + IPH_LEN_SET(iph, ntohs(IPH_LEN(iph))); + /* wants checksum of everything (sic!), in host order */ + sum = inet_chksum_pbuf(p); + IPH_CHKSUM_SET(iph, sum); +#else /* !RT_OS_DARWIN */ + IPH_CHKSUM_SET(iph, 0); /* kernel will recalculate */ +#endif + } + else /* !pxping->hdrincl */ +#endif /* DF_WITH_IP_HDRINCL */ + { +#if !defined(DF_WITH_IP_HDRINCL) + /* control DF flag via setsockopt(2) */ +#define USE_DF_OPTION(_Optname) \ + const int dfopt = _Optname; \ + const char * const dfoptname = #_Optname; NOREF(dfoptname) +#if defined(RT_OS_LINUX) + USE_DF_OPTION(IP_MTU_DISCOVER); + df = df ? 
IP_PMTUDISC_DO : IP_PMTUDISC_DONT; +#elif defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD) + USE_DF_OPTION(IP_DONTFRAG); +#elif defined(RT_OS_WINDOWS) + USE_DF_OPTION(IP_DONTFRAGMENT); +#endif + if (df != pxping->df) { + status = setsockopt(pxping->sock4, IPPROTO_IP, dfopt, + (char *)&df, sizeof(df)); + if (RT_LIKELY(status == 0)) { + pxping->df = df; + } + else { + DPRINTF(("%s: %R[sockerr]\n", dfoptname, SOCKERRNO())); + } + } +#endif /* !DF_WITH_IP_HDRINCL */ + + if (ttl != pxping->ttl) { + status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TTL, + (char *)&ttl, sizeof(ttl)); + if (RT_LIKELY(status == 0)) { + pxping->ttl = ttl; + } + else { + DPRINTF(("IP_TTL: %R[sockerr]\n", SOCKERRNO())); + } + } + + tos = IPH_TOS(iph); + if (tos != pxping->tos) { + status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TOS, + (char *)&tos, sizeof(tos)); + if (RT_LIKELY(status == 0)) { + pxping->tos = tos; + } + else { + DPRINTF(("IP_TOS: %R[sockerr]\n", SOCKERRNO())); + } + } + } + + /* rewrite ICMP echo header */ + memcpy(&icmph_orig, icmph, sizeof(*icmph)); + sum = (u16_t)~icmph->chksum; + sum += chksum_update_16(&icmph->id, pcb->host_id); + sum = FOLD_U32T(sum); + icmph->chksum = ~sum; + + status = proxy_sendto(pxping->sock4, p, + &pcb->peer.sin, sizeof(pcb->peer.sin)); + if (status != 0) { + int error = -status; + DPRINTF(("%s: sendto: %R[sockerr]\n", __func__, error)); + +#ifdef DF_WITH_IP_HDRINCL + if (pxping->hdrincl) { + /* restore original IP header */ + memcpy(iph, &iph_orig, iphlen); + } + else +#endif + { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_UNLIKELY(status != 0)) { + pbuf_free(p); + return; + } + } + + /* restore original ICMP header */ + memcpy(icmph, &icmph_orig, sizeof(*icmph)); + + /* + * Some ICMP errors may be generated by the kernel and we read + * them from the socket and forward them normally, hence the + * ifdefs below. 
+ */ + switch (error) { + +#if !( defined(RT_OS_SOLARIS) \ + || (defined(RT_OS_LINUX) && !defined(DF_WITH_IP_HDRINCL)) \ + ) + case EMSGSIZE: + icmp_dest_unreach(p, ICMP_DUR_FRAG); + break; +#endif + + case ENETDOWN: + case ENETUNREACH: + icmp_dest_unreach(p, ICMP_DUR_NET); + break; + + case EHOSTDOWN: + case EHOSTUNREACH: + icmp_dest_unreach(p, ICMP_DUR_HOST); + break; + } + } + + pbuf_free(p); +} + + +/** + * ICMPv6 Echo Request in pbuf "p" is to be proxied. + */ +static void +pxping_recv6(void *arg, struct pbuf *p) +{ + struct pxping *pxping = (struct pxping *)arg; + struct ping_pcb *pcb; + struct ip6_hdr *iph; + struct icmp6_echo_hdr *icmph; + int hopl; + u16_t iphlen; + u16_t id, seq; + int status; + + iph = (/* UNCONST */ struct ip6_hdr *)ip6_current_header(); + iphlen = ip_current_header_tot_len(); + + icmph = (struct icmp6_echo_hdr *)p->payload; + + id = icmph->id; + seq = icmph->seqno; + + pcb = pxping_pcb_for_request(pxping, 1, + ipX_current_src_addr(), + ipX_current_dest_addr(), + id); + if (pcb == NULL) { + pbuf_free(p); + return; + } + + DPRINTF(("ping %p: %R[ping_pcb] seq %d len %u hopl %d\n", + pcb, pcb, + ntohs(seq), (unsigned int)p->tot_len, + IP6H_HOPLIM(iph))); + + hopl = IP6H_HOPLIM(iph); + if (!pcb->is_mapped) { + if (hopl == 1) { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_LIKELY(status == 0)) { + icmp6_time_exceeded(p, ICMP6_TE_HL); + } + pbuf_free(p); + return; + } + --hopl; + } + + /* + * Rewrite ICMPv6 echo header. We don't need to recompute the + * checksum since, unlike IPv4, checksum includes pseudo-header. + * OS computes checksum for us on send() since it needs to select + * source address. + */ + icmph->id = pcb->host_id; + + /** @todo use control messages to save a syscall? 
*/ + if (hopl != pxping->hopl) { + status = setsockopt(pxping->sock6, IPPROTO_IPV6, IPV6_UNICAST_HOPS, + (char *)&hopl, sizeof(hopl)); + if (status == 0) { + pxping->hopl = hopl; + } + else { + DPRINTF(("IPV6_HOPLIMIT: %R[sockerr]\n", SOCKERRNO())); + } + } + + status = proxy_sendto(pxping->sock6, p, + &pcb->peer.sin6, sizeof(pcb->peer.sin6)); + if (status != 0) { + int error = -status; + DPRINTF(("%s: sendto: %R[sockerr]\n", __func__, error)); + + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_UNLIKELY(status != 0)) { + pbuf_free(p); + return; + } + + /* restore original ICMP header */ + icmph->id = pcb->guest_id; + + switch (error) { + case EACCES: + icmp6_dest_unreach(p, ICMP6_DUR_PROHIBITED); + break; + +#ifdef ENONET + case ENONET: +#endif + case ENETDOWN: + case ENETUNREACH: + case EHOSTDOWN: + case EHOSTUNREACH: + icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE); + break; + } + } + + pbuf_free(p); +} + + +/** + * Formatter for %R[ping_pcb]. + */ +static DECLCALLBACK(size_t) +pxping_pcb_rtstrfmt(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, const void *pvValue, + int cchWidth, int cchPrecision, unsigned int fFlags, + void *pvUser) +{ + const struct ping_pcb *pcb = (const struct ping_pcb *)pvValue; + size_t cb = 0; + + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + + AssertReturn(strcmp(pszType, "ping_pcb") == 0, 0); + + if (pcb == NULL) { + return RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, "(null)"); + } + + /* XXX: %RTnaipv4 takes the value, but %RTnaipv6 takes the pointer */ + if (pcb->is_ipv6) { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + "%RTnaipv6 -> %RTnaipv6", &pcb->src, &pcb->dst); + if (pcb->is_mapped) { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + " (%RTnaipv6)", &pcb->peer.sin6.sin6_addr); + } + } + else { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + "%RTnaipv4 -> %RTnaipv4", + ip4_addr_get_u32(ipX_2_ip(&pcb->src)), + 
ip4_addr_get_u32(ipX_2_ip(&pcb->dst))); + if (pcb->is_mapped) { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + " (%RTnaipv4)", pcb->peer.sin.sin_addr.s_addr); + } + } + + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, + " id %04x->%04x", ntohs(pcb->guest_id), ntohs(pcb->host_id)); + + return cb; +} + + +static struct ping_pcb * +pxping_pcb_allocate(struct pxping *pxping) +{ + struct ping_pcb *pcb; + + if (pxping->npcbs >= PXPING_MAX_PCBS) { + return NULL; + } + + pcb = (struct ping_pcb *)malloc(sizeof(*pcb)); + if (pcb == NULL) { + return NULL; + } + + ++pxping->npcbs; + return pcb; +} + + +static void +pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb) +{ + LWIP_ASSERT1(pxping->npcbs > 0); + LWIP_ASSERT1(pcb->next == NULL); + LWIP_ASSERT1(pcb->pprev_timeout == NULL); + + DPRINTF(("%s: ping %p\n", __func__, (void *)pcb)); + + --pxping->npcbs; + free(pcb); +} + + +static void +pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb) +{ + struct ping_pcb **chain; + + LWIP_ASSERT1(pcb->pprev_timeout == NULL); + + chain = &pxping->timeout_list[pcb->timeout_slot]; + if ((pcb->next_timeout = *chain) != NULL) { + (*chain)->pprev_timeout = &pcb->next_timeout; + } + *chain = pcb; + pcb->pprev_timeout = chain; +} + + +static void +pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb) +{ + LWIP_UNUSED_ARG(pxping); + + LWIP_ASSERT1(pcb->pprev_timeout != NULL); + if (pcb->next_timeout != NULL) { + pcb->next_timeout->pprev_timeout = pcb->pprev_timeout; + } + *pcb->pprev_timeout = pcb->next_timeout; + pcb->pprev_timeout = NULL; + pcb->next_timeout = NULL; +} + + +static void +pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb) +{ + pcb->next = pxping->pcbs; + pxping->pcbs = pcb; + + pxping_timeout_add(pxping, pcb); +} + + +static void +pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb) +{ + struct ping_pcb **p; + + for (p = &pxping->pcbs; *p != NULL; p = &(*p)->next) { + if (*p == pcb) { + *p = pcb->next; 
+ pcb->next = NULL; + break; + } + } + + pxping_timeout_del(pxping, pcb); +} + + +static struct ping_pcb * +pxping_pcb_for_request(struct pxping *pxping, + int is_ipv6, ipX_addr_t *src, ipX_addr_t *dst, + u16_t guest_id) +{ + struct ping_pcb *pcb; + + /* on lwip thread, so no concurrent updates */ + for (pcb = pxping->pcbs; pcb != NULL; pcb = pcb->next) { + if (pcb->guest_id == guest_id + && pcb->is_ipv6 == is_ipv6 + && ipX_addr_cmp(is_ipv6, &pcb->dst, dst) + && ipX_addr_cmp(is_ipv6, &pcb->src, src)) + { + break; + } + } + + if (pcb == NULL) { + int mapped; + + pcb = pxping_pcb_allocate(pxping); + if (pcb == NULL) { + return NULL; + } + + pcb->is_ipv6 = is_ipv6; + ipX_addr_copy(is_ipv6, pcb->src, *src); + ipX_addr_copy(is_ipv6, pcb->dst, *dst); + + pcb->guest_id = guest_id; +#ifdef RT_OS_WINDOWS +# define random() (rand()) +#endif + pcb->host_id = random() & 0xffffUL; + + pcb->pprev_timeout = NULL; + pcb->next_timeout = NULL; + + if (is_ipv6) { + pcb->peer.sin6.sin6_family = AF_INET6; +#if HAVE_SA_LEN + pcb->peer.sin6.sin6_len = sizeof(pcb->peer.sin6); +#endif + pcb->peer.sin6.sin6_port = htons(IPPROTO_ICMPV6); + pcb->peer.sin6.sin6_flowinfo = 0; + mapped = pxremap_outbound_ip6((ip6_addr_t *)&pcb->peer.sin6.sin6_addr, + ipX_2_ip6(&pcb->dst)); + } + else { + pcb->peer.sin.sin_family = AF_INET; +#if HAVE_SA_LEN + pcb->peer.sin.sin_len = sizeof(pcb->peer.sin); +#endif + pcb->peer.sin.sin_port = htons(IPPROTO_ICMP); + mapped = pxremap_outbound_ip4((ip_addr_t *)&pcb->peer.sin.sin_addr, + ipX_2_ip(&pcb->dst)); + } + + if (mapped == PXREMAP_FAILED) { + free(pcb); + return NULL; + } + else { + pcb->is_mapped = (mapped == PXREMAP_MAPPED); + } + + pcb->timeout_slot = pxping->timeout_slot; + + sys_mutex_lock(&pxping->lock); + pxping_pcb_register(pxping, pcb); + sys_mutex_unlock(&pxping->lock); + + DPRINTF(("ping %p: %R[ping_pcb] - created\n", pcb, pcb)); + + pxping_timer_needed(pxping); + } + else { + /* just bump up expiration timeout lazily */ + DPRINTF(("ping %p: 
%R[ping_pcb] - slot %d -> %d\n", + pcb, pcb, + (unsigned int)pcb->timeout_slot, + (unsigned int)pxping->timeout_slot)); + pcb->timeout_slot = pxping->timeout_slot; + } + + return pcb; +} + + +/* GCC 12.2.1 complains that array subscript is partly outside + * of array bounds in expansion of ipX_addr_cmp. */ +#if RT_GNUC_PREREQ(12, 0) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Warray-bounds" +#endif +/** + * Called on pollmgr thread. Caller must do the locking since caller + * is going to use the returned pcb, which needs to be protected from + * being expired by pxping_timer() on lwip thread. + */ +static struct ping_pcb * +pxping_pcb_for_reply(struct pxping *pxping, + int is_ipv6, ipX_addr_t *dst, u16_t host_id) +{ + struct ping_pcb *pcb; + + for (pcb = pxping->pcbs; pcb != NULL; pcb = pcb->next) { + if (pcb->host_id == host_id + && pcb->is_ipv6 == is_ipv6 + /* XXX: allow broadcast pings? */ + && ipX_addr_cmp(is_ipv6, &pcb->dst, dst)) + { + return pcb; + } + } + + return NULL; +} +#if RT_GNUC_PREREQ(12, 0) +# pragma GCC diagnostic pop +#endif + + +static void +pxping_timer(void *arg) +{ + struct pxping *pxping = (struct pxping *)arg; + struct ping_pcb **chain, *pcb; + + pxping->timer_active = 0; + + /* + * New slot points to the list of pcbs to check for expiration. + */ + LWIP_ASSERT1(pxping->timeout_slot < TIMEOUT); + if (++pxping->timeout_slot == TIMEOUT) { + pxping->timeout_slot = 0; + } + + chain = &pxping->timeout_list[pxping->timeout_slot]; + pcb = *chain; + + /* protect from pollmgr concurrent reads */ + sys_mutex_lock(&pxping->lock); + + while (pcb != NULL) { + struct ping_pcb *xpcb = pcb; + pcb = pcb->next_timeout; + + if (xpcb->timeout_slot == pxping->timeout_slot) { + /* expired */ + pxping_pcb_deregister(pxping, xpcb); + pxping_pcb_delete(pxping, xpcb); + } + else { + /* + * If there was another request, we updated timeout_slot + * but delayed actually moving the pcb until now. 
+ */ + pxping_timeout_del(pxping, xpcb); /* from current slot */ + pxping_timeout_add(pxping, xpcb); /* to new slot */ + } + } + + sys_mutex_unlock(&pxping->lock); + pxping_timer_needed(pxping); +} + + +static void +pxping_timer_needed(struct pxping *pxping) +{ + if (!pxping->timer_active && pxping->pcbs != NULL) { + pxping->timer_active = 1; + sys_timeout(1 * 1000, pxping_timer, pxping); + } +} + + +static int +pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxping *pxping; + + pxping = (struct pxping *)handler->data; + LWIP_ASSERT1(fd == pxping->sock4 || fd == pxping->sock6); + + if (revents & ~(POLLIN|POLLERR)) { + DPRINTF0(("%s: unexpected revents 0x%x\n", __func__, revents)); + return POLLIN; + } + + if (revents & POLLERR) { + int sockerr = -1; + socklen_t optlen = (socklen_t)sizeof(sockerr); + int status; + + status = getsockopt(fd, SOL_SOCKET, + SO_ERROR, (char *)&sockerr, &optlen); + if (status < 0) { + DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n", + __func__, fd, SOCKERRNO())); + } + else { + DPRINTF(("%s: sock %d: %R[sockerr]\n", + __func__, fd, sockerr)); + } + } + + if ((revents & POLLIN) == 0) { + return POLLIN; + } + + if (fd == pxping->sock4) { + pxping_pmgr_icmp4(pxping); + } + else /* fd == pxping->sock6 */ { + pxping_pmgr_icmp6(pxping); + } + + return POLLIN; +} + + +/** + * Process incoming ICMP message for the host. + * NB: we will get a lot of spam here and have to sift through it. + */ +static void +pxping_pmgr_icmp4(struct pxping *pxping) +{ + struct sockaddr_in sin; + socklen_t salen = sizeof(sin); + ssize_t nread; + struct ip_hdr *iph; + struct icmp_echo_hdr *icmph; + u16_t iplen, ipoff; + + memset(&sin, 0, sizeof(sin)); + + /* + * Reads from raw IPv4 sockets deliver complete IP datagrams with + * IP header included. 
+ */ + nread = recvfrom(pxping->sock4, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0, + (struct sockaddr *)&sin, &salen); + if (nread < 0) { + DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO())); + return; + } + + if (nread < IP_HLEN) { + DPRINTF2(("%s: read %d bytes, IP header truncated\n", + __func__, (unsigned int)nread)); + return; + } + + iph = (struct ip_hdr *)pollmgr_udpbuf; + + /* match version */ + if (IPH_V(iph) != 4) { + DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(iph))); + return; + } + + /* no fragmentation */ + ipoff = IPH_OFFSET(iph); +#if defined(RT_OS_DARWIN) + /* darwin reports IPH_OFFSET in host byte order */ + ipoff = htons(ipoff); + IPH_OFFSET_SET(iph, ipoff); +#endif + if ((ipoff & PP_HTONS(IP_OFFMASK | IP_MF)) != 0) { + DPRINTF2(("%s: dropping fragmented datagram (0x%04x)\n", + __func__, ntohs(ipoff))); + return; + } + + /* no options */ + if (IPH_HL(iph) * 4 != IP_HLEN) { + DPRINTF2(("%s: dropping datagram with options (IP header length %d)\n", + __func__, IPH_HL(iph) * 4)); + return; + } + + if (IPH_PROTO(iph) != IP_PROTO_ICMP) { + DPRINTF2(("%s: unexpected protocol %d\n", __func__, IPH_PROTO(iph))); + return; + } + + iplen = IPH_LEN(iph); +#if !defined(RT_OS_DARWIN) + /* darwin reports IPH_LEN in host byte order */ + iplen = ntohs(iplen); +#endif +#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) + /* darwin and solaris change IPH_LEN to payload length only */ + iplen += IP_HLEN; /* we verified there are no options */ + IPH_LEN_SET(iph, htons(iplen)); +#endif + if (nread < iplen) { + DPRINTF2(("%s: read %d bytes but total length is %d bytes\n", + __func__, (unsigned int)nread, (unsigned int)iplen)); + return; + } + + if (iplen < IP_HLEN + ICMP_HLEN) { + DPRINTF2(("%s: IP length %d bytes, ICMP header truncated\n", + __func__, iplen)); + return; + } + + icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN); + if (ICMPH_TYPE(icmph) == ICMP_ER) { + pxping_pmgr_icmp4_echo(pxping, iplen, &sin); + } + else if 
(ICMPH_TYPE(icmph) == ICMP_DUR || ICMPH_TYPE(icmph) == ICMP_TE) { + pxping_pmgr_icmp4_error(pxping, iplen, &sin); + } +#if 1 + else { + DPRINTF2(("%s: ignoring ICMP type %d\n", __func__, ICMPH_TYPE(icmph))); + } +#endif +} + + +/** + * Check if this incoming ICMP echo reply is for one of our pings and + * forward it to the guest. + */ +static void +pxping_pmgr_icmp4_echo(struct pxping *pxping, + u16_t iplen, struct sockaddr_in *peer) +{ + struct ip_hdr *iph; + struct icmp_echo_hdr *icmph; + u16_t id, seq; + ip_addr_t guest_ip, target_ip; + int mapped; + struct ping_pcb *pcb; + u16_t guest_id; + u16_t oipsum; + u32_t sum; + RT_NOREF(peer); + + iph = (struct ip_hdr *)pollmgr_udpbuf; + icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN); + + id = icmph->id; + seq = icmph->seqno; + + DPRINTF(("<--- PING %RTnaipv4 id 0x%x seq %d\n", + peer->sin_addr.s_addr, ntohs(id), ntohs(seq))); + + /* + * Is this a reply to one of our pings? + */ + + ip_addr_copy(target_ip, iph->src); + mapped = pxremap_inbound_ip4(&target_ip, &target_ip); + if (mapped == PXREMAP_FAILED) { + return; + } + if (mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) { + DPRINTF2(("%s: dropping packet with ttl 1\n", __func__)); + return; + } + + sys_mutex_lock(&pxping->lock); + pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id); + if (pcb == NULL) { + sys_mutex_unlock(&pxping->lock); + DPRINTF2(("%s: no match\n", __func__)); + return; + } + + DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb)); + + /* save info before unlocking since pcb may expire */ + ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src)); + guest_id = pcb->guest_id; + + sys_mutex_unlock(&pxping->lock); + + + /* + * Rewrite headers and forward to guest. 
+ */ + + /* rewrite ICMP echo header */ + sum = (u16_t)~icmph->chksum; + sum += chksum_update_16(&icmph->id, guest_id); + sum = FOLD_U32T(sum); + icmph->chksum = ~sum; + + /* rewrite IP header */ + oipsum = IPH_CHKSUM(iph); + if (oipsum == 0) { + /* Solaris doesn't compute checksum for local replies */ + ip_addr_copy(iph->dest, guest_ip); + if (mapped == PXREMAP_MAPPED) { + ip_addr_copy(iph->src, target_ip); + } + else { + IPH_TTL_SET(iph, IPH_TTL(iph) - 1); + } + IPH_CHKSUM_SET(iph, inet_chksum(iph, ntohs(IPH_LEN(iph)))); + } + else { + sum = (u16_t)~oipsum; + sum += chksum_update_32((u32_t *)&iph->dest, + ip4_addr_get_u32(&guest_ip)); + if (mapped == PXREMAP_MAPPED) { + sum += chksum_update_32((u32_t *)&iph->src, + ip4_addr_get_u32(&target_ip)); + } + else { + IPH_TTL_SET(iph, IPH_TTL(iph) - 1); + sum += PP_NTOHS(~0x0100); + } + sum = FOLD_U32T(sum); + IPH_CHKSUM_SET(iph, ~sum); + } + + pxping_pmgr_forward_inbound(pxping, iplen); +} + + +/** + * Check if this incoming ICMP error (destination unreachable or time + * exceeded) is about one of our pings and forward it to the guest. + */ +static void +pxping_pmgr_icmp4_error(struct pxping *pxping, + u16_t iplen, struct sockaddr_in *peer) +{ + struct ip_hdr *iph, *oiph; + struct icmp_echo_hdr *icmph, *oicmph; + u16_t oipoff, oiphlen, oiplen; + u16_t id, seq; + ip_addr_t guest_ip, target_ip, error_ip; + int target_mapped, error_mapped; + struct ping_pcb *pcb; + u16_t guest_id; + u32_t sum; + RT_NOREF(peer); + + iph = (struct ip_hdr *)pollmgr_udpbuf; + icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN); + + /* + * Inner IP datagram is not checked by the kernel and may be + * anything, possibly malicious. 
+ */ + + oipoff = IP_HLEN + ICMP_HLEN; + oiplen = iplen - oipoff; /* NB: truncated length, not IPH_LEN(oiph) */ + if (oiplen < IP_HLEN) { + DPRINTF2(("%s: original datagram truncated to %d bytes\n", + __func__, oiplen)); + } + + /* IP header of the original message */ + oiph = (struct ip_hdr *)(pollmgr_udpbuf + oipoff); + + /* match version */ + if (IPH_V(oiph) != 4) { + DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(oiph))); + return; + } + + /* can't match fragments except the first one */ + if ((IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK)) != 0) { + DPRINTF2(("%s: ignoring fragment with offset %d\n", + __func__, ntohs(IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK)))); + return; + } + + if (IPH_PROTO(oiph) != IP_PROTO_ICMP) { +#if 0 + /* don't spam with every "destination unreachable" in the system */ + DPRINTF2(("%s: ignoring protocol %d\n", __func__, IPH_PROTO(oiph))); +#endif + return; + } + + oiphlen = IPH_HL(oiph) * 4; + if (oiplen < oiphlen + ICMP_HLEN) { + DPRINTF2(("%s: original datagram truncated to %d bytes\n", + __func__, oiplen)); + return; + } + + oicmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + oipoff + oiphlen); + if (ICMPH_TYPE(oicmph) != ICMP_ECHO) { + DPRINTF2(("%s: ignoring ICMP error for original ICMP type %d\n", + __func__, ICMPH_TYPE(oicmph))); + return; + } + + id = oicmph->id; + seq = oicmph->seqno; + + DPRINTF2(("%s: ping %RTnaipv4 id 0x%x seq %d", + __func__, ip4_addr_get_u32(&oiph->dest), ntohs(id), ntohs(seq))); + if (ICMPH_TYPE(icmph) == ICMP_DUR) { + DPRINTF2((" unreachable (code %d)\n", ICMPH_CODE(icmph))); + } + else { + DPRINTF2((" time exceeded\n")); + } + + + /* + * Is the inner (failed) datagram one of our pings? 
+ */ + + ip_addr_copy(target_ip, oiph->dest); /* inner (failed) */ + target_mapped = pxremap_inbound_ip4(&target_ip, &target_ip); + if (target_mapped == PXREMAP_FAILED) { + return; + } + + sys_mutex_lock(&pxping->lock); + pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id); + if (pcb == NULL) { + sys_mutex_unlock(&pxping->lock); + DPRINTF2(("%s: no match\n", __func__)); + return; + } + + DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb)); + + /* save info before unlocking since pcb may expire */ + ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src)); + guest_id = pcb->guest_id; + + sys_mutex_unlock(&pxping->lock); + + + /* + * Rewrite both inner and outer headers and forward to guest. + * Note that the checksum of the outer ICMP error message is + * preserved by the changes we do to inner headers. + */ + + ip_addr_copy(error_ip, iph->src); /* node that reports the error */ + error_mapped = pxremap_inbound_ip4(&error_ip, &error_ip); + if (error_mapped == PXREMAP_FAILED) { + return; + } + if (error_mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) { + DPRINTF2(("%s: dropping packet with ttl 1\n", __func__)); + return; + } + + /* rewrite inner ICMP echo header */ + sum = (u16_t)~oicmph->chksum; + sum += chksum_update_16(&oicmph->id, guest_id); + sum = FOLD_U32T(sum); + oicmph->chksum = ~sum; + + /* rewrite inner IP header */ +#if defined(RT_OS_DARWIN) + /* darwin converts inner length to host byte order too */ + IPH_LEN_SET(oiph, htons(IPH_LEN(oiph))); +#endif + sum = (u16_t)~IPH_CHKSUM(oiph); + sum += chksum_update_32((u32_t *)&oiph->src, ip4_addr_get_u32(&guest_ip)); + if (target_mapped == PXREMAP_MAPPED) { + sum += chksum_update_32((u32_t *)&oiph->dest, ip4_addr_get_u32(&target_ip)); + } + sum = FOLD_U32T(sum); + IPH_CHKSUM_SET(oiph, ~sum); + + /* rewrite outer IP header */ + sum = (u16_t)~IPH_CHKSUM(iph); + sum += chksum_update_32((u32_t *)&iph->dest, ip4_addr_get_u32(&guest_ip)); + if (error_mapped == PXREMAP_MAPPED) { + sum += chksum_update_32((u32_t *)&iph->src, 
ip4_addr_get_u32(&error_ip)); + } + else { + IPH_TTL_SET(iph, IPH_TTL(iph) - 1); + sum += PP_NTOHS(~0x0100); + } + sum = FOLD_U32T(sum); + IPH_CHKSUM_SET(iph, ~sum); + + pxping_pmgr_forward_inbound(pxping, iplen); +} + + +/** + * Process incoming ICMPv6 message for the host. + * NB: we will get a lot of spam here and have to sift through it. + */ +static void +pxping_pmgr_icmp6(struct pxping *pxping) +{ +#ifndef RT_OS_WINDOWS + struct msghdr mh; + ssize_t nread; +#else + WSAMSG mh; + DWORD nread; +#endif + IOVEC iov[1]; + static u8_t cmsgbuf[128]; + struct cmsghdr *cmh; + struct sockaddr_in6 sin6; + /* socklen_t salen = sizeof(sin6); - unused */ + struct icmp6_echo_hdr *icmph; + struct in6_pktinfo *pktinfo; + int hopl, tclass; +#ifdef RT_OS_WINDOWS + int status; +#endif + + /* + * Reads from raw IPv6 sockets deliver only the payload. Full + * headers are available via recvmsg(2)/cmsg(3). + */ + IOVEC_SET_BASE(iov[0], pollmgr_udpbuf); + IOVEC_SET_LEN(iov[0], sizeof(pollmgr_udpbuf)); + + memset(&mh, 0, sizeof(mh)); +#ifndef RT_OS_WINDOWS + mh.msg_name = &sin6; + mh.msg_namelen = sizeof(sin6); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = cmsgbuf; + mh.msg_controllen = sizeof(cmsgbuf); + mh.msg_flags = 0; + + nread = recvmsg(pxping->sock6, &mh, 0); + if (nread < 0) { + DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO())); + return; + } +#else /* RT_OS_WINDOWS */ + mh.name = (LPSOCKADDR)&sin6; + mh.namelen = sizeof(sin6); + mh.lpBuffers = iov; + mh.dwBufferCount = 1; + mh.Control.buf = cmsgbuf; + mh.Control.len = sizeof(cmsgbuf); + mh.dwFlags = 0; + + status = (*pxping->pfWSARecvMsg6)(pxping->sock6, &mh, &nread, NULL, NULL); + if (status == SOCKET_ERROR) { + DPRINTF2(("%s: error %d\n", __func__, WSAGetLastError())); + return; + } +#endif + + icmph = (struct icmp6_echo_hdr *)pollmgr_udpbuf; + + DPRINTF2(("%s: %RTnaipv6 ICMPv6: ", __func__, &sin6.sin6_addr)); + + if (icmph->type == ICMP6_TYPE_EREP) { + DPRINTF2(("echo reply %04x %u\n", + (unsigned 
int)icmph->id, (unsigned int)icmph->seqno)); + } + else { /* XXX */ + if (icmph->type == ICMP6_TYPE_EREQ) { + DPRINTF2(("echo request %04x %u\n", + (unsigned int)icmph->id, (unsigned int)icmph->seqno)); + } + else if (icmph->type == ICMP6_TYPE_DUR) { + DPRINTF2(("destination unreachable\n")); + } + else if (icmph->type == ICMP6_TYPE_PTB) { + DPRINTF2(("packet too big\n")); + } + else if (icmph->type == ICMP6_TYPE_TE) { + DPRINTF2(("time exceeded\n")); + } + else if (icmph->type == ICMP6_TYPE_PP) { + DPRINTF2(("parameter problem\n")); + } + else { + DPRINTF2(("type %d len %u\n", icmph->type, (unsigned int)nread)); + } + + if (icmph->type >= ICMP6_TYPE_EREQ) { + return; /* informational message */ + } + } + + pktinfo = NULL; + hopl = -1; + tclass = -1; + for (cmh = CMSG_FIRSTHDR(&mh); cmh != NULL; cmh = CMSG_NXTHDR(&mh, cmh)) { + if (cmh->cmsg_len == 0) + break; + + if (cmh->cmsg_level == IPPROTO_IPV6 + && cmh->cmsg_type == IPV6_HOPLIMIT + && cmh->cmsg_len == CMSG_LEN(sizeof(int))) + { + hopl = *(int *)CMSG_DATA(cmh); + DPRINTF2(("hoplimit = %d\n", hopl)); + } + + if (cmh->cmsg_level == IPPROTO_IPV6 + && cmh->cmsg_type == IPV6_PKTINFO + && cmh->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo))) + { + pktinfo = (struct in6_pktinfo *)CMSG_DATA(cmh); + DPRINTF2(("pktinfo found\n")); + } + } + + if (pktinfo == NULL) { + /* + * ip6_output_if() doesn't do checksum for us so we need to + * manually recompute it - for this we must know the + * destination address of the pseudo-header that we will + * rewrite with guest's address. (TODO: yeah, yeah, we can + * compute it from scratch...) 
+ */ + DPRINTF2(("%s: unable to get pktinfo\n", __func__)); + return; + } + + if (hopl < 0) { + hopl = LWIP_ICMP6_HL; + } + + if (icmph->type == ICMP6_TYPE_EREP) { + pxping_pmgr_icmp6_echo(pxping, + (ip6_addr_t *)&sin6.sin6_addr, + (ip6_addr_t *)&pktinfo->ipi6_addr, + hopl, tclass, (u16_t)nread); + } + else if (icmph->type < ICMP6_TYPE_EREQ) { + pxping_pmgr_icmp6_error(pxping, + (ip6_addr_t *)&sin6.sin6_addr, + (ip6_addr_t *)&pktinfo->ipi6_addr, + hopl, tclass, (u16_t)nread); + } +} + + +/** + * Check if this incoming ICMPv6 echo reply is for one of our pings + * and forward it to the guest. + */ +static void +pxping_pmgr_icmp6_echo(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + int hopl, int tclass, u16_t icmplen) +{ + struct icmp6_echo_hdr *icmph; + ip6_addr_t guest_ip, target_ip; + int mapped; + struct ping_pcb *pcb; + u16_t id, guest_id; + u32_t sum; + + ip6_addr_copy(target_ip, *src); + mapped = pxremap_inbound_ip6(&target_ip, &target_ip); + if (mapped == PXREMAP_FAILED) { + return; + } + else if (mapped == PXREMAP_ASIS) { + if (hopl == 1) { + DPRINTF2(("%s: dropping packet with ttl 1\n", __func__)); + return; + } + --hopl; + } + + icmph = (struct icmp6_echo_hdr *)pollmgr_udpbuf; + id = icmph->id; + + sys_mutex_lock(&pxping->lock); + pcb = pxping_pcb_for_reply(pxping, 1, ip6_2_ipX(&target_ip), id); + if (pcb == NULL) { + sys_mutex_unlock(&pxping->lock); + DPRINTF2(("%s: no match\n", __func__)); + return; + } + + DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb)); + + /* save info before unlocking since pcb may expire */ + ip6_addr_copy(guest_ip, *ipX_2_ip6(&pcb->src)); + guest_id = pcb->guest_id; + + sys_mutex_unlock(&pxping->lock); + + /* rewrite ICMPv6 echo header */ + sum = (u16_t)~icmph->chksum; + sum += chksum_update_16(&icmph->id, guest_id); + sum += chksum_delta_ipv6(dst, &guest_ip); /* pseudo */ + if (mapped) { + sum += chksum_delta_ipv6(src, &target_ip); /* pseudo */ + } + sum = FOLD_U32T(sum); + icmph->chksum = ~sum; + + 
pxping_pmgr_forward_inbound6(pxping, + &target_ip, /* echo reply src */ + &guest_ip, /* echo reply dst */ + hopl, tclass, icmplen); +} + + +/** + * Check if this incoming ICMPv6 error is about one of our pings and + * forward it to the guest. + */ +static void +pxping_pmgr_icmp6_error(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + int hopl, int tclass, u16_t icmplen) +{ + struct icmp6_hdr *icmph; + u8_t *bufptr; + size_t buflen, hlen; + int proto; + struct ip6_hdr *oiph; + struct icmp6_echo_hdr *oicmph; + struct ping_pcb *pcb; + ip6_addr_t guest_ip, target_ip, error_ip; + int target_mapped, error_mapped; + u16_t guest_id; + u32_t sum; + + icmph = (struct icmp6_hdr *)pollmgr_udpbuf; + + /* + * Inner IP datagram is not checked by the kernel and may be + * anything, possibly malicious. + */ + oiph = NULL; + oicmph = NULL; + + bufptr = pollmgr_udpbuf; + buflen = icmplen; + + hlen = sizeof(*icmph); + proto = IP6_NEXTH_ENCAPS; /* i.e. IPv6, lwIP's name is unfortuate */ + for (;;) { + if (hlen > buflen) { + DPRINTF2(("truncated datagram inside ICMPv6 error message is too short\n")); + return; + } + buflen -= hlen; + bufptr += hlen; + + if (proto == IP6_NEXTH_ENCAPS && oiph == NULL) { /* outermost IPv6 */ + oiph = (struct ip6_hdr *)bufptr; + if (IP6H_V(oiph) != 6) { + DPRINTF2(("%s: unexpected IP version %d\n", __func__, IP6H_V(oiph))); + return; + } + + proto = IP6H_NEXTH(oiph); + hlen = IP6_HLEN; + } + else if (proto == IP6_NEXTH_ICMP6) { + oicmph = (struct icmp6_echo_hdr *)bufptr; + break; + } + else if (proto == IP6_NEXTH_ROUTING + || proto == IP6_NEXTH_HOPBYHOP + || proto == IP6_NEXTH_DESTOPTS) + { + proto = bufptr[0]; + hlen = (bufptr[1] + 1) * 8; + } + else { + DPRINTF2(("%s: stopping at protocol %d\n", __func__, proto)); + break; + } + } + + if (oiph == NULL || oicmph == NULL) { + return; + } + + if (buflen < sizeof(*oicmph)) { + DPRINTF2(("%s: original ICMPv6 is truncated too short\n", __func__)); + return; + } + + if (oicmph->type != 
ICMP6_TYPE_EREQ) { + DPRINTF2(("%s: ignoring original ICMPv6 type %d\n", __func__, oicmph->type)); + return; + } + + ip6_addr_copy(target_ip, oiph->dest); /* inner (failed) */ + target_mapped = pxremap_inbound_ip6(&target_ip, &target_ip); + if (target_mapped == PXREMAP_FAILED) { + return; + } + + sys_mutex_lock(&pxping->lock); + pcb = pxping_pcb_for_reply(pxping, 1, ip6_2_ipX(&target_ip), oicmph->id); + if (pcb == NULL) { + sys_mutex_unlock(&pxping->lock); + DPRINTF2(("%s: no match\n", __func__)); + return; + } + + DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb)); + + /* save info before unlocking since pcb may expire */ + ip6_addr_copy(guest_ip, *ipX_2_ip6(&pcb->src)); + guest_id = pcb->guest_id; + + sys_mutex_unlock(&pxping->lock); + + + /* + * Rewrite inner and outer headers and forward to guest. Note + * that IPv6 has no IP header checksum, but uses pseudo-header for + * ICMPv6, so we update both in one go, adjusting ICMPv6 checksum + * as we rewrite IP header. + */ + + ip6_addr_copy(error_ip, *src); /* node that reports the error */ + error_mapped = pxremap_inbound_ip6(&error_ip, &error_ip); + if (error_mapped == PXREMAP_FAILED) { + return; + } + if (error_mapped == PXREMAP_ASIS && hopl == 1) { + DPRINTF2(("%s: dropping packet with ttl 1\n", __func__)); + return; + } + + /* rewrite inner ICMPv6 echo header and inner IPv6 header */ + sum = (u16_t)~oicmph->chksum; + sum += chksum_update_16(&oicmph->id, guest_id); + sum += chksum_update_ipv6((ip6_addr_t *)&oiph->src, &guest_ip); + if (target_mapped) { + sum += chksum_delta_ipv6((ip6_addr_t *)&oiph->dest, &target_ip); + } + sum = FOLD_U32T(sum); + oicmph->chksum = ~sum; + + /* rewrite outer ICMPv6 error header */ + sum = (u16_t)~icmph->chksum; + sum += chksum_delta_ipv6(dst, &guest_ip); /* pseudo */ + if (error_mapped) { + sum += chksum_delta_ipv6(src, &error_ip); /* pseudo */ + } + sum = FOLD_U32T(sum); + icmph->chksum = ~sum; + + pxping_pmgr_forward_inbound6(pxping, + &error_ip, /* error src */ + &guest_ip, /* 
error dst */ + hopl, tclass, icmplen); +} + + +/** + * Hand off ICMP datagram to the lwip thread where it will be + * forwarded to the guest. + * + * We no longer need ping_pcb. The pcb may get expired on the lwip + * thread, but we have already patched necessary information into the + * datagram. + */ +static void +pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen) +{ + struct pbuf *p; + struct ping_msg *msg; + err_t error; + + p = pbuf_alloc(PBUF_LINK, iplen, PBUF_RAM); + if (p == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", + __func__, (unsigned int)iplen)); + return; + } + + error = pbuf_take(p, pollmgr_udpbuf, iplen); + if (error != ERR_OK) { + DPRINTF(("%s: pbuf_take(%d) failed\n", + __func__, (unsigned int)iplen)); + pbuf_free(p); + return; + } + + msg = (struct ping_msg *)malloc(sizeof(*msg)); + if (msg == NULL) { + pbuf_free(p); + return; + } + + msg->msg.type = TCPIP_MSG_CALLBACK_STATIC; + msg->msg.sem = NULL; + msg->msg.msg.cb.function = pxping_pcb_forward_inbound; + msg->msg.msg.cb.ctx = (void *)msg; + + msg->pxping = pxping; + msg->p = p; + + proxy_lwip_post(&msg->msg); +} + + +static void +pxping_pcb_forward_inbound(void *arg) +{ + struct ping_msg *msg = (struct ping_msg *)arg; + err_t error; + + LWIP_ASSERT1(msg != NULL); + LWIP_ASSERT1(msg->pxping != NULL); + LWIP_ASSERT1(msg->p != NULL); + + error = ip_raw_output_if(msg->p, msg->pxping->netif); + if (error != ERR_OK) { + DPRINTF(("%s: ip_output_if: %s\n", + __func__, proxy_lwip_strerr(error))); + } + pbuf_free(msg->p); + free(msg); +} + + +static void +pxping_pmgr_forward_inbound6(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + u8_t hopl, u8_t tclass, + u16_t icmplen) +{ + struct pbuf *p; + struct ping6_msg *msg; + + err_t error; + + p = pbuf_alloc(PBUF_IP, icmplen, PBUF_RAM); + if (p == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", + __func__, (unsigned int)icmplen)); + return; + } + + error = pbuf_take(p, pollmgr_udpbuf, icmplen); + if (error != ERR_OK) { + 
DPRINTF(("%s: pbuf_take(%d) failed\n", + __func__, (unsigned int)icmplen)); + pbuf_free(p); + return; + } + + msg = (struct ping6_msg *)malloc(sizeof(*msg)); + if (msg == NULL) { + pbuf_free(p); + return; + } + + msg->msg.type = TCPIP_MSG_CALLBACK_STATIC; + msg->msg.sem = NULL; + msg->msg.msg.cb.function = pxping_pcb_forward_inbound6; + msg->msg.msg.cb.ctx = (void *)msg; + + msg->pxping = pxping; + msg->p = p; + ip6_addr_copy(msg->src, *src); + ip6_addr_copy(msg->dst, *dst); + msg->hopl = hopl; + msg->tclass = tclass; + + proxy_lwip_post(&msg->msg); +} + + +static void +pxping_pcb_forward_inbound6(void *arg) +{ + struct ping6_msg *msg = (struct ping6_msg *)arg; + err_t error; + + LWIP_ASSERT1(msg != NULL); + LWIP_ASSERT1(msg->pxping != NULL); + LWIP_ASSERT1(msg->p != NULL); + + error = ip6_output_if(msg->p, + &msg->src, &msg->dst, msg->hopl, msg->tclass, + IP6_NEXTH_ICMP6, msg->pxping->netif); + if (error != ERR_OK) { + DPRINTF(("%s: ip6_output_if: %s\n", + __func__, proxy_lwip_strerr(error))); + } + pbuf_free(msg->p); + free(msg); +} diff --git a/src/VBox/NetworkServices/NAT/pxping_win.c b/src/VBox/NetworkServices/NAT/pxping_win.c new file mode 100644 index 00000000..2c6da8fd --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxping_win.c @@ -0,0 +1,672 @@ +/* $Id: pxping_win.c $ */ +/** @file + * NAT Network - ping proxy, Windows ICMP API version. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" +#include "proxy.h" +#include "pxremap.h" + +#include "lwip/ip.h" +#include "lwip/icmp.h" +#include "lwip/inet_chksum.h" + +/* XXX: lwIP names conflict with winsock <iphlpapi.h> */ +#undef IP_STATS +#undef ICMP_STATS +#undef TCP_STATS +#undef UDP_STATS +#undef IP6_STATS + +#include <winternl.h> /* for PIO_APC_ROUTINE &c */ +#ifndef PIO_APC_ROUTINE_DEFINED +# define PIO_APC_ROUTINE_DEFINED 1 +#endif +#include <iprt/win/iphlpapi.h> +#include <icmpapi.h> + +#include <stdio.h> + + +struct pxping { + /* + * We use single ICMP handle for all pings. This means that all + * proxied pings will have the same id and share single sequence + * of sequence numbers. + */ + HANDLE hdl4; + HANDLE hdl6; + + struct netif *netif; + + /* + * On Windows XP and Windows Server 2003 IcmpSendEcho2() callback + * is FARPROC, but starting from Vista it's PIO_APC_ROUTINE with + * two extra arguments. Callbacks use WINAPI (stdcall) calling + * convention with callee responsible for popping the arguments, + * so to avoid stack corruption we check windows version at run + * time and provide correct callback. 
+ */ + PIO_APC_ROUTINE pfnCallback4; + PIO_APC_ROUTINE pfnCallback6; +}; + + +struct pong4 { + struct netif *netif; + + struct ip_hdr reqiph; + struct icmp_echo_hdr reqicmph; + + size_t bufsize; + u8_t buf[1]; +}; + + +struct pong6 { + struct netif *netif; + + ip6_addr_t reqsrc; + struct icmp6_echo_hdr reqicmph; + size_t reqsize; + + size_t bufsize; + u8_t buf[1]; +}; + + +static void pxping_recv4(void *arg, struct pbuf *p); +static void pxping_recv6(void *arg, struct pbuf *p); + +static VOID WINAPI pxping_icmp4_callback_old(void *); +static VOID WINAPI pxping_icmp4_callback_apc(void *, PIO_STATUS_BLOCK, ULONG); +static void pxping_icmp4_callback(struct pong4 *pong); + +static VOID WINAPI pxping_icmp6_callback_old(void *); +static VOID WINAPI pxping_icmp6_callback_apc(void *, PIO_STATUS_BLOCK, ULONG); +static void pxping_icmp6_callback(struct pong6 *pong); + + +struct pxping g_pxping; + + +err_t +pxping_init(struct netif *netif, SOCKET sock4, SOCKET sock6) +{ + OSVERSIONINFO osvi; + int status; + + LWIP_UNUSED_ARG(sock4); + LWIP_UNUSED_ARG(sock6); + + ZeroMemory(&osvi, sizeof(OSVERSIONINFO)); + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + status = GetVersionEx(&osvi); + if (status == 0) { + return ERR_ARG; + } + + if (osvi.dwMajorVersion >= 6) { + g_pxping.pfnCallback4 = pxping_icmp4_callback_apc; + g_pxping.pfnCallback6 = pxping_icmp6_callback_apc; + } + else { + g_pxping.pfnCallback4 = (PIO_APC_ROUTINE)pxping_icmp4_callback_old; + g_pxping.pfnCallback6 = (PIO_APC_ROUTINE)pxping_icmp6_callback_old; + } + + + g_pxping.hdl4 = IcmpCreateFile(); + if (g_pxping.hdl4 != INVALID_HANDLE_VALUE) { + ping_proxy_accept(pxping_recv4, &g_pxping); + } + else { + DPRINTF(("IcmpCreateFile: error %d\n", GetLastError())); + } + + g_pxping.hdl6 = Icmp6CreateFile(); + if (g_pxping.hdl6 != INVALID_HANDLE_VALUE) { + ping6_proxy_accept(pxping_recv6, &g_pxping); + } + else { + DPRINTF(("Icmp6CreateFile: error %d\n", GetLastError())); + } + + if (g_pxping.hdl4 == 
INVALID_HANDLE_VALUE + && g_pxping.hdl6 == INVALID_HANDLE_VALUE) + { + return ERR_ARG; + } + + g_pxping.netif = netif; + + return ERR_OK; +} + + +/** + * ICMP Echo Request in pbuf "p" is to be proxied. + */ +static void +pxping_recv4(void *arg, struct pbuf *p) +{ + struct pxping *pxping = (struct pxping *)arg; + const struct ip_hdr *iph; + const struct icmp_echo_hdr *icmph; + u16_t iphlen; + size_t bufsize; + struct pong4 *pong; + IPAddr dst; + int mapped; + int ttl; + IP_OPTION_INFORMATION opts; + void *reqdata; + size_t reqsize; + int status; + + pong = NULL; + + iphlen = ip_current_header_tot_len(); + if (RT_UNLIKELY(iphlen != IP_HLEN)) { /* we don't do options */ + goto out; + } + + iph = (const struct ip_hdr *)ip_current_header(); + icmph = (const struct icmp_echo_hdr *)p->payload; + + mapped = pxremap_outbound_ip4((ip_addr_t *)&dst, (ip_addr_t *)&iph->dest); + if (RT_UNLIKELY(mapped == PXREMAP_FAILED)) { + goto out; + } + + ttl = IPH_TTL(iph); + if (mapped == PXREMAP_ASIS) { + if (RT_UNLIKELY(ttl == 1)) { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_LIKELY(status == 0)) { + icmp_time_exceeded(p, ICMP_TE_TTL); + } + goto out; + } + --ttl; + } + + status = pbuf_header(p, -(u16_t)sizeof(*icmph)); /* to ping payload */ + if (RT_UNLIKELY(status != 0)) { + goto out; + } + + bufsize = sizeof(ICMP_ECHO_REPLY); + if (p->tot_len < sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo_hdr)) + bufsize += sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo_hdr); + else + bufsize += p->tot_len; + bufsize += 16; /* whatever that is; empirically at least XP needs it */ + + pong = (struct pong4 *)malloc(RT_UOFFSETOF(struct pong4, buf) + bufsize); + if (RT_UNLIKELY(pong == NULL)) { + goto out; + } + pong->bufsize = bufsize; + pong->netif = pxping->netif; + + memcpy(&pong->reqiph, iph, sizeof(*iph)); + memcpy(&pong->reqicmph, icmph, sizeof(*icmph)); + + reqsize = p->tot_len; + if (p->next == NULL) { + /* single pbuf can be directly used as request data source 
*/ + reqdata = p->payload; + } + else { + /* data from pbuf chain must be concatenated */ + pbuf_copy_partial(p, pong->buf, p->tot_len, 0); + reqdata = pong->buf; + } + + opts.Ttl = ttl; + opts.Tos = IPH_TOS(iph); /* affected by DisableUserTOSSetting key */ + opts.Flags = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0 ? IP_FLAG_DF : 0; + opts.OptionsSize = 0; + opts.OptionsData = 0; + + status = IcmpSendEcho2(pxping->hdl4, NULL, + pxping->pfnCallback4, pong, + dst, reqdata, (WORD)reqsize, &opts, + pong->buf, (DWORD)pong->bufsize, + 5 * 1000 /* ms */); + + if (RT_UNLIKELY(status != 0)) { + DPRINTF(("IcmpSendEcho2: unexpected status %d\n", status)); + goto out; + } + if ((status = GetLastError()) != ERROR_IO_PENDING) { + int code; + + DPRINTF(("IcmpSendEcho2: error %d\n", status)); + switch (status) { + case ERROR_NETWORK_UNREACHABLE: + code = ICMP_DUR_NET; + break; + case ERROR_HOST_UNREACHABLE: + code = ICMP_DUR_HOST; + break; + default: + code = -1; + break; + } + + if (code != -1) { + /* move payload back to IP header */ + status = pbuf_header(p, (u16_t)(sizeof(*icmph) + iphlen)); + if (RT_LIKELY(status == 0)) { + icmp_dest_unreach(p, code); + } + } + goto out; + } + + pong = NULL; /* callback owns it now */ + out: + if (pong != NULL) { + free(pong); + } + pbuf_free(p); +} + + +static VOID WINAPI +pxping_icmp4_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved) +{ + struct pong4 *pong = (struct pong4 *)ctx; + LWIP_UNUSED_ARG(iob); + LWIP_UNUSED_ARG(reserved); + + if (pong != NULL) { + pxping_icmp4_callback(pong); + free(pong); + } +} + + +static VOID WINAPI +pxping_icmp4_callback_old(void *ctx) +{ + struct pong4 *pong = (struct pong4 *)ctx; + + if (pong != NULL) { + pxping_icmp4_callback(pong); + free(pong); + } +} + + +static void +pxping_icmp4_callback(struct pong4 *pong) +{ + ICMP_ECHO_REPLY *reply; + DWORD nreplies; + size_t icmplen; + struct pbuf *p; + struct icmp_echo_hdr *icmph; + ip_addr_t src; + int mapped; + + nreplies = IcmpParseReplies(pong->buf, 
(DWORD)pong->bufsize); + if (nreplies == 0) { + DWORD error = GetLastError(); + if (error == IP_REQ_TIMED_OUT) { + DPRINTF2(("pong4: %p timed out\n", (void *)pong)); + } + else { + DPRINTF(("pong4: %p: IcmpParseReplies: error %d\n", + (void *)pong, error)); + } + return; + } + + reply = (ICMP_ECHO_REPLY *)pong->buf; + + if (reply->Options.OptionsSize != 0) { /* don't do options */ + return; + } + + mapped = pxremap_inbound_ip4(&src, (ip_addr_t *)&reply->Address); + if (mapped == PXREMAP_FAILED) { + return; + } + if (mapped == PXREMAP_ASIS) { + if (reply->Options.Ttl == 1) { + return; + } + --reply->Options.Ttl; + } + + if (reply->Status == IP_SUCCESS) { + icmplen = sizeof(struct icmp_echo_hdr) + reply->DataSize; + if ((reply->Options.Flags & IP_FLAG_DF) != 0 + && IP_HLEN + icmplen > pong->netif->mtu) + { + return; + } + + p = pbuf_alloc(PBUF_IP, (u16_t)icmplen, PBUF_RAM); + if (RT_UNLIKELY(p == NULL)) { + return; + } + + icmph = (struct icmp_echo_hdr *)p->payload; + icmph->type = ICMP_ER; + icmph->code = 0; + icmph->chksum = 0; + icmph->id = pong->reqicmph.id; + icmph->seqno = pong->reqicmph.seqno; + + memcpy((u8_t *)p->payload + sizeof(*icmph), + reply->Data, reply->DataSize); + } + else { + u8_t type, code; + + switch (reply->Status) { + case IP_DEST_NET_UNREACHABLE: + type = ICMP_DUR; code = ICMP_DUR_NET; + break; + case IP_DEST_HOST_UNREACHABLE: + type = ICMP_DUR; code = ICMP_DUR_HOST; + break; + case IP_DEST_PROT_UNREACHABLE: + type = ICMP_DUR; code = ICMP_DUR_PROTO; + break; + case IP_PACKET_TOO_BIG: + type = ICMP_DUR; code = ICMP_DUR_FRAG; + break; + case IP_SOURCE_QUENCH: + type = ICMP_SQ; code = 0; + break; + case IP_TTL_EXPIRED_TRANSIT: + type = ICMP_TE; code = ICMP_TE_TTL; + break; + case IP_TTL_EXPIRED_REASSEM: + type = ICMP_TE; code = ICMP_TE_FRAG; + break; + default: + DPRINTF(("pong4: reply status %d, dropped\n", reply->Status)); + return; + } + + DPRINTF(("pong4: reply status %d -> type %d/code %d\n", + reply->Status, type, code)); + + icmplen = 
sizeof(*icmph) + sizeof(pong->reqiph) + sizeof(pong->reqicmph); + + p = pbuf_alloc(PBUF_IP, (u16_t)icmplen, PBUF_RAM); + if (RT_UNLIKELY(p == NULL)) { + return; + } + + icmph = (struct icmp_echo_hdr *)p->payload; + icmph->type = type; + icmph->code = code; + icmph->chksum = 0; + icmph->id = 0; + icmph->seqno = 0; + + /* + * XXX: we don't know the TTL of the request at the time this + * ICMP error was generated (we can guess it was 1 for ttl + * exceeded, but don't bother faking it). + */ + memcpy((u8_t *)p->payload + sizeof(*icmph), + &pong->reqiph, sizeof(pong->reqiph)); + + memcpy((u8_t *)p->payload + sizeof(*icmph) + sizeof(pong->reqiph), + &pong->reqicmph, sizeof(pong->reqicmph)); + } + + icmph->chksum = inet_chksum(p->payload, (u16_t)icmplen); + ip_output_if(p, &src, + (ip_addr_t *)&pong->reqiph.src, /* dst */ + reply->Options.Ttl, + reply->Options.Tos, + IPPROTO_ICMP, + pong->netif); + pbuf_free(p); +} + + +static void +pxping_recv6(void *arg, struct pbuf *p) +{ + struct pxping *pxping = (struct pxping *)arg; + struct icmp6_echo_hdr *icmph; + size_t bufsize; + struct pong6 *pong; + int mapped; + void *reqdata; + size_t reqsize; + struct sockaddr_in6 src, dst; + int hopl; + IP_OPTION_INFORMATION opts; + int status; + + pong = NULL; + + icmph = (struct icmp6_echo_hdr *)p->payload; + + memset(&dst, 0, sizeof(dst)); + dst.sin6_family = AF_INET6; + mapped = pxremap_outbound_ip6((ip6_addr_t *)&dst.sin6_addr, + ip6_current_dest_addr()); + if (RT_UNLIKELY(mapped == PXREMAP_FAILED)) { + goto out; + } + + hopl = IP6H_HOPLIM(ip6_current_header()); + if (mapped == PXREMAP_ASIS) { + if (RT_UNLIKELY(hopl == 1)) { + status = pbuf_header(p, ip_current_header_tot_len()); + if (RT_LIKELY(status == 0)) { + icmp6_time_exceeded(p, ICMP6_TE_HL); + } + goto out; + } + --hopl; + } + + status = pbuf_header(p, -(u16_t)sizeof(*icmph)); /* to ping payload */ + if (RT_UNLIKELY(status != 0)) { + goto out; + } + + /* XXX: parrotted from IPv4 version, not tested all os version/bitness */ + 
bufsize = sizeof(ICMPV6_ECHO_REPLY); + if (p->tot_len < sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp6_echo_hdr)) + bufsize += sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp6_echo_hdr); + else + bufsize += p->tot_len; + bufsize += 16; + + pong = (struct pong6 *)malloc(RT_UOFFSETOF(struct pong6, buf) + bufsize); + if (RT_UNLIKELY(pong == NULL)) { + goto out; + } + pong->bufsize = bufsize; + pong->netif = pxping->netif; + + ip6_addr_copy(pong->reqsrc, *ip6_current_src_addr()); + memcpy(&pong->reqicmph, icmph, sizeof(*icmph)); + + memset(pong->buf, 0xa5, pong->bufsize); + + pong->reqsize = reqsize = p->tot_len; + if (p->next == NULL) { + /* single pbuf can be directly used as request data source */ + reqdata = p->payload; + } + else { + /* data from pbuf chain must be concatenated */ + pbuf_copy_partial(p, pong->buf, p->tot_len, 0); + reqdata = pong->buf; + } + + memset(&src, 0, sizeof(src)); + src.sin6_family = AF_INET6; + src.sin6_addr = in6addr_any; /* let the OS select host source address */ + + memset(&opts, 0, sizeof(opts)); + opts.Ttl = hopl; + + status = Icmp6SendEcho2(pxping->hdl6, NULL, + pxping->pfnCallback6, pong, + &src, &dst, reqdata, (WORD)reqsize, &opts, + pong->buf, (DWORD)pong->bufsize, + 5 * 1000 /* ms */); + + if (RT_UNLIKELY(status != 0)) { + DPRINTF(("Icmp6SendEcho2: unexpected status %d\n", status)); + goto out; + } + if ((status = GetLastError()) != ERROR_IO_PENDING) { + int code; + + DPRINTF(("Icmp6SendEcho2: error %d\n", status)); + switch (status) { + case ERROR_NETWORK_UNREACHABLE: + case ERROR_HOST_UNREACHABLE: + code = ICMP6_DUR_NO_ROUTE; + break; + default: + code = -1; + break; + } + + if (code != -1) { + /* move payload back to IP header */ + status = pbuf_header(p, (u16_t)(sizeof(*icmph) + + ip_current_header_tot_len())); + if (RT_LIKELY(status == 0)) { + icmp6_dest_unreach(p, code); + } + } + goto out; + } + + pong = NULL; /* callback owns it now */ + out: + if (pong != NULL) { + free(pong); + } + pbuf_free(p); +} + + +static VOID 
WINAPI +pxping_icmp6_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved) +{ + struct pong6 *pong = (struct pong6 *)ctx; + LWIP_UNUSED_ARG(iob); + LWIP_UNUSED_ARG(reserved); + + if (pong != NULL) { + pxping_icmp6_callback(pong); + free(pong); + } +} + + +static VOID WINAPI +pxping_icmp6_callback_old(void *ctx) +{ + struct pong6 *pong = (struct pong6 *)ctx; + + if (pong != NULL) { + pxping_icmp6_callback(pong); + free(pong); + } +} + + +static void +pxping_icmp6_callback(struct pong6 *pong) +{ + DWORD nreplies; + ICMPV6_ECHO_REPLY *reply; + struct pbuf *p; + struct icmp6_echo_hdr *icmph; + size_t icmplen; + ip6_addr_t src; + int mapped; + + nreplies = Icmp6ParseReplies(pong->buf, (DWORD)pong->bufsize); + if (nreplies == 0) { + DWORD error = GetLastError(); + if (error == IP_REQ_TIMED_OUT) { + DPRINTF2(("pong6: %p timed out\n", (void *)pong)); + } + else { + DPRINTF(("pong6: %p: Icmp6ParseReplies: error %d\n", + (void *)pong, error)); + } + return; + } + + reply = (ICMPV6_ECHO_REPLY *)pong->buf; + + mapped = pxremap_inbound_ip6(&src, (ip6_addr_t *)reply->Address.sin6_addr); + if (mapped == PXREMAP_FAILED) { + return; + } + + /* + * Reply data follows ICMPV6_ECHO_REPLY structure in memory, but + * it doesn't tell us its size. Assume it's equal the size of the + * request. 
+ */ + icmplen = sizeof(*icmph) + pong->reqsize; + p = pbuf_alloc(PBUF_IP, (u16_t)icmplen, PBUF_RAM); + if (RT_UNLIKELY(p == NULL)) { + return; + } + + icmph = (struct icmp6_echo_hdr *)p->payload; + icmph->type = ICMP6_TYPE_EREP; + icmph->code = 0; + icmph->chksum = 0; + icmph->id = pong->reqicmph.id; + icmph->seqno = pong->reqicmph.seqno; + + memcpy((u8_t *)p->payload + sizeof(*icmph), + pong->buf + sizeof(*reply), pong->reqsize); + + icmph->chksum = ip6_chksum_pseudo(p, IP6_NEXTH_ICMP6, p->tot_len, + &src, &pong->reqsrc); + ip6_output_if(p, /* :src */ &src, /* :dst */ &pong->reqsrc, + LWIP_ICMP6_HL, 0, IP6_NEXTH_ICMP6, + pong->netif); + pbuf_free(p); +} diff --git a/src/VBox/NetworkServices/NAT/pxremap.c b/src/VBox/NetworkServices/NAT/pxremap.c new file mode 100644 index 00000000..04d96591 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxremap.c @@ -0,0 +1,333 @@ +/* $Id: pxremap.c $ */ +/** @file + * NAT Network - Loopback remapping. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This file contains functions pertinent to magic address remapping. 
+ * + * We want to expose host's loopback interfaces to the guest by + * mapping them to the addresses from the same prefix/subnet, so if, + * for example proxy interface is 10.0.2.1, we redirect traffic to + * 10.0.2.2 to host's 127.0.0.1 loopback. If need be, we may extend + * this to provide additional mappings, e.g. 127.0.1.1 loopback + * address is used on Ubuntu 12.10+ for NetworkManager's dnsmasq. + * + * Ditto for IPv6, except that IPv6 only has one loopback address. + */ +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" +#include "pxremap.h" +#include "proxy.h" + +#include "lwip/netif.h" +#include "netif/etharp.h" /* proxy arp hook */ + +#include "lwip/ip4.h" /* IPv4 divert hook */ +#include "lwip/ip6.h" /* IPv6 divert hook */ + +#include <string.h> + + +/** + * Check if "dst" is an IPv4 address that proxy remaps to host's + * loopback. + */ +static int +proxy_ip4_is_mapped_loopback(struct netif *netif, const ip_addr_t *dst, ip_addr_t *lo) +{ + u32_t off; + const struct ip4_lomap *lomap; + size_t i; + + LWIP_ASSERT1(dst != NULL); + + if (g_proxy_options->lomap_desc == NULL) { + return 0; + } + + if (!ip_addr_netcmp(dst, &netif->ip_addr, &netif->netmask)) { + return 0; + } + + /* XXX: TODO: check netif is a proxying netif! */ + + off = ntohl(ip4_addr_get_u32(dst) & ~ip4_addr_get_u32(&netif->netmask)); + lomap = g_proxy_options->lomap_desc->lomap; + for (i = 0; i < g_proxy_options->lomap_desc->num_lomap; ++i) { + if (off == lomap[i].off) { + if (lo != NULL) { + ip_addr_copy(*lo, lomap[i].loaddr); + } + return 1; + } + } + return 0; +} + + +#if ARP_PROXY +/** + * Hook function for etharp_arp_input() - returns true to cause proxy + * ARP reply to be generated for "dst". 
+ */ +int +pxremap_proxy_arp(struct netif *netif, ip_addr_t *dst) +{ + return proxy_ip4_is_mapped_loopback(netif, dst, NULL); +} +#endif /* ARP_PROXY */ + + +/** + * Hook function for ip_forward() - returns true to divert packets to + * "dst" to proxy (instead of forwarding them via "netif" or dropping). + */ +int +pxremap_ip4_divert(struct netif *netif, ip_addr_t *dst) +{ + return proxy_ip4_is_mapped_loopback(netif, dst, NULL); +} + + +/** + * Mapping from local network to loopback for outbound connections. + * + * Copy "src" to "dst" with ip_addr_set(dst, src), but if "src" is a + * local network address that maps host's loopback address, copy + * loopback address to "dst". + */ +int +pxremap_outbound_ip4(ip_addr_t *dst, ip_addr_t *src) +{ + struct netif *netif; + + LWIP_ASSERT1(dst != NULL); + LWIP_ASSERT1(src != NULL); + + for (netif = netif_list; netif != NULL; netif = netif->next) { + if (netif_is_up(netif) /* && this is a proxy netif */) { + if (proxy_ip4_is_mapped_loopback(netif, src, dst)) { + return PXREMAP_MAPPED; + } + } + } + + /* not remapped, just copy src */ + ip_addr_set(dst, src); + return PXREMAP_ASIS; +} + + +/** + * Mapping from loopback to local network for inbound (port-forwarded) + * connections. + * + * Copy "src" to "dst" with ip_addr_set(dst, src), but if "src" is a + * host's loopback address, copy local network address that maps it to + * "dst". 
+ */ +int +pxremap_inbound_ip4(ip_addr_t *dst, ip_addr_t *src) +{ + struct netif *netif; + const struct ip4_lomap *lomap; + unsigned int i; + + if (ip4_addr1(src) != IP_LOOPBACKNET) { + ip_addr_set(dst, src); + return PXREMAP_ASIS; + } + + if (g_proxy_options->lomap_desc == NULL) { + return PXREMAP_FAILED; + } + +#if 0 /* ?TODO: with multiple interfaces we need to consider fwspec::dst */ + netif = ip_route(target); + if (netif == NULL) { + return PXREMAP_FAILED; + } +#else + netif = netif_list; + LWIP_ASSERT1(netif != NULL); + LWIP_ASSERT1(netif->next == NULL); +#endif + + lomap = g_proxy_options->lomap_desc->lomap; + for (i = 0; i < g_proxy_options->lomap_desc->num_lomap; ++i) { + if (ip_addr_cmp(src, &lomap[i].loaddr)) { + ip_addr_t net; + + ip_addr_get_network(&net, &netif->ip_addr, &netif->netmask); + ip4_addr_set_u32(dst, + htonl(ntohl(ip4_addr_get_u32(&net)) + + lomap[i].off)); + return PXREMAP_MAPPED; + } + } + + return PXREMAP_FAILED; +} + + +static int +proxy_ip6_is_mapped_loopback(struct netif *netif, ip6_addr_t *dst) +{ + int i; + + /* XXX: TODO: check netif is a proxying netif! */ + + LWIP_ASSERT1(dst != NULL); + + for (i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) { + if (ip6_addr_ispreferred(netif_ip6_addr_state(netif, i)) + && ip6_addr_isuniquelocal(netif_ip6_addr(netif, i))) + { + ip6_addr_t *ifaddr = netif_ip6_addr(netif, i); + if (memcmp(dst, ifaddr, sizeof(ip6_addr_t) - 1) == 0 + && ((IP6_ADDR_BLOCK8(dst) & 0xff) + == (IP6_ADDR_BLOCK8(ifaddr) & 0xff) + 1)) + { + return 1; + } + } + } + + return 0; +} + + +/** + * Hook function for nd6_input() - returns true to cause proxy NA + * reply to be generated for "dst". + */ +int +pxremap_proxy_na(struct netif *netif, ip6_addr_t *dst) +{ + return proxy_ip6_is_mapped_loopback(netif, dst); +} + + +/** + * Hook function for ip6_forward() - returns true to divert packets to + * "dst" to proxy (instead of forwarding them via "netif" or dropping). 
+ */ +int +pxremap_ip6_divert(struct netif *netif, ip6_addr_t *dst) +{ + return proxy_ip6_is_mapped_loopback(netif, dst); +} + + +/** + * Mapping from local network to loopback for outbound connections. + * + * Copy "src" to "dst" with ip6_addr_set(dst, src), but if "src" is a + * local network address that maps host's loopback address, copy IPv6 + * loopback address to "dst". + */ +int +pxremap_outbound_ip6(ip6_addr_t *dst, ip6_addr_t *src) +{ + struct netif *netif; + int i; + + LWIP_ASSERT1(dst != NULL); + LWIP_ASSERT1(src != NULL); + + for (netif = netif_list; netif != NULL; netif = netif->next) { + if (!netif_is_up(netif) /* || this is not a proxy netif */) { + continue; + } + + for (i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) { + if (ip6_addr_ispreferred(netif_ip6_addr_state(netif, i)) + && ip6_addr_isuniquelocal(netif_ip6_addr(netif, i))) + { + ip6_addr_t *ifaddr = netif_ip6_addr(netif, i); + if (memcmp(src, ifaddr, sizeof(ip6_addr_t) - 1) == 0 + && ((IP6_ADDR_BLOCK8(src) & 0xff) + == (IP6_ADDR_BLOCK8(ifaddr) & 0xff) + 1)) + { + ip6_addr_set_loopback(dst); + return PXREMAP_MAPPED; + } + } + } + } + + /* not remapped, just copy src */ + ip6_addr_set(dst, src); + return PXREMAP_ASIS; +} + + +/** + * Mapping from loopback to local network for inbound (port-forwarded) + * connections. + * + * Copy "src" to "dst" with ip6_addr_set(dst, src), but if "src" is a + * host's loopback address, copy local network address that maps it to + * "dst". 
+ */ +int +pxremap_inbound_ip6(ip6_addr_t *dst, ip6_addr_t *src) +{ + ip6_addr_t loopback; + struct netif *netif; + int i; + + ip6_addr_set_loopback(&loopback); + if (!ip6_addr_cmp(src, &loopback)) { + ip6_addr_set(dst, src); + return PXREMAP_ASIS; + } + +#if 0 /* ?TODO: with multiple interfaces we need to consider fwspec::dst */ + netif = ip6_route_fwd(target); + if (netif == NULL) { + return PXREMAP_FAILED; + } +#else + netif = netif_list; + LWIP_ASSERT1(netif != NULL); + LWIP_ASSERT1(netif->next == NULL); +#endif + + for (i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) { + ip6_addr_t *ifaddr = netif_ip6_addr(netif, i); + if (ip6_addr_ispreferred(netif_ip6_addr_state(netif, i)) + && ip6_addr_isuniquelocal(ifaddr)) + { + ip6_addr_set(dst, ifaddr); + ++((u8_t *)&dst->addr[3])[3]; + return PXREMAP_MAPPED; + } + } + + return PXREMAP_FAILED; +} diff --git a/src/VBox/NetworkServices/NAT/pxremap.h b/src/VBox/NetworkServices/NAT/pxremap.h new file mode 100644 index 00000000..b99d8111 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxremap.h @@ -0,0 +1,62 @@ +/* $Id: pxremap.h $ */ +/** @file + * NAT Network - Loopback remapping, declarations and definitions. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_NAT_pxremap_h +#define VBOX_INCLUDED_SRC_NAT_pxremap_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include "lwip/err.h" +#include "lwip/ip_addr.h" + +struct netif; + + +#define PXREMAP_FAILED (-1) +#define PXREMAP_ASIS 0 +#define PXREMAP_MAPPED 1 + +/* IPv4 */ +#if ARP_PROXY +int pxremap_proxy_arp(struct netif *netif, ip_addr_t *dst); +#endif +int pxremap_ip4_divert(struct netif *netif, ip_addr_t *dst); +int pxremap_outbound_ip4(ip_addr_t *dst, ip_addr_t *src); +int pxremap_inbound_ip4(ip_addr_t *dst, ip_addr_t *src); + +/* IPv6 */ +int pxremap_proxy_na(struct netif *netif, ip6_addr_t *dst); +int pxremap_ip6_divert(struct netif *netif, ip6_addr_t *dst); +int pxremap_outbound_ip6(ip6_addr_t *dst, ip6_addr_t *src); +int pxremap_inbound_ip6(ip6_addr_t *dst, ip6_addr_t *src); + +#define pxremap_outbound_ipX(is_ipv6, dst, src) \ + ((is_ipv6) ? pxremap_outbound_ip6(&(dst)->ip6, &(src)->ip6) \ + : pxremap_outbound_ip4(&(dst)->ip4, &(src)->ip4)) + +#endif /* !VBOX_INCLUDED_SRC_NAT_pxremap_h */ diff --git a/src/VBox/NetworkServices/NAT/pxtcp.c b/src/VBox/NetworkServices/NAT/pxtcp.c new file mode 100644 index 00000000..716e9aff --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxtcp.c @@ -0,0 +1,2524 @@ +/* $Id: pxtcp.c $ */ +/** @file + * NAT Network - TCP proxy. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" + +#include "pxtcp.h" + +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "pxremap.h" +#include "portfwd.h" /* fwspec */ + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#ifdef RT_OS_SOLARIS +#include <sys/filio.h> /* FIONREAD is BSD'ism */ +#endif +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <poll.h> + +#include <err.h> /* BSD'ism */ +#else +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <iprt/stdint.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" +#include "lwip/netif.h" +#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */ +#include "lwip/icmp.h" +#include "lwip/icmp6.h" + +/* + * Different OSes have different quirks in reporting POLLHUP for TCP + * sockets. + * + * Using shutdown(2) "how" values here would be more readable, but + * since SHUT_RD is 0, we can't use 0 for "none", unfortunately. + */ +#if defined(RT_OS_NETBSD) || defined(RT_OS_SOLARIS) +# define HAVE_TCP_POLLHUP 0 /* not reported */ +#elif defined(RT_OS_DARWIN) || defined(RT_OS_WINDOWS) +# define HAVE_TCP_POLLHUP POLLIN /* reported when remote closes */ +#else +# define HAVE_TCP_POLLHUP (POLLIN|POLLOUT) /* reported when both directions are closed */ +#endif + + +/** + * Ring buffer for inbound data. Filled with data from the host + * socket on poll manager thread. Data consumed by scheduling + * tcp_write() to the pcb on the lwip thread. + * + * NB: There is actually third party present, the lwip stack itself. + * Thus the buffer doesn't have dual free vs. 
data split, but rather
 * three-way free / send and unACKed data / unsent data split.
 */
struct ringbuf {
    /* Backing storage and its size in bytes. */
    char *buf;
    size_t bufsize;

    /*
     * Start of free space, producer writes here (up till "unacked").
     */
    volatile size_t vacant;

    /*
     * Start of sent but unacknowledged data.  The data are "owned" by
     * the stack as it may need to retransmit.  This is the free space
     * limit for producer.
     */
    volatile size_t unacked;

    /*
     * Start of unsent data, consumer reads/sends from here (up till
     * "vacant").  Not declared volatile since it's only accessed from
     * the consumer thread.
     */
    size_t unsent;
};


/**
 * One proxied TCP connection: pairs the lwIP pcb (guest side) with
 * the host socket (external side) and keeps the half-close flags,
 * buffers and static lwIP messages used to move data between them.
 */
struct pxtcp {
    /**
     * Our poll manager handler.  Must be first, strong/weak
     * references depend on this "inheritance".
     */
    struct pollmgr_handler pmhdl;

    /**
     * lwIP (internal/guest) side of the proxied connection.
     */
    struct tcp_pcb *pcb;

    /**
     * Host (external) side of the proxied connection.
     */
    SOCKET sock;

    /**
     * Socket events we are currently polling for.
     */
    int events;

    /**
     * Socket error.  Currently used to save connect(2) errors so that
     * we can decide if we need to send ICMP error.
     */
    int sockerr;

    /**
     * Interface that we have got the SYN from.  Needed to send ICMP
     * with correct source address.
     */
    struct netif *netif;

    /**
     * For tentatively accepted connections for which we are in
     * process of connecting to the real destination this is the
     * initial pbuf that we might need to build ICMP error.
     *
     * When connection is established this is used to hold outbound
     * pbuf chain received by pxtcp_pcb_recv() but not yet completely
     * forwarded over the socket.  We cannot "return" it to lwIP since
     * the head of the chain is already sent and freed.
     */
    struct pbuf *unsent;

    /**
     * Guest has closed its side.  Reported to pxtcp_pcb_recv() only
     * once and we might not be able to forward it immediately if we
     * have unsent pbuf.
     */
    int outbound_close;

    /**
     * Outbound half-close has been done on the socket.
     */
    int outbound_close_done;

    /**
     * External has closed its side.  We might not be able to forward
     * it immediately if we have unforwarded data.
     */
    int inbound_close;

    /**
     * Inbound half-close has been done on the pcb.
     */
    int inbound_close_done;

    /**
     * On systems that report POLLHUP as soon as the final FIN is
     * received on a socket we cannot continue polling for the rest of
     * input, so we have to read (pull) last data from the socket on
     * the lwIP thread instead of polling/pushing it from the poll
     * manager thread.  See comment in pxtcp_pmgr_pump() POLLHUP case.
     */
    int inbound_pull;


    /**
     * When poll manager schedules delete we may not be able to delete
     * a pxtcp immediately if not all inbound data has been acked by
     * the guest: lwIP may need to resend and the data are in pxtcp's
     * inbuf::buf.  We defer delete until all data are acked to
     * pxtcp_pcb_sent().
     */
    int deferred_delete;

    /**
     * Ring-buffer for inbound data.
     */
    struct ringbuf inbuf;

    /**
     * lwIP thread's strong reference to us.
     */
    struct pollmgr_refptr *rp;


    /*
     * We use static messages to call functions on the lwIP thread to
     * avoid malloc/free overhead.
     */
    struct tcpip_msg msg_delete;   /* delete pxtcp */
    struct tcpip_msg msg_reset;    /* reset connection and delete pxtcp */
    struct tcpip_msg msg_accept;   /* confirm accept of proxied connection */
    struct tcpip_msg msg_outbound; /* trigger send of outbound data */
    struct tcpip_msg msg_inbound;  /* trigger send of inbound data */
#if HAVE_TCP_POLLHUP
    struct tcpip_msg msg_inpull;   /* trigger pull of last inbound data */
#endif
};



static struct pxtcp *pxtcp_allocate(void);
static void pxtcp_free(struct pxtcp *);

static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
static void pxtcp_pcb_dissociate(struct pxtcp *);

/* poll manager callbacks for pxtcp related channels */
static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
#if !(HAVE_TCP_POLLHUP & POLLOUT)
static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
#endif
static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);

/* helper functions for sending/receiving pxtcp over poll manager channels */
static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);

/* poll manager callbacks for individual sockets */
static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);

/* get incoming traffic into ring buffer */
static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
static ssize_t pxtcp_sock_recv(struct pxtcp *, IOVEC *, size_t); /* default */

/* convenience functions for poll manager callbacks */
static int pxtcp_schedule_delete(struct pxtcp *);
+static int pxtcp_schedule_reset(struct pxtcp *); +static int pxtcp_schedule_reject(struct pxtcp *); + +/* lwip thread callbacks called via proxy_lwip_post() */ +static void pxtcp_pcb_delete_pxtcp(void *); +static void pxtcp_pcb_reset_pxtcp(void *); +static void pxtcp_pcb_accept_refuse(void *); +static void pxtcp_pcb_accept_confirm(void *); +static void pxtcp_pcb_write_outbound(void *); +static void pxtcp_pcb_write_inbound(void *); +#if HAVE_TCP_POLLHUP +static void pxtcp_pcb_pull_inbound(void *); +#endif + +/* tcp pcb callbacks */ +static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, struct pbuf *); /* global */ +static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t); +static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t); +static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t); +static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t); +static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *); +static void pxtcp_pcb_err(void *, err_t); + +static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *); +static void pxtcp_pcb_forward_outbound_close(struct pxtcp *); + +static ssize_t pxtcp_sock_send(struct pxtcp *, IOVEC *, size_t); + +static void pxtcp_pcb_forward_inbound(struct pxtcp *); +static void pxtcp_pcb_forward_inbound_close(struct pxtcp *); +DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *); +static void pxtcp_pcb_schedule_poll(struct pxtcp *); +static void pxtcp_pcb_cancel_poll(struct pxtcp *); + +static void pxtcp_pcb_reject(struct tcp_pcb *, int, struct netif *, struct pbuf *); +DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *); + +/* poll manager handlers for pxtcp channels */ +static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl; +static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl; +static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl; +#if !(HAVE_TCP_POLLHUP & POLLOUT) +static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl; +#endif +static struct 
pollmgr_handler pxtcp_pmgr_chan_reset_hdl; + + +/** + * Init PXTCP - must be run when neither lwIP tcpip thread, nor poll + * manager threads haven't been created yet. + */ +void +pxtcp_init(void) +{ + /* + * Create channels. + */ +#define CHANNEL(SLOT, NAME) do { \ + NAME##_hdl.callback = NAME; \ + NAME##_hdl.data = NULL; \ + NAME##_hdl.slot = -1; \ + pollmgr_add_chan(SLOT, &NAME##_hdl); \ + } while (0) + + CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add); + CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin); + CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout); +#if !(HAVE_TCP_POLLHUP & POLLOUT) + CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del); +#endif + CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset); + +#undef CHANNEL + + /* + * Listen to outgoing connection from guest(s). + */ + tcp_proxy_accept(pxtcp_pcb_heard); +} + + +/** + * Syntactic sugar for sending pxtcp pointer over poll manager + * channel. Used by lwip thread functions. + */ +static ssize_t +pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp) +{ + return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp)); +} + + +/** + * Syntactic sugar for sending weak reference to pxtcp over poll + * manager channel. Used by lwip thread functions. + */ +static ssize_t +pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp) +{ + pollmgr_refptr_weak_ref(pxtcp->rp); + return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp)); +} + + +/** + * Counterpart of pxtcp_chan_send(). + */ +static struct pxtcp * +pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents); + return pxtcp; +} + + +/** + * Counterpart of pxtcp_chan_send_weak(). 
+ */ +static struct pxtcp * +pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pollmgr_refptr *rp; + struct pollmgr_handler *base; + struct pxtcp *pxtcp; + + rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents); + base = (struct pollmgr_handler *)pollmgr_refptr_get(rp); + pxtcp = (struct pxtcp *)base; + + return pxtcp; +} + + +/** + * Register pxtcp with poll manager. + * + * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since + * error handling is different in these two cases, we leave it up to + * the caller. + */ +int +pxtcp_pmgr_add(struct pxtcp *pxtcp) +{ + int status; + + LWIP_ASSERT1(pxtcp != NULL); +#ifdef RT_OS_WINDOWS + LWIP_ASSERT1(pxtcp->sock != INVALID_SOCKET); +#else + LWIP_ASSERT1(pxtcp->sock >= 0); +#endif + LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL); + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + LWIP_ASSERT1(pxtcp->pmhdl.slot < 0); + + status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events); + return status; +} + + +/** + * Unregister pxtcp with poll manager. + * + * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error + * leg). + */ +void +pxtcp_pmgr_del(struct pxtcp *pxtcp) +{ + LWIP_ASSERT1(pxtcp != NULL); + + pollmgr_del_slot(pxtcp->pmhdl.slot); +} + + +/** + * POLLMGR_CHAN_PXTCP_ADD handler. + * + * Get new pxtcp from lwip thread and start polling its socket. + */ +static int +pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + int status; + + pxtcp = pxtcp_chan_recv(handler, fd, revents); + DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n", + (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + status = pxtcp_pmgr_add(pxtcp); + if (status < 0) { + (void) pxtcp_schedule_reset(pxtcp); + } + + return POLLIN; +} + + +/** + * POLLMGR_CHAN_PXTCP_POLLOUT handler. 
+ * + * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data + * and failed, it now requests us to poll the socket for POLLOUT and + * schedule pxtcp_pcb_forward_outbound() when sock is writable again. + */ +static int +pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_chan_recv_strong(handler, fd, revents); + DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp)); + + if (pxtcp == NULL) { + return POLLIN; + } + + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + LWIP_ASSERT1(pxtcp->pmhdl.slot > 0); + + pxtcp->events |= POLLOUT; + pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events); + + return POLLIN; +} + + +/** + * POLLMGR_CHAN_PXTCP_POLLIN handler. + */ +static int +pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_chan_recv_strong(handler, fd, revents); + DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp)); + + if (pxtcp == NULL) { + return POLLIN; + } + + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + LWIP_ASSERT1(pxtcp->pmhdl.slot > 0); + + if (pxtcp->inbound_close) { + return POLLIN; + } + + pxtcp->events |= POLLIN; + pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events); + + return POLLIN; +} + + +#if !(HAVE_TCP_POLLHUP & POLLOUT) +/** + * POLLMGR_CHAN_PXTCP_DEL handler. + * + * Schedule pxtcp deletion. We only need this if host system doesn't + * report POLLHUP for fully closed tcp sockets. 
+ */ +static int +pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_chan_recv_strong(handler, fd, revents); + if (pxtcp == NULL) { + return POLLIN; + } + + DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n", + (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL); + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + + LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */ + LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */ + + pxtcp_pmgr_del(pxtcp); + (void) pxtcp_schedule_delete(pxtcp); + + return POLLIN; +} +#endif /* !(HAVE_TCP_POLLHUP & POLLOUT) */ + + +/** + * POLLMGR_CHAN_PXTCP_RESET handler. + * + * Close the socket with RST and delete pxtcp. + */ +static int +pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_chan_recv_strong(handler, fd, revents); + if (pxtcp == NULL) { + return POLLIN; + } + + DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n", + (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL); + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + + pxtcp_pmgr_del(pxtcp); + + proxy_reset_socket(pxtcp->sock); + pxtcp->sock = INVALID_SOCKET; + + (void) pxtcp_schedule_reset(pxtcp); + + return POLLIN; +} + + +static struct pxtcp * +pxtcp_allocate(void) +{ + struct pxtcp *pxtcp; + + pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp)); + if (pxtcp == NULL) { + return NULL; + } + + pxtcp->pmhdl.callback = NULL; + pxtcp->pmhdl.data = (void *)pxtcp; + pxtcp->pmhdl.slot = -1; + + pxtcp->pcb = NULL; + pxtcp->sock = INVALID_SOCKET; + pxtcp->events = 0; + pxtcp->sockerr = 0; + pxtcp->netif = NULL; + pxtcp->unsent = NULL; + pxtcp->outbound_close = 0; + pxtcp->outbound_close_done = 0; + pxtcp->inbound_close = 0; + pxtcp->inbound_close_done = 0; + pxtcp->inbound_pull = 0; + pxtcp->deferred_delete = 0; + + pxtcp->inbuf.bufsize = 64 * 1024; 
+ pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize); + if (pxtcp->inbuf.buf == NULL) { + free(pxtcp); + return NULL; + } + pxtcp->inbuf.vacant = 0; + pxtcp->inbuf.unacked = 0; + pxtcp->inbuf.unsent = 0; + + pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl); + if (pxtcp->rp == NULL) { + free(pxtcp->inbuf.buf); + free(pxtcp); + return NULL; + } + +#define CALLBACK_MSG(MSG, FUNC) \ + do { \ + pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \ + pxtcp->MSG.sem = NULL; \ + pxtcp->MSG.msg.cb.function = FUNC; \ + pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \ + } while (0) + + CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp); + CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp); + CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm); + CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound); + CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound); +#if HAVE_TCP_POLLHUP + CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound); +#endif + +#undef CALLBACK_MSG + + return pxtcp; +} + + +/** + * Exported to fwtcp to create pxtcp for incoming port-forwarded + * connections. Completed with pcb in pxtcp_pcb_connect(). 
+ */ +struct pxtcp * +pxtcp_create_forwarded(SOCKET sock) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_allocate(); + if (pxtcp == NULL) { + return NULL; + } + + pxtcp->sock = sock; + pxtcp->pmhdl.callback = pxtcp_pmgr_pump; + pxtcp->events = 0; + + return pxtcp; +} + + +static void +pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb) +{ + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pcb != NULL); + + pxtcp->pcb = pcb; + + tcp_arg(pcb, pxtcp); + + tcp_recv(pcb, pxtcp_pcb_recv); + tcp_sent(pcb, pxtcp_pcb_sent); + tcp_poll(pcb, NULL, 255); + tcp_err(pcb, pxtcp_pcb_err); +} + + +static void +pxtcp_free(struct pxtcp *pxtcp) +{ + if (pxtcp->unsent != NULL) { + pbuf_free(pxtcp->unsent); + } + if (pxtcp->inbuf.buf != NULL) { + free(pxtcp->inbuf.buf); + } + free(pxtcp); +} + + +/** + * Counterpart to pxtcp_create_forwarded() to destruct pxtcp that + * fwtcp failed to register with poll manager to post to lwip thread + * for doing connect. + */ +void +pxtcp_cancel_forwarded(struct pxtcp *pxtcp) +{ + LWIP_ASSERT1(pxtcp->pcb == NULL); + pxtcp_pcb_reset_pxtcp(pxtcp); +} + + +static void +pxtcp_pcb_dissociate(struct pxtcp *pxtcp) +{ + if (pxtcp == NULL || pxtcp->pcb == NULL) { + return; + } + + DPRINTF(("%s: pxtcp %p <-> pcb %p\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb)); + + /* + * We must have dissociated from a fully closed pcb immediately + * since lwip recycles them and we don't wan't to mess with what + * would be someone else's pcb that we happen to have a stale + * pointer to. + */ + LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp); + + tcp_recv(pxtcp->pcb, NULL); + tcp_sent(pxtcp->pcb, NULL); + tcp_poll(pxtcp->pcb, NULL, 255); + tcp_err(pxtcp->pcb, NULL); + tcp_arg(pxtcp->pcb, NULL); + pxtcp->pcb = NULL; +} + + +/** + * Lwip thread callback invoked via pxtcp::msg_delete + * + * Since we use static messages to communicate to the lwip thread, we + * cannot delete pxtcp without making sure there are no unprocessed + * messages in the lwip thread mailbox. 
+ * + * The easiest way to ensure that is to send this "delete" message as + * the last one and when it's processed we know there are no more and + * it's safe to delete pxtcp. + * + * Poll manager handlers should use pxtcp_schedule_delete() + * convenience function. + */ +static void +pxtcp_pcb_delete_pxtcp(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + + DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock, + (pxtcp->deferred_delete && !pxtcp->inbound_pull + ? " (was deferred)" : ""))); + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pmhdl.slot < 0); + LWIP_ASSERT1(pxtcp->outbound_close_done); + LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */ + + + /* + * pxtcp is no longer registered with poll manager, so it's safe + * to close the socket. + */ + if (pxtcp->sock != INVALID_SOCKET) { + closesocket(pxtcp->sock); + pxtcp->sock = INVALID_SOCKET; + } + + /* + * We might have already dissociated from a fully closed pcb, or + * guest might have sent us a reset while msg_delete was in + * transit. If there's no pcb, we are done. + */ + if (pxtcp->pcb == NULL) { + pollmgr_refptr_unref(pxtcp->rp); + pxtcp_free(pxtcp); + return; + } + + /* + * Have we completely forwarded all inbound traffic to the guest? + * + * We may still be waiting for ACKs. We may have failed to send + * some of the data (tcp_write() failed with ERR_MEM). We may + * have failed to send the FIN (tcp_shutdown() failed with + * ERR_MEM). + */ + if (pxtcp_pcb_forward_inbound_done(pxtcp)) { + pxtcp_pcb_dissociate(pxtcp); + pollmgr_refptr_unref(pxtcp->rp); + pxtcp_free(pxtcp); + } + else { + DPRINTF2(("delete: pxtcp %p; pcb %p:" + " unacked %d, unsent %d, vacant %d, %s - DEFER!\n", + (void *)pxtcp, (void *)pxtcp->pcb, + (int)pxtcp->inbuf.unacked, + (int)pxtcp->inbuf.unsent, + (int)pxtcp->inbuf.vacant, + pxtcp->inbound_close_done ? 
"FIN sent" : "FIN is NOT sent")); + + LWIP_ASSERT1(!pxtcp->deferred_delete); + pxtcp->deferred_delete = 1; + } +} + + +/** + * If we couldn't delete pxtcp right away in the msg_delete callback + * from the poll manager thread, we repeat the check at the end of + * relevant pcb callbacks. + */ +DECLINLINE(void) +pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp) +{ + if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) { + pxtcp_pcb_delete_pxtcp(pxtcp); + } +} + + +/** + * Poll manager callbacks should use this convenience wrapper to + * schedule pxtcp deletion on the lwip thread and to deregister from + * the poll manager. + */ +static int +pxtcp_schedule_delete(struct pxtcp *pxtcp) +{ + /* + * If pollmgr_refptr_get() is called by any channel before + * scheduled deletion happens, let them know we are gone. + */ + pxtcp->pmhdl.slot = -1; + + /* + * Schedule deletion. Since poll manager thread may be pre-empted + * right after we send the message, the deletion may actually + * happen on the lwip thread before we return from this function, + * so it's not safe to refer to pxtcp after this call. + */ + proxy_lwip_post(&pxtcp->msg_delete); + + /* tell poll manager to deregister us */ + return -1; +} + + +/** + * Lwip thread callback invoked via pxtcp::msg_reset + * + * Like pxtcp_pcb_delete(), but sends RST to the guest before + * deleting this pxtcp. 
+ */ +static void +pxtcp_pcb_reset_pxtcp(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + LWIP_ASSERT1(pxtcp != NULL); + + DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + if (pxtcp->sock != INVALID_SOCKET) { + proxy_reset_socket(pxtcp->sock); + pxtcp->sock = INVALID_SOCKET; + } + + if (pxtcp->pcb != NULL) { + struct tcp_pcb *pcb = pxtcp->pcb; + pxtcp_pcb_dissociate(pxtcp); + tcp_abort(pcb); + } + + pollmgr_refptr_unref(pxtcp->rp); + pxtcp_free(pxtcp); +} + + + +/** + * Poll manager callbacks should use this convenience wrapper to + * schedule pxtcp reset and deletion on the lwip thread and to + * deregister from the poll manager. + * + * See pxtcp_schedule_delete() for additional comments. + */ +static int +pxtcp_schedule_reset(struct pxtcp *pxtcp) +{ + pxtcp->pmhdl.slot = -1; + proxy_lwip_post(&pxtcp->msg_reset); + return -1; +} + + +/** + * Reject proxy connection attempt. Depending on the cause (sockerr) + * we may just drop the pcb silently, generate an ICMP datagram or + * send TCP reset. + */ +static void +pxtcp_pcb_reject(struct tcp_pcb *pcb, int sockerr, + struct netif *netif, struct pbuf *p) +{ + int reset = 0; + + if (sockerr == ECONNREFUSED) { + reset = 1; + } + else if (p != NULL) { + struct netif *oif; + + LWIP_ASSERT1(netif != NULL); + + oif = ip_current_netif(); + ip_current_netif() = netif; + + if (PCB_ISIPV6(pcb)) { + if (sockerr == EHOSTDOWN) { + icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? 
*/ + } + else if (sockerr == EHOSTUNREACH + || sockerr == ENETDOWN + || sockerr == ENETUNREACH) + { + icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE); + } + } + else { + if (sockerr == EHOSTDOWN + || sockerr == EHOSTUNREACH + || sockerr == ENETDOWN + || sockerr == ENETUNREACH) + { + icmp_dest_unreach(p, ICMP_DUR_HOST); + } + } + + ip_current_netif() = oif; + } + + tcp_abandon(pcb, reset); +} + + +/** + * Called from poll manager thread via pxtcp::msg_accept when proxy + * failed to connect to the destination. Also called when we failed + * to register pxtcp with poll manager. + * + * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in + * how this unestablished connection is terminated. + */ +static void +pxtcp_pcb_accept_refuse(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + + DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: %R[sockerr]\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb, + pxtcp->sock, pxtcp->sockerr)); + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET); + + if (pxtcp->pcb != NULL) { + struct tcp_pcb *pcb = pxtcp->pcb; + pxtcp_pcb_dissociate(pxtcp); + pxtcp_pcb_reject(pcb, pxtcp->sockerr, pxtcp->netif, pxtcp->unsent); + } + + pollmgr_refptr_unref(pxtcp->rp); + pxtcp_free(pxtcp); +} + + +/** + * Convenience wrapper for poll manager connect callback to reject + * connection attempt. + * + * Like pxtcp_schedule_reset(), but the callback is more discriminate + * in how this unestablished connection is terminated. + */ +static int +pxtcp_schedule_reject(struct pxtcp *pxtcp) +{ + pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse; + pxtcp->pmhdl.slot = -1; + proxy_lwip_post(&pxtcp->msg_accept); + return -1; +} + + +/** + * Global tcp_proxy_accept() callback for proxied outgoing TCP + * connections from guest(s). 
+ */ +static err_t +pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, struct pbuf *syn) +{ + LWIP_UNUSED_ARG(arg); + + return pxtcp_pcb_accept_outbound(newpcb, syn, + PCB_ISIPV6(newpcb), &newpcb->local_ip, newpcb->local_port); +} + + +err_t +pxtcp_pcb_accept_outbound(struct tcp_pcb *newpcb, struct pbuf *p, + int is_ipv6, ipX_addr_t *dst_addr, u16_t dst_port) +{ + struct pxtcp *pxtcp; + ipX_addr_t mapped_dst_addr; + int sdom; + SOCKET sock; + ssize_t nsent; + int sockerr = 0; + + /* + * TCP first calls accept callback when it receives the first SYN + * and "tentatively accepts" new proxied connection attempt. When + * proxy "confirms" the SYN and sends SYN|ACK and the guest + * replies with ACK the accept callback is called again, this time + * with the established connection. + */ + LWIP_ASSERT1(newpcb->state == SYN_RCVD_0); + tcp_accept(newpcb, pxtcp_pcb_accept); + tcp_arg(newpcb, NULL); + + tcp_setprio(newpcb, TCP_PRIO_MAX); + + pxremap_outbound_ipX(is_ipv6, &mapped_dst_addr, dst_addr); + + sdom = is_ipv6 ? 
PF_INET6 : PF_INET; + sock = proxy_connected_socket(sdom, SOCK_STREAM, + &mapped_dst_addr, dst_port); + if (sock == INVALID_SOCKET) { + sockerr = SOCKERRNO(); + goto abort; + } + + pxtcp = pxtcp_allocate(); + if (pxtcp == NULL) { + proxy_reset_socket(sock); + goto abort; + } + + /* save initial datagram in case we need to reply with ICMP */ + if (p != NULL) { + pbuf_ref(p); + pxtcp->unsent = p; + pxtcp->netif = ip_current_netif(); + } + + pxtcp_pcb_associate(pxtcp, newpcb); + pxtcp->sock = sock; + + pxtcp->pmhdl.callback = pxtcp_pmgr_connect; + pxtcp->events = POLLOUT; + + nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp); + if (nsent < 0) { + pxtcp->sock = INVALID_SOCKET; + proxy_reset_socket(sock); + pxtcp_pcb_accept_refuse(pxtcp); + return ERR_ABRT; + } + + return ERR_OK; + + abort: + DPRINTF0(("%s: pcb %p, sock %d: %R[sockerr]\n", + __func__, (void *)newpcb, sock, sockerr)); + pxtcp_pcb_reject(newpcb, sockerr, ip_current_netif(), p); + return ERR_ABRT; +} + + +/** + * tcp_proxy_accept() callback for accepted proxied outgoing TCP + * connections from guest(s). This is "real" accept with three-way + * handshake completed. + */ +static err_t +pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + + LWIP_UNUSED_ARG(pcb); /* used only in asserts */ + LWIP_UNUSED_ARG(error); /* always ERR_OK */ + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb = pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + + /* send any inbound data that are already queued */ + pxtcp_pcb_forward_inbound(pxtcp); + return ERR_OK; +} + + +/** + * Initial poll manager callback for proxied outgoing TCP connections. + * pxtcp_pcb_accept() sets pxtcp::pmhdl::callback to this. + * + * Waits for connect(2) to the destination to complete. On success + * replaces itself with pxtcp_pmgr_pump() callback common to all + * established TCP connections. 
+ */ +static int +pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + RT_NOREF(fd); + + pxtcp = (struct pxtcp *)handler->data; + LWIP_ASSERT1(handler == &pxtcp->pmhdl); + LWIP_ASSERT1(fd == pxtcp->sock); + LWIP_ASSERT1(pxtcp->sockerr == 0); + + if (revents & POLLNVAL) { + pxtcp->sock = INVALID_SOCKET; + pxtcp->sockerr = ETIMEDOUT; + return pxtcp_schedule_reject(pxtcp); + } + + /* + * Solaris and NetBSD don't report either POLLERR or POLLHUP when + * connect(2) fails, just POLLOUT. In that case we always need to + * check SO_ERROR. + */ +#if defined(RT_OS_SOLARIS) || defined(RT_OS_NETBSD) +# define CONNECT_CHECK_ERROR POLLOUT +#else +# define CONNECT_CHECK_ERROR (POLLERR | POLLHUP) +#endif + + /* + * Check the cause of the failure so that pxtcp_pcb_reject() may + * behave accordingly. + */ + if (revents & CONNECT_CHECK_ERROR) { + socklen_t optlen = (socklen_t)sizeof(pxtcp->sockerr); + int status; + SOCKET s; + + status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR, + (char *)&pxtcp->sockerr, &optlen); + if (RT_UNLIKELY(status == SOCKET_ERROR)) { /* should not happen */ + DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n", + __func__, fd, SOCKERRNO())); + pxtcp->sockerr = ETIMEDOUT; + } + else { + /* don't spam this log on successful connect(2) */ + if ((revents & (POLLERR | POLLHUP)) /* we were told it's failed */ + || pxtcp->sockerr != 0) /* we determined it's failed */ + { + DPRINTF(("%s: sock %d: connect: %R[sockerr]\n", + __func__, fd, pxtcp->sockerr)); + } + + if ((revents & (POLLERR | POLLHUP)) + && RT_UNLIKELY(pxtcp->sockerr == 0)) + { + /* if we're told it's failed, make sure it's marked as such */ + pxtcp->sockerr = ETIMEDOUT; + } + } + + if (pxtcp->sockerr != 0) { + s = pxtcp->sock; + pxtcp->sock = INVALID_SOCKET; + closesocket(s); + return pxtcp_schedule_reject(pxtcp); + } + } + + if (revents & POLLOUT) { /* connect is successful */ + /* confirm accept to the guest */ + 
proxy_lwip_post(&pxtcp->msg_accept); + + /* + * Switch to common callback used for all established proxied + * connections. + */ + pxtcp->pmhdl.callback = pxtcp_pmgr_pump; + + /* + * Initially we poll for incoming traffic only. Outgoing + * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails + * it will ask us to poll for POLLOUT too. + */ + pxtcp->events = POLLIN; + return pxtcp->events; + } + + /* should never get here */ + DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n", + __func__, (void *)pxtcp, fd, revents)); + return pxtcp_schedule_reset(pxtcp); +} + + +/** + * Called from poll manager thread via pxtcp::msg_accept when proxy + * connected to the destination. Finalize accept by sending SYN|ACK + * to the guest. + */ +static void +pxtcp_pcb_accept_confirm(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + err_t error; + + LWIP_ASSERT1(pxtcp != NULL); + if (pxtcp->pcb == NULL) { + return; + } + + /* we are not going to reply with ICMP, so we can drop initial pbuf */ + if (pxtcp->unsent != NULL) { + pbuf_free(pxtcp->unsent); + pxtcp->unsent = NULL; + } + + error = tcp_proxy_accept_confirm(pxtcp->pcb); + + /* + * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it + * abandons the pcb. Retrying that is not very easy, since it + * would require keeping "fractional state". From guest's point + * of view there is no reply to its SYN so it will either resend + * the SYN (effetively triggering full connection retry for us), + * or it will eventually time out. + */ + if (error == ERR_ABRT) { + pxtcp->pcb = NULL; /* pcb is gone */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp); + } + + /* + * else if (error != ERR_OK): even if tcp_output() failed with + * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be + * retransmitted eventually. + */ +} + + +/** + * Entry point for port-forwarding. 
+ * + * fwtcp accepts new incoming connection, creates pxtcp for the socket + * (with no pcb yet) and adds it to the poll manager (polling for + * errors only). Then it calls this function to construct the pcb and + * perform connection to the guest. + */ +void +pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec) +{ + struct sockaddr_storage ss; + socklen_t sslen; + struct tcp_pcb *pcb; + ipX_addr_t src_addr, dst_addr; + u16_t src_port, dst_port; + int status; + err_t error; + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb == NULL); + LWIP_ASSERT1(fwspec->stype == SOCK_STREAM); + + pcb = tcp_new(); + if (pcb == NULL) { + goto reset; + } + + tcp_setprio(pcb, TCP_PRIO_MAX); + pxtcp_pcb_associate(pxtcp, pcb); + + sslen = sizeof(ss); + status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen); + if (status == SOCKET_ERROR) { + goto reset; + } + + /* nit: compares PF and AF, but they are the same everywhere */ + LWIP_ASSERT1(ss.ss_family == fwspec->sdom); + + status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss); + if (status == PXREMAP_FAILED) { + goto reset; + } + + if (ss.ss_family == PF_INET) { + const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss; + + src_port = peer4->sin_port; + + memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t)); + dst_port = fwspec->dst.sin.sin_port; + } + else { /* PF_INET6 */ + const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss; + ip_set_v6(pcb, 1); + + src_port = peer6->sin6_port; + + memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t)); + dst_port = fwspec->dst.sin6.sin6_port; + } + + /* lwip port arguments are in host order */ + src_port = ntohs(src_port); + dst_port = ntohs(dst_port); + + error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port); + if (error != ERR_OK) { + goto reset; + } + + error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port, + /* callback: */ pxtcp_pcb_connected); + if (error != ERR_OK) { + goto 
reset; + } + + return; + + reset: + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp); +} + + +/** + * Port-forwarded connection to guest is successful, pump data. + */ +static err_t +pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + + LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */ + LWIP_UNUSED_ARG(error); + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb == pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + LWIP_UNUSED_ARG(pcb); + + DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp); + + return ERR_OK; +} + + +/** + * tcp_recv() callback. + */ +static err_t +pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + + LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */ + LWIP_UNUSED_ARG(error); + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb == pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + LWIP_UNUSED_ARG(pcb); + + + /* + * Have we done sending previous batch? + */ + if (pxtcp->unsent != NULL) { + if (p != NULL) { + /* + * Return an error to tell TCP to hold onto that pbuf. + * It will be presented to us later from tcp_fasttmr(). + */ + return ERR_WOULDBLOCK; + } + else { + /* + * Unlike data, p == NULL indicating orderly shutdown is + * NOT presented to us again + */ + pxtcp->outbound_close = 1; + return ERR_OK; + } + } + + + /* + * Guest closed? + */ + if (p == NULL) { + pxtcp->outbound_close = 1; + pxtcp_pcb_forward_outbound_close(pxtcp); + return ERR_OK; + } + + + /* + * Got data, send what we can without blocking. + */ + return pxtcp_pcb_forward_outbound(pxtcp, p); +} + + +/** + * Guest half-closed its TX side of the connection. 
+ * + * Called either immediately from pxtcp_pcb_recv() when it gets NULL, + * or from pxtcp_pcb_forward_outbound() when it finishes forwarding + * previously unsent data and sees pxtcp::outbound_close flag saved by + * pxtcp_pcb_recv(). + */ +static void +pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp) +{ + struct tcp_pcb *pcb; + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->outbound_close); /* the guest's close must have been recorded */ + LWIP_ASSERT1(!pxtcp->outbound_close_done); /* ... and not forwarded to the socket yet */ + + pcb = pxtcp->pcb; + LWIP_ASSERT1(pcb != NULL); + + DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n", + (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state))); + + + /* set the flag first, since shutdown() may trigger POLLHUP */ + pxtcp->outbound_close_done = 1; + shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */ + +#if !(HAVE_TCP_POLLHUP & POLLOUT) + /* + * We need to nudge poll manager manually, since OS will not + * report POLLHUP. + */ + if (pxtcp->inbound_close) { + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp); + } +#endif + + + /* no more outbound data coming to us */ + tcp_recv(pcb, NULL); + + /* + * If we have already done inbound close previously (active close + * on the pcb), then we must not hold onto a pcb in TIME_WAIT + * state since those will be recycled by lwip when it runs out of + * free pcbs in the pool. + * + * The test is true also for a pcb in CLOSING state that waits + * just for the ACK of its FIN (to transition to TIME_WAIT). + */ + if (pxtcp_pcb_forward_inbound_done(pxtcp)) { + pxtcp_pcb_dissociate(pxtcp); + } +} + + +/** + * Forward outbound data from pcb to socket. + * + * Called by pxtcp_pcb_recv() to forward new data and by callout + * triggered by POLLOUT on the socket to send previously unsent data. + * + * (Re)schedules one-time callout if not all data are sent.
+ */ +static err_t +pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p) +{ + struct pbuf *qs, *q; + size_t qoff; + size_t forwarded; + int sockerr; + + LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p); + + forwarded = 0; + sockerr = 0; + + q = NULL; + qoff = 0; + + qs = p; + while (qs != NULL) { + IOVEC iov[8]; + const size_t iovsize = sizeof(iov)/sizeof(iov[0]); + size_t fwd1; + ssize_t nsent; + size_t i; + + fwd1 = 0; + for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) { + LWIP_ASSERT1(q->len > 0); + IOVEC_SET_BASE(iov[i], q->payload); + IOVEC_SET_LEN(iov[i], q->len); + fwd1 += q->len; + } + + /* + * TODO: This is where application-level proxy can hook into + * to process outbound traffic. + */ + nsent = pxtcp_sock_send(pxtcp, iov, i); + + if (nsent == (ssize_t)fwd1) { + /* successfully sent this chain fragment completely */ + forwarded += nsent; + qs = q; + } + else if (nsent >= 0) { + /* successfully sent only some data */ + forwarded += nsent; + + /* find the first pbuf that was not completely forwarded */ + qoff = nsent; + for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) { + if (qoff < q->len) { + break; + } + qoff -= q->len; + } + LWIP_ASSERT1(q != NULL); + LWIP_ASSERT1(qoff < q->len); + break; + } + else { + sockerr = -nsent; + + /* + * Some errors are really not errors - if we get them, + * it's not different from getting nsent == 0, so filter + * them out here. + */ + if (proxy_error_is_transient(sockerr)) { + sockerr = 0; + } + q = qs; + qoff = 0; + break; + } + } + + if (forwarded > 0) { + DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: sent %d bytes\n", + (void *)pxtcp, (void *)pxtcp->pcb, (int)forwarded)); + tcp_recved(pxtcp->pcb, (u16_t)forwarded); + } + + if (q == NULL) { /* everything is forwarded? 
*/ + LWIP_ASSERT1(sockerr == 0); + LWIP_ASSERT1(forwarded == p->tot_len); + + pxtcp->unsent = NULL; + pbuf_free(p); + if (pxtcp->outbound_close) { + pxtcp_pcb_forward_outbound_close(pxtcp); + } + } + else { + if (q != p) { + /* free forwarded pbufs at the beginning of the chain */ + pbuf_ref(q); + pbuf_free(p); + } + if (qoff > 0) { + /* advance payload pointer past the forwarded part */ + pbuf_header(q, -(s16_t)qoff); + } + pxtcp->unsent = q; + DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: kept %d bytes\n", + (void *)pxtcp, (void *)pxtcp->pcb, (int)q->tot_len)); + + /* + * Have sendmsg() failed? + * + * Connection reset will be detected by poll and + * pxtcp_schedule_reset() will be called. + * + * Otherwise something *really* unexpected must have happened, + * so we'd better abort. + */ + if (sockerr != 0 && sockerr != ECONNRESET) { + struct tcp_pcb *pcb = pxtcp->pcb; + DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: %R[sockerr]\n", + (void *)pxtcp, (void *)pcb, sockerr)); + + pxtcp_pcb_dissociate(pxtcp); + + tcp_abort(pcb); + + /* call error callback manually since we've already dissociated */ + pxtcp_pcb_err((void *)pxtcp, ERR_ABRT); + return ERR_ABRT; + } + + /* schedule one-shot POLLOUT on the socket */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp); + } + return ERR_OK; +} + + +#if !defined(RT_OS_WINDOWS) +static ssize_t +pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen) +{ + struct msghdr mh; + ssize_t nsent; + +#ifdef MSG_NOSIGNAL + const int send_flags = MSG_NOSIGNAL; +#else + const int send_flags = 0; +#endif + + memset(&mh, 0, sizeof(mh)); + + mh.msg_iov = iov; + mh.msg_iovlen = iovlen; + + nsent = sendmsg(pxtcp->sock, &mh, send_flags); + if (nsent < 0) { + nsent = -SOCKERRNO(); + } + + return nsent; +} +#else /* RT_OS_WINDOWS */ +static ssize_t +pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen) +{ + DWORD nsent; + int status; + + status = WSASend(pxtcp->sock, iov, (DWORD)iovlen, &nsent, + 0, NULL, NULL); + if 
(status == SOCKET_ERROR) { + return -SOCKERRNO(); + } + + return nsent; +} +#endif /* RT_OS_WINDOWS */ + + +/** + * Callback from poll manager (on POLLOUT) to send data from + * pxtcp::unsent pbuf to socket. + */ +static void +pxtcp_pcb_write_outbound(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + LWIP_ASSERT1(pxtcp != NULL); + + if (pxtcp->pcb == NULL) { + return; + } + + pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent); +} + + +/** + * Common poll manager callback used by both outgoing and incoming + * (port-forwarded) connections that has connected socket. + */ +static int +pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + int status; + int sockerr; + RT_NOREF(fd); + + pxtcp = (struct pxtcp *)handler->data; + LWIP_ASSERT1(handler == &pxtcp->pmhdl); + LWIP_ASSERT1(fd == pxtcp->sock); + + if (revents & POLLNVAL) { + pxtcp->sock = INVALID_SOCKET; + return pxtcp_schedule_reset(pxtcp); + } + + if (revents & POLLERR) { + socklen_t optlen = (socklen_t)sizeof(sockerr); + + status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR, + (char *)&sockerr, &optlen); + if (status == SOCKET_ERROR) { /* should not happen */ + DPRINTF(("sock %d: POLLERR: SO_ERROR failed: %R[sockerr]\n", + fd, SOCKERRNO())); + } + else { + DPRINTF0(("sock %d: POLLERR: %R[sockerr]\n", fd, sockerr)); + } + return pxtcp_schedule_reset(pxtcp); + } + + if (revents & POLLOUT) { + pxtcp->events &= ~POLLOUT; + proxy_lwip_post(&pxtcp->msg_outbound); + } + + if (revents & POLLIN) { + ssize_t nread; + int stop_pollin; + + nread = pxtcp_sock_read(pxtcp, &stop_pollin); + if (nread < 0) { + sockerr = -(int)nread; + DPRINTF0(("sock %d: POLLIN: %R[sockerr]\n", fd, sockerr)); + return pxtcp_schedule_reset(pxtcp); + } + + if (stop_pollin) { + pxtcp->events &= ~POLLIN; + } + + if (nread > 0) { + proxy_lwip_post(&pxtcp->msg_inbound); +#if !HAVE_TCP_POLLHUP + /* + * If host does not report POLLHUP for closed sockets + * (e.g. 
NetBSD) we should check for full close manually. + */ + if (pxtcp->inbound_close && pxtcp->outbound_close_done) { + LWIP_ASSERT1((revents & POLLHUP) == 0); + return pxtcp_schedule_delete(pxtcp); + } +#endif + } + } + +#if !HAVE_TCP_POLLHUP + LWIP_ASSERT1((revents & POLLHUP) == 0); +#else + if (revents & POLLHUP) { + DPRINTF(("sock %d: HUP\n", fd)); + +#if HAVE_TCP_POLLHUP == POLLIN + /* + * XXX: OSX reports POLLHUP once more when inbound is already + * half-closed (which has already been reported as a "normal" + * POLLHUP, handled below), the socket is polled for POLLOUT + * (guest sends a lot of data that we can't push out fast + * enough), and remote sends a reset - e.g. an http client + * that half-closes after request and then aborts the transfer. + * + * It really should have been reported as POLLERR, but it + * seems OSX never reports POLLERR for sockets. + */ +#if defined(RT_OS_DARWIN) + { + socklen_t optlen = (socklen_t)sizeof(sockerr); + + status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR, + (char *)&sockerr, &optlen); + if (status == SOCKET_ERROR) { /* should not happen */ + DPRINTF(("sock %d: POLLHUP: SO_ERROR failed: %R[sockerr]\n", + fd, SOCKERRNO())); + sockerr = ECONNRESET; + } + else if (sockerr != 0) { + DPRINTF0(("sock %d: POLLHUP: %R[sockerr]\n", fd, sockerr)); + } + + if (sockerr != 0) { /* XXX: should have been POLLERR */ + return pxtcp_schedule_reset(pxtcp); + } + } +#endif /* RT_OS_DARWIN */ + + /* + * Remote closed inbound. + */ + if (!pxtcp->outbound_close_done) { + /* + * We might still need to poll for POLLOUT, but we can not + * poll for POLLIN anymore (even if not all data are read) + * because we will be spammed by POLLHUP. + */ + pxtcp->events &= ~POLLIN; + if (!pxtcp->inbound_close) { + /* the rest of the input has to be pulled */ + proxy_lwip_post(&pxtcp->msg_inpull); + } + } + else +#endif + /* + * Both directions are closed. 
+ */ + { + LWIP_ASSERT1(pxtcp->outbound_close_done); + + if (pxtcp->inbound_close) { + /* there's no unread data, we are done */ + return pxtcp_schedule_delete(pxtcp); + } + else { + /* pull the rest of the input first (deferred_delete) */ + pxtcp->pmhdl.slot = -1; + proxy_lwip_post(&pxtcp->msg_inpull); + return -1; + } + /* NOTREACHED */ + } + + } +#endif /* HAVE_TCP_POLLHUP */ + + return pxtcp->events; +} + + +/** + * Read data from socket to ringbuf. This may be used both on lwip + * and poll manager threads. + * + * Flag pointed to by pstop is set when further reading is impossible, + * either temporary when buffer is full, or permanently when EOF is + * received. + * + * Returns number of bytes read. NB: EOF is reported as 1! + * + * Returns zero if nothing was read, either because buffer is full, or + * if no data is available (EWOULDBLOCK, EINTR &c). + * + * Returns -errno on real socket errors. + */ +static ssize_t +pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop) +{ + IOVEC iov[2]; + size_t iovlen; + ssize_t nread; + + const size_t sz = pxtcp->inbuf.bufsize; + size_t beg, lim, wrnew; + + *pstop = 0; + + beg = pxtcp->inbuf.vacant; + IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]); + + /* lim is the index we can NOT write to */ + lim = pxtcp->inbuf.unacked; + if (lim == 0) { + lim = sz - 1; /* empty slot at the end */ + } + else if (lim == 1 && beg != 0) { + lim = sz; /* empty slot at the beginning */ + } + else { + --lim; + } + + if (beg == lim) { + /* + * Buffer is full, stop polling for POLLIN. + * + * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs + * data, freeing space in the ring buffer. 
+ */ + *pstop = 1; + return 0; + } + + if (beg < lim) { + /* free space in one chunk */ + iovlen = 1; + IOVEC_SET_LEN(iov[0], lim - beg); + } + else { + /* free space in two chunks */ + iovlen = 2; + IOVEC_SET_LEN(iov[0], sz - beg); + IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]); + IOVEC_SET_LEN(iov[1], lim); + } + + /* + * TODO: This is where application-level proxy can hook into to + * process inbound traffic. + */ + nread = pxtcp_sock_recv(pxtcp, iov, iovlen); + + if (nread > 0) { + wrnew = beg + nread; + if (wrnew >= sz) { + wrnew -= sz; + } + pxtcp->inbuf.vacant = wrnew; + DPRINTF2(("pxtcp %p: sock %d read %d bytes\n", + (void *)pxtcp, pxtcp->sock, (int)nread)); + return nread; + } + else if (nread == 0) { + *pstop = 1; + pxtcp->inbound_close = 1; + DPRINTF2(("pxtcp %p: sock %d read EOF\n", + (void *)pxtcp, pxtcp->sock)); + return 1; + } + else { + int sockerr = -nread; + + if (proxy_error_is_transient(sockerr)) { + /* haven't read anything, just return */ + DPRINTF2(("pxtcp %p: sock %d read cancelled\n", + (void *)pxtcp, pxtcp->sock)); + return 0; + } + else { + /* socket error! 
*/ + DPRINTF0(("pxtcp %p: sock %d read: %R[sockerr]\n", + (void *)pxtcp, pxtcp->sock, sockerr)); + return -sockerr; + } + } +} + + +#if !defined(RT_OS_WINDOWS) +static ssize_t +pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen) +{ + struct msghdr mh; + ssize_t nread; + + memset(&mh, 0, sizeof(mh)); /* only the iovec fields are filled in below */ + + mh.msg_iov = iov; + mh.msg_iovlen = iovlen; + + nread = recvmsg(pxtcp->sock, &mh, 0); + if (nread < 0) { + nread = -SOCKERRNO(); /* failure is returned as a negated socket error */ + } + + return nread; +} +#else /* RT_OS_WINDOWS */ +static ssize_t +pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen) +{ + DWORD flags; + DWORD nread; + int status; + + flags = 0; + status = WSARecv(pxtcp->sock, iov, (DWORD)iovlen, &nread, + &flags, NULL, NULL); + if (status == SOCKET_ERROR) { + return -SOCKERRNO(); /* same negated-error convention as the recvmsg() variant */ + } + + return (ssize_t)nread; +} +#endif /* RT_OS_WINDOWS */ + + +/** + * Callback from poll manager (pxtcp::msg_inbound) to trigger output + * from ringbuf to guest. + */ +static void +pxtcp_pcb_write_inbound(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + LWIP_ASSERT1(pxtcp != NULL); + + if (pxtcp->pcb == NULL) { /* no pcb to write to, nothing to do */ + return; + } + + pxtcp_pcb_forward_inbound(pxtcp); +} + + +/** + * tcp_poll() callback + * + * We switch it on when tcp_write() or tcp_shutdown() fail with + * ERR_MEM to prevent connection from stalling. If there are ACKs or + * more inbound data then pxtcp_pcb_forward_inbound() will be + * triggered again, but if neither happens, tcp_poll() comes to the + * rescue. + */ +static err_t +pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + LWIP_UNUSED_ARG(pcb); + + DPRINTF2(("%s: pxtcp %p; pcb %p\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb)); + + pxtcp_pcb_forward_inbound(pxtcp); + + /* + * If the last thing holding up deletion of the pxtcp was failed + * tcp_shutdown() and it succeeded, we may be the last callback.
+ */ + pxtcp_pcb_maybe_deferred_delete(pxtcp); + + return ERR_OK; +} + + +static void +pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp) +{ + tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0); +} + + +static void +pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp) +{ + tcp_poll(pxtcp->pcb, NULL, 255); +} + + +/** + * Forward inbound data from ring buffer to the guest. + * + * Scheduled by poll manager thread after it receives more data into + * the ring buffer (we have more data to send). + + * Also called from tcp_sent() callback when guest ACKs some data, + * increasing pcb->snd_buf (we are permitted to send more data). + * + * Also called from tcp_poll() callback if previous attempt to forward + * inbound data failed with ERR_MEM (we need to try again). + */ +static void +pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp) +{ + struct tcp_pcb *pcb; + size_t sndbuf; + size_t beg, lim, sndlim; + size_t toeob, tolim; + size_t nsent; + err_t error; + + LWIP_ASSERT1(pxtcp != NULL); + pcb = pxtcp->pcb; + if (pcb == NULL) { + return; + } + + if (/* __predict_false */ pcb->state < ESTABLISHED) { + /* + * If we have just confirmed accept of this connection, the + * pcb is in SYN_RCVD state and we still haven't received the + * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED + * transition that lwip decrements pcb->acked so that that ACK + * is not reported to pxtcp_pcb_sent(). If we send something + * now and immediately close (think "daytime", e.g.) while + * still in SYN_RCVD state, we will move directly to + * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will + * report it to pxtcp_pcb_sent(). 
+ */ + DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n", + (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state))); + return; + } + + + beg = pxtcp->inbuf.unsent; /* private to lwip thread */ + lim = pxtcp->inbuf.vacant; + + if (beg == lim) { + if (pxtcp->inbound_close && !pxtcp->inbound_close_done) { + pxtcp_pcb_forward_inbound_close(pxtcp); + tcp_output(pcb); + return; + } + + /* + * Else, there's no data to send. + * + * If there is free space in the buffer, producer will + * reschedule us as it receives more data and vacant (lim) + * advances. + * + * If buffer is full when all data have been passed to + * tcp_write() but not yet acknowledged, we will advance + * unacked on ACK, freeing some space for producer to write to + * (then see above). + */ + return; + } + + sndbuf = tcp_sndbuf(pcb); + if (sndbuf == 0) { + /* + * Can't send anything now. As guest ACKs some data, TCP will + * call pxtcp_pcb_sent() callback and we will come here again. + */ + return; + } + + nsent = 0; + + /* + * We have three limits to consider: + * - how much data we have in the ringbuf + * - how much data we are allowed to send + * - ringbuf size + */ + toeob = pxtcp->inbuf.bufsize - beg; + if (lim < beg) { /* lim wrapped */ + if (sndbuf < toeob) { /* but we are limited by sndbuf */ + /* so beg is not going to wrap, treat sndbuf as lim */ + lim = beg + sndbuf; /* ... and proceed to the simple case */ + } + else { /* we are limited by the end of the buffer, beg will wrap */ + u8_t maybemore; + if (toeob == sndbuf || lim == 0) { + maybemore = 0; + } + else { + maybemore = TCP_WRITE_FLAG_MORE; + } + + Assert(toeob == (u16_t)toeob); + error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)toeob, maybemore); + if (error != ERR_OK) { + goto writeerr; + } + nsent += toeob; + pxtcp->inbuf.unsent = 0; /* wrap */ + + if (maybemore) { + beg = 0; + sndbuf -= toeob; + } + else { + /* we are done sending, but ... 
*/ + goto check_inbound_close; + } + } + } + + LWIP_ASSERT1(beg < lim); + sndlim = beg + sndbuf; + if (lim > sndlim) { + lim = sndlim; + } + tolim = lim - beg; + if (tolim > 0) { + error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0); + if (error != ERR_OK) { + goto writeerr; + } + nsent += tolim; + pxtcp->inbuf.unsent = lim; + } + + check_inbound_close: + if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) { + pxtcp_pcb_forward_inbound_close(pxtcp); + } + + DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n", + (void *)pxtcp, (void *)pcb, (int)nsent)); + tcp_output(pcb); + pxtcp_pcb_cancel_poll(pxtcp); + return; + + writeerr: + if (error == ERR_MEM) { + if (nsent > 0) { /* first write succeeded, second failed */ + DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n", + (void *)pxtcp, (void *)pcb, (int)nsent)); + tcp_output(pcb); + } + DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n", + (void *)pxtcp, (void *)pcb)); + pxtcp_pcb_schedule_poll(pxtcp); + } + else { + DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n", + (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error))); + + /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? 
*/ + LWIP_ASSERT1(error == ERR_MEM); + } +} + + +static void +pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp) +{ + struct tcp_pcb *pcb; + err_t error; + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->inbound_close); + LWIP_ASSERT1(!pxtcp->inbound_close_done); + LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant); + + pcb = pxtcp->pcb; + LWIP_ASSERT1(pcb != NULL); + + DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n", + (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state))); + + error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1); + if (error != ERR_OK) { + DPRINTF(("inbound_close: pxtcp %p; pcb %p:" + " tcp_shutdown: error=%s\n", + (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error))); + pxtcp_pcb_schedule_poll(pxtcp); + return; + } + + pxtcp_pcb_cancel_poll(pxtcp); + pxtcp->inbound_close_done = 1; + + + /* + * If we have already done outbound close previously (passive + * close on the pcb), then we must not hold onto a pcb in LAST_ACK + * state since those will be deleted by lwip when that last ack + * comes from the guest. + * + * NB: We do NOT check for deferred delete here, even though we + * have just set one of its conditions, inbound_close_done. We + * let pcb callbacks that called us do that. It's simpler and + * cleaner that way. + */ + if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) { + pxtcp_pcb_dissociate(pxtcp); + } +} + + +/** + * Check that all forwarded inbound data is sent and acked, and that + * inbound close is scheduled (we aren't called back when it's acked). + */ +DECLINLINE(int) +pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp) +{ + return (pxtcp->inbound_close_done /* also implies that all data forwarded */ + && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent); +} + + +/** + * tcp_sent() callback - guest acknowledged len bytes. + * + * We can advance inbuf::unacked index, making more free space in the + * ringbuf and wake up producer on poll manager thread. 
+ * + * We can also try to send more data if we have any since pcb->snd_buf + * was increased and we are now permitted to send more. + */ +static err_t +pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + size_t unacked; + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb == pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + LWIP_UNUSED_ARG(pcb); /* only in assert */ + + DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:" + " unacked %d, unsent %d, vacant %d\n", + __func__, (void *)pxtcp, (void *)pcb, (int)len, + (int)pxtcp->inbuf.unacked, + (int)pxtcp->inbuf.unsent, + (int)pxtcp->inbuf.vacant)); + + if (/* __predict_false */ len == 0) { + /* we are notified to start pulling */ + LWIP_ASSERT1(!pxtcp->inbound_close); + LWIP_ASSERT1(pxtcp->inbound_pull); + + unacked = pxtcp->inbuf.unacked; + } + else { + /* + * Advance unacked index. Guest acknowledged the data, so it + * won't be needed again for potential retransmits. + */ + unacked = pxtcp->inbuf.unacked + len; + if (unacked > pxtcp->inbuf.bufsize) { + unacked -= pxtcp->inbuf.bufsize; + } + pxtcp->inbuf.unacked = unacked; + } + + /* arrange for more inbound data */ + if (!pxtcp->inbound_close) { + if (!pxtcp->inbound_pull) { + /* wake up producer, in case it has stopped polling for POLLIN */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp); +#ifdef RT_OS_WINDOWS + /** + * We have't got enought room in ring buffer to read atm, + * but we don't want to lose notification from WSAW4ME when + * space would be available, so we reset event with empty recv + */ + recv(pxtcp->sock, NULL, 0, 0); +#endif + } + else { + ssize_t nread; + int stop_pollin; /* ignored */ + + nread = pxtcp_sock_read(pxtcp, &stop_pollin); + + if (nread < 0) { + int sockerr = -(int)nread; + LWIP_UNUSED_ARG(sockerr); + DPRINTF0(("%s: sock %d: %R[sockerr]\n", + __func__, pxtcp->sock, sockerr)); + +#if HAVE_TCP_POLLHUP == POLLIN /* see counterpart in pxtcp_pmgr_pump() */ + /* + * It may 
still be registered with poll manager for POLLOUT. + */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp); + return ERR_OK; +#else + /* + * It is no longer registered with poll manager so we + * can kill it directly. + */ + pxtcp_pcb_reset_pxtcp(pxtcp); + return ERR_ABRT; +#endif + } + } + } + + /* forward more data if we can */ + if (!pxtcp->inbound_close_done) { + pxtcp_pcb_forward_inbound(pxtcp); + + /* + * NB: we might have dissociated from a pcb that transitioned + * to LAST_ACK state, so don't refer to pcb below. + */ + } + + + /* have we got all the acks? */ + if (pxtcp->inbound_close /* no more new data */ + && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */ + && unacked == pxtcp->inbuf.unsent) /* ... and is acked */ + { + char *buf; + + DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb)); + + /* no more retransmits, so buf is not needed */ + buf = pxtcp->inbuf.buf; + pxtcp->inbuf.buf = NULL; + free(buf); + + /* no more acks, so no more callbacks */ + if (pxtcp->pcb != NULL) { + tcp_sent(pxtcp->pcb, NULL); + } + + /* + * We may be the last callback for this pcb if we have also + * successfully forwarded inbound_close. + */ + pxtcp_pcb_maybe_deferred_delete(pxtcp); + } + + return ERR_OK; +} + + +#if HAVE_TCP_POLLHUP +/** + * Callback from poll manager (pxtcp::msg_inpull) to switch + * pxtcp_pcb_sent() to actively pull the last bits of input. See + * POLLHUP comment in pxtcp_pmgr_pump(). + * + * pxtcp::sock is deregistered from poll manager after this callback + * is scheduled. 
+ */ +static void +pxtcp_pcb_pull_inbound(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + LWIP_ASSERT1(pxtcp != NULL); + + if (pxtcp->pcb == NULL) { + DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp)); + pxtcp_pcb_reset_pxtcp(pxtcp); + return; + } + + pxtcp->inbound_pull = 1; /* pxtcp_pcb_sent() reads the socket itself while this is set */ + if (pxtcp->pmhdl.slot < 0) { /* pxtcp_pmgr_pump() sets slot to -1 when both directions are closed */ + DPRINTF(("%s: pxtcp %p: pcb %p (deferred delete)\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb)); + pxtcp->deferred_delete = 1; + } + else { + DPRINTF(("%s: pxtcp %p: pcb %p\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb)); + } + + pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0); /* len == 0: not an ACK, just start pulling */ +} +#endif /* HAVE_TCP_POLLHUP */ + + +/** + * tcp_err() callback. + * + * pcb is not passed to this callback since it may be already + * deallocated by the stack, but we can't do anything useful with it + * anyway since connection is gone. + */ +static void +pxtcp_pcb_err(void *arg, err_t error) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + LWIP_ASSERT1(pxtcp != NULL); + + /* + * ERR_CLSD is special - it is reported here when: + * + * . guest has already half-closed + * . we send FIN to guest when external half-closes + * . guest acks that FIN + * + * Since connection is closed but receive has been already closed + * lwip can only report this via tcp_err. At this point the pcb + * is still alive, so we can peek at it if need be. + * + * The interesting twist is when the ACK from guest that acks our + * FIN also acks some data. In this scenario lwip will NOT call + * tcp_sent() callback with the ACK for that last bit of data but + * instead will call tcp_err with ERR_CLSD right away. Since that + * ACK also acknowledges all the data, we should run some of + * pxtcp_pcb_sent() logic here.
+ */ + if (error == ERR_CLSD) { + struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */ + + DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:" + " pcb->acked %d;" + " unacked %d, unsent %d, vacant %d\n", + (void *)pxtcp, (void *)pcb, + pcb->acked, + (int)pxtcp->inbuf.unacked, + (int)pxtcp->inbuf.unsent, + (int)pxtcp->inbuf.vacant)); + + LWIP_ASSERT1(pxtcp->pcb == pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + + if (pcb->acked > 0) { + pxtcp_pcb_sent(pxtcp, pcb, pcb->acked); + } + return; + } + + DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n", + (void *)pxtcp, proxy_lwip_strerr(error))); + + pxtcp->pcb = NULL; /* pcb is gone */ + if (pxtcp->deferred_delete) { + pxtcp_pcb_reset_pxtcp(pxtcp); + } + else { + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp); + } +} diff --git a/src/VBox/NetworkServices/NAT/pxtcp.h b/src/VBox/NetworkServices/NAT/pxtcp.h new file mode 100644 index 00000000..6e182411 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxtcp.h @@ -0,0 +1,52 @@ +/* $Id: pxtcp.h $ */ +/** @file + * NAT Network - TCP proxy, internal interface declarations. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_NAT_pxtcp_h +#define VBOX_INCLUDED_SRC_NAT_pxtcp_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include "lwip/err.h" +#include "lwip/ip_addr.h" + +struct pbuf; +struct tcp_pcb; +struct pxtcp; +struct fwspec; + +err_t pxtcp_pcb_accept_outbound(struct tcp_pcb *, struct pbuf *, int, ipX_addr_t *, u16_t); + +struct pxtcp *pxtcp_create_forwarded(SOCKET); +void pxtcp_cancel_forwarded(struct pxtcp *); + +void pxtcp_pcb_connect(struct pxtcp *, const struct fwspec *); + +int pxtcp_pmgr_add(struct pxtcp *); +void pxtcp_pmgr_del(struct pxtcp *); + +#endif /* !VBOX_INCLUDED_SRC_NAT_pxtcp_h */ diff --git a/src/VBox/NetworkServices/NAT/pxudp.c b/src/VBox/NetworkServices/NAT/pxudp.c new file mode 100644 index 00000000..1c8fab12 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxudp.c @@ -0,0 +1,858 @@ +/* $Id: pxudp.c $ */ +/** @file + * NAT Network - UDP proxy. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "winutils.h" +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "pxremap.h" + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#ifdef RT_OS_DARWIN +# define __APPLE_USE_RFC_3542 +#endif +#include <netinet/in.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <poll.h> + +#include <err.h> /* BSD'ism */ +#else +#include <stdlib.h> +#include <iprt/stdint.h> +#include <stdio.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" +#include "lwip/udp.h" +#include "lwip/icmp.h" + +struct pxudp { + /** + * Our poll manager handler. + */ + struct pollmgr_handler pmhdl; + + /** + * lwIP ("internal") side of the proxied connection. + */ + struct udp_pcb *pcb; + + /** + * Host ("external") side of the proxied connection. + */ + SOCKET sock; + + /** + * Is this pcb a mapped host loopback? + */ + int is_mapped; + + /** + * Cached value of TTL socket option. + */ + int ttl; + + /** + * Cached value of TOS socket option. + */ + int tos; + + /** + * Cached value of "don't fragment" socket option. + */ + int df; + + /** + * For some protocols (notably: DNS) we know we are getting just + * one reply, so we don't want the pcb and the socket to sit there + * waiting to be g/c'ed by timeout. This field counts requests and + * replies for them. + */ + int count; + + /** + * Mailbox for inbound pbufs. + * + * XXX: since we have single producer and single consumer we can + * use lockless ringbuf like for pxtcp. + */ + sys_mbox_t inmbox; + + /** + * lwIP thread's strong reference to us. + */ + struct pollmgr_refptr *rp; + + /* + * We use static messages to avoid malloc/free overhead.
+ */ + struct tcpip_msg msg_delete; /* delete pxudp */ + struct tcpip_msg msg_inbound; /* trigger send of inbound data */ +}; + + +static struct pxudp *pxudp_allocate(void); +static void pxudp_drain_inmbox(struct pxudp *); +static void pxudp_free(struct pxudp *); + +static struct udp_pcb *pxudp_pcb_dissociate(struct pxudp *); + +/* poll manager callbacks for pxudp related channels */ +static int pxudp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int); +static int pxudp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int); + +/* helper functions for sending/receiving pxudp over poll manager channels */ +static ssize_t pxudp_chan_send(enum pollmgr_slot_t, struct pxudp *); +static ssize_t pxudp_chan_send_weak(enum pollmgr_slot_t, struct pxudp *); +static struct pxudp *pxudp_chan_recv(struct pollmgr_handler *, SOCKET, int); +static struct pxudp *pxudp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int); + +/* poll manager callbacks for individual sockets */ +static int pxudp_pmgr_pump(struct pollmgr_handler *, SOCKET, int); + +/* convenience function for poll manager callback */ +static int pxudp_schedule_delete(struct pxudp *); + +/* lwip thread callbacks called via proxy_lwip_post() */ +static void pxudp_pcb_delete_pxudp(void *); + +/* outbound ttl check */ +static int pxudp_ttl_expired(struct pbuf *); + +/* udp pcb callbacks &c */ +static void pxudp_pcb_accept(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t); +static void pxudp_pcb_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t); +static void pxudp_pcb_forward_outbound(struct pxudp *, struct pbuf *, ip_addr_t *, u16_t); +static void pxudp_pcb_expired(struct pxudp *); +static void pxudp_pcb_write_inbound(void *); +static void pxudp_pcb_forward_inbound(struct pxudp *); + +/* poll manager handlers for pxudp channels */ +static struct pollmgr_handler pxudp_pmgr_chan_add_hdl; +static struct pollmgr_handler pxudp_pmgr_chan_del_hdl; + + +void +pxudp_init(void) +{ + /* + * Create 
channels. + */ + pxudp_pmgr_chan_add_hdl.callback = pxudp_pmgr_chan_add; + pxudp_pmgr_chan_add_hdl.data = NULL; + pxudp_pmgr_chan_add_hdl.slot = -1; + pollmgr_add_chan(POLLMGR_CHAN_PXUDP_ADD, &pxudp_pmgr_chan_add_hdl); + + pxudp_pmgr_chan_del_hdl.callback = pxudp_pmgr_chan_del; + pxudp_pmgr_chan_del_hdl.data = NULL; + pxudp_pmgr_chan_del_hdl.slot = -1; + pollmgr_add_chan(POLLMGR_CHAN_PXUDP_DEL, &pxudp_pmgr_chan_del_hdl); + + udp_proxy_accept(pxudp_pcb_accept); +} + + +/** + * Syntactic sugar for sending pxudp pointer over poll manager + * channel. Used by lwip thread functions. + */ +static ssize_t +pxudp_chan_send(enum pollmgr_slot_t chan, struct pxudp *pxudp) +{ + return pollmgr_chan_send(chan, &pxudp, sizeof(pxudp)); +} + + +/** + * Syntactic sugar for sending weak reference to pxudp over poll + * manager channel. Used by lwip thread functions. + */ +static ssize_t +pxudp_chan_send_weak(enum pollmgr_slot_t chan, struct pxudp *pxudp) +{ + pollmgr_refptr_weak_ref(pxudp->rp); + return pollmgr_chan_send(chan, &pxudp->rp, sizeof(pxudp->rp)); +} + + +/** + * Counterpart of pxudp_chan_send(). + */ +static struct pxudp * +pxudp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxudp *pxudp; + + pxudp = (struct pxudp *)pollmgr_chan_recv_ptr(handler, fd, revents); + return pxudp; +} + + +/** + * Counterpart of pxudp_chan_send_weak(). + */ +struct pxudp * +pxudp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pollmgr_refptr *rp; + struct pollmgr_handler *base; + struct pxudp *pxudp; + + rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents); + base = (struct pollmgr_handler *)pollmgr_refptr_get(rp); + pxudp = (struct pxudp *)base; + + return pxudp; +} + + +/** + * POLLMGR_CHAN_PXUDP_ADD handler. + * + * Get new pxudp from lwip thread and start polling its socket. 
+ */
+static int
+pxudp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxudp *pxudp;
+    int status;
+
+    pxudp = pxudp_chan_recv(handler, fd, revents);
+
+    /* Check the pointer before the debug print below dereferences it. */
+    LWIP_ASSERT1(pxudp != NULL);
+    DPRINTF(("pxudp_add: new pxudp %p; pcb %p\n",
+             (void *)pxudp, (void *)pxudp->pcb));
+
+    LWIP_ASSERT1(pxudp->pmhdl.callback != NULL);
+    /* Compare, don't assign: "=" here made the assertion always pass. */
+    LWIP_ASSERT1(pxudp->pmhdl.data == (void *)pxudp);
+    LWIP_ASSERT1(pxudp->pmhdl.slot < 0);
+
+
+    status = pollmgr_add(&pxudp->pmhdl, pxudp->sock, POLLIN);
+    if (status < 0) {
+        pxudp_schedule_delete(pxudp);
+    }
+
+    return POLLIN;
+}
+
+
+/**
+ * POLLMGR_CHAN_PXUDP_DEL handler.
+ *
+ * Resolves the weak reference sent by the lwip thread; if the pxudp
+ * is already gone there is nothing to do.  Otherwise removes it from
+ * the poll manager and schedules its deletion on the lwip thread.
+ */
+static int
+pxudp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxudp *pxudp;
+
+    pxudp = pxudp_chan_recv_strong(handler, fd, revents);
+    if (pxudp == NULL) {
+        return POLLIN;
+    }
+
+    DPRINTF(("pxudp_del: pxudp %p; socket %d\n", (void *)pxudp, pxudp->sock));
+
+    pollmgr_del_slot(pxudp->pmhdl.slot);
+
+    /*
+     * Go back to lwip thread to delete after any pending callbacks
+     * for unprocessed inbound traffic are drained.
+     */
+    pxudp_schedule_delete(pxudp);
+
+    return POLLIN;
+}
+
+
+/**
+ * Allocate and initialize a pxudp that is not yet bound to a pcb or
+ * a socket.
+ *
+ * On success the object owns a refptr and a 16-slot inbound mailbox,
+ * and its two static lwip messages are ready to post.
+ *
+ * @returns new pxudp, or NULL if any allocation failed (nothing is
+ *          leaked in that case).
+ */
+static struct pxudp *
+pxudp_allocate(void)
+{
+    struct pxudp *pxudp;
+    err_t error;
+
+    pxudp = (struct pxudp *)malloc(sizeof(*pxudp));
+    if (pxudp == NULL) {
+        return NULL;
+    }
+
+    pxudp->pmhdl.callback = NULL;
+    pxudp->pmhdl.data = (void *)pxudp;
+    pxudp->pmhdl.slot = -1;
+
+    pxudp->pcb = NULL;
+    pxudp->sock = INVALID_SOCKET;
+    pxudp->is_mapped = 0;       /* don't hand out indeterminate memory */
+    pxudp->df = -1;             /* -1: socket option not set yet */
+    pxudp->ttl = -1;
+    pxudp->tos = -1;
+    pxudp->count = 0;
+
+    pxudp->rp = pollmgr_refptr_create(&pxudp->pmhdl);
+    if (pxudp->rp == NULL) {
+        free(pxudp);
+        return NULL;
+    }
+
+    error = sys_mbox_new(&pxudp->inmbox, 16);
+    if (error != ERR_OK) {
+        pollmgr_refptr_unref(pxudp->rp);
+        free(pxudp);
+        return NULL;
+    }
+
+#define CALLBACK_MSG(MSG, FUNC)                         \
+    do {                                                \
+        pxudp->MSG.type = TCPIP_MSG_CALLBACK_STATIC;    \
+        pxudp->MSG.sem = NULL;                          \
+        pxudp->MSG.msg.cb.function = FUNC;              \
+        pxudp->MSG.msg.cb.ctx = (void *)pxudp;          \
+    } while (0)
+
+    CALLBACK_MSG(msg_delete, pxudp_pcb_delete_pxudp);
+    CALLBACK_MSG(msg_inbound, pxudp_pcb_write_inbound);
+
+    return pxudp;
+}
+
+
+/**
+ * Free every pbuf still queued in the inbound mailbox, then destroy
+ * the mailbox and mark it invalid.  No-op if the mailbox is already
+ * invalid.
+ */
+static void
+pxudp_drain_inmbox(struct pxudp *pxudp)
+{
+    void *ptr;
+
+    if (!sys_mbox_valid(&pxudp->inmbox)) {
+        return;
+    }
+
+    while (sys_mbox_tryfetch(&pxudp->inmbox, &ptr) != SYS_MBOX_EMPTY) {
+        struct pbuf *p = (struct pbuf *)ptr;
+        pbuf_free(p);
+    }
+
+    sys_mbox_free(&pxudp->inmbox);
+    sys_mbox_set_invalid(&pxudp->inmbox);
+}
+
+
+/**
+ * Release the mailbox and the pxudp itself.  Does not touch the pcb,
+ * the socket or the refptr; callers deal with those first.
+ */
+static void
+pxudp_free(struct pxudp *pxudp)
+{
+    pxudp_drain_inmbox(pxudp);
+    free(pxudp);
+}
+
+
+/**
+ * Dissociate pxudp and its udp_pcb.
+ *
+ * Unlike its TCP cousin returns the pcb since UDP pcbs need to be
+ * actively deleted, so save callers the trouble of saving a copy
+ * before calling us.
+ */
+static struct udp_pcb *
+pxudp_pcb_dissociate(struct pxudp *pxudp)
+{
+    struct udp_pcb *pcb;
+
+    if (pxudp == NULL || pxudp->pcb == NULL) {
+        return NULL;
+    }
+
+    pcb = pxudp->pcb;
+
+    /* unhook our receive callback so the pcb no longer refers to us */
+    udp_recv(pxudp->pcb, NULL, NULL);
+    pxudp->pcb = NULL;
+
+    return pcb;
+}
+
+
+/**
+ * Lwip thread callback invoked via pxudp::msg_delete
+ *
+ * Since we use static messages to communicate to the lwip thread, we
+ * cannot delete pxudp without making sure there are no unprocessed
+ * messages in the lwip thread mailbox.
+ *
+ * The easiest way to ensure that is to send this "delete" message as
+ * the last one and when it's processed we know there are no more and
+ * it's safe to delete pxudp.
+ *
+ * Channel callback should use pxudp_schedule_delete() convenience
+ * function defined below.
+ *
+ * Closes the host socket, removes the pcb if it is still attached,
+ * drops the refptr reference and frees the pxudp.
+ */
+static void
+pxudp_pcb_delete_pxudp(void *arg)
+{
+    struct pxudp *pxudp = (struct pxudp *)arg;
+    struct udp_pcb *pcb;
+
+    LWIP_ASSERT1(pxudp != NULL);
+
+    if (pxudp->sock != INVALID_SOCKET) {
+        closesocket(pxudp->sock);
+        pxudp->sock = INVALID_SOCKET;
+    }
+
+    pcb = pxudp_pcb_dissociate(pxudp);
+    if (pcb != NULL) {
+        udp_remove(pcb);
+    }
+
+    pollmgr_refptr_unref(pxudp->rp);
+    pxudp_free(pxudp);
+}
+
+
+/**
+ * Poll manager callback should use this convenience wrapper to
+ * schedule pxudp deletion on the lwip thread and to deregister from
+ * the poll manager.
+ *
+ * @returns -1 always, so a poll manager callback can simply
+ *          "return pxudp_schedule_delete(pxudp);".
+ */
+static int
+pxudp_schedule_delete(struct pxudp *pxudp)
+{
+    /*
+     * If pollmgr_refptr_get() is called by any channel before
+     * scheduled deletion happens, let them know we are gone.
+     */
+    pxudp->pmhdl.slot = -1;
+
+    /*
+     * Schedule deletion.  Since poll manager thread may be pre-empted
+     * right after we send the message, the deletion may actually
+     * happen on the lwip thread before we return from this function,
+     * so it's not safe to refer to pxudp after this call.
+     */
+    proxy_lwip_post(&pxudp->msg_delete);
+
+    /* tell poll manager to deregister us */
+    return -1;
+}
+
+
+/**
+ * Outbound TTL/HOPL check.
+ *
+ * If the current datagram's TTL (IPv4) or hop limit (IPv6) is too
+ * small to be forwarded, try to send the ICMP "time exceeded" error
+ * back and free the pbuf.
+ *
+ * @returns 1 if the datagram expired (p has been freed and must not
+ *          be used by the caller), 0 otherwise.
+ */
+static int
+pxudp_ttl_expired(struct pbuf *p)
+{
+    int ttl;
+
+    if (ip_current_is_v6()) {
+        ttl = IP6H_HOPLIM(ip6_current_header());
+    }
+    else {
+        ttl = IPH_TTL(ip_current_header());
+    }
+
+    if (RT_UNLIKELY(ttl <= 1)) {
+        /* expose the IP and UDP headers again for the ICMP error */
+        int status = pbuf_header(p, ip_current_header_tot_len() + UDP_HLEN);
+        if (RT_LIKELY(status == 0)) {
+            if (ip_current_is_v6()) {
+                icmp6_time_exceeded(p, ICMP6_TE_HL);
+            }
+            else {
+                icmp_time_exceeded(p, ICMP_TE_TTL);
+            }
+        }
+        pbuf_free(p);
+        return 1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * New proxied UDP conversation created.
+ * Global callback for udp_proxy_accept().
+ *
+ * Allocates a pxudp, connects a host socket to the (possibly
+ * remapped) destination, hands the pxudp to the poll manager and
+ * forwards the first datagram.  On any failure the new pcb is removed
+ * and the datagram is dropped.
+ */
+static void
+pxudp_pcb_accept(void *arg, struct udp_pcb *newpcb, struct pbuf *p,
+                 ip_addr_t *addr, u16_t port)
+{
+    struct pxudp *pxudp;
+    ipX_addr_t dst_addr;
+    int mapping;
+    int sdom;
+    SOCKET sock;
+
+    LWIP_ASSERT1(newpcb != NULL);
+    LWIP_ASSERT1(p != NULL);
+    LWIP_UNUSED_ARG(arg);
+
+    mapping = pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);
+    if (mapping != PXREMAP_MAPPED && pxudp_ttl_expired(p)) {
+        /* p was already freed by pxudp_ttl_expired() */
+        udp_remove(newpcb);
+        return;
+    }
+
+    pxudp = pxudp_allocate();
+    if (pxudp == NULL) {
+        DPRINTF(("pxudp_allocate: failed\n"));
+        udp_remove(newpcb);
+        pbuf_free(p);
+        return;
+    }
+
+    sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
+    pxudp->is_mapped = (mapping == PXREMAP_MAPPED);
+
+#if 0 /* XXX: DNS IPv6->IPv4 remapping hack */
+    if (pxudp->is_mapped
+        && newpcb->local_port == 53
+        && PCB_ISIPV6(newpcb))
+    {
+        /*
+         * "Remap" DNS over IPv6 to IPv4 since Ubuntu dnsmasq does not
+         * listen on IPv6.
+         */
+        sdom = PF_INET;
+        ipX_addr_set_loopback(0, &dst_addr);
+    }
+#endif /* DNS IPv6->IPv4 remapping hack */
+
+    sock = proxy_connected_socket(sdom, SOCK_DGRAM,
+                                  &dst_addr, newpcb->local_port);
+    if (sock == INVALID_SOCKET) {
+        /*
+         * Undo pxudp_allocate(): nobody else knows about this pxudp
+         * yet, so drop the refptr reference and free the mailbox and
+         * the structure here, otherwise each failed connect leaks
+         * them.
+         */
+        pollmgr_refptr_unref(pxudp->rp);
+        pxudp_free(pxudp);
+
+        udp_remove(newpcb);
+        pbuf_free(p);
+        return;
+    }
+
+    pxudp->sock = sock;
+    pxudp->pcb = newpcb;
+    udp_recv(newpcb, pxudp_pcb_recv, pxudp);
+
+    pxudp->pmhdl.callback = pxudp_pmgr_pump;
+    pxudp_chan_send(POLLMGR_CHAN_PXUDP_ADD, pxudp);
+
+    /* dispatch directly instead of calling pxudp_pcb_recv() */
+    pxudp_pcb_forward_outbound(pxudp, p, addr, port);
+}
+
+
+/**
+ * udp_recv() callback.
+ *
+ * A non-NULL pbuf is an outbound datagram to forward; a NULL pbuf
+ * signals that the pcb has expired.
+ */
+static void
+pxudp_pcb_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+               ip_addr_t *addr, u16_t port)
+{
+    struct pxudp *pxudp = (struct pxudp *)arg;
+
+    LWIP_ASSERT1(pxudp != NULL);
+    LWIP_ASSERT1(pcb == pxudp->pcb);
+    LWIP_UNUSED_ARG(pcb);
+
+    if (p != NULL) {
+        pxudp_pcb_forward_outbound(pxudp, p, addr, port);
+    }
+    else {
+        pxudp_pcb_expired(pxudp);
+    }
+}
+
+
+/**
+ * Forward one guest datagram to the host socket.
+ *
+ * Before sending, mirrors the datagram's TTL/hop limit, TOS and DF
+ * bit onto the socket; the last value set is cached in pxudp so that
+ * setsockopt() is only called when a value changes.  Consumes p in
+ * all cases.
+ */
+static void
+pxudp_pcb_forward_outbound(struct pxudp *pxudp, struct pbuf *p,
+                           ip_addr_t *addr, u16_t port)
+{
+    int status;
+
+    LWIP_UNUSED_ARG(addr);
+    LWIP_UNUSED_ARG(port);
+
+    if (!pxudp->is_mapped && pxudp_ttl_expired(p)) {
+        return;
+    }
+
+    if (!ip_current_is_v6()) { /* IPv4 */
+        const struct ip_hdr *iph = ip_current_header();
+        int ttl, tos, df;
+
+        /*
+         * Different OSes have different socket options for DF.
+         * Unlike pxping.c, we can't use IP_HDRINCL here as it's only
+         * valid for SOCK_RAW.
+         */
+#     define USE_DF_OPTION(_Optname)                    \
+        const int dfopt = _Optname;                     \
+        const char * const dfoptname = #_Optname;       \
+        RT_NOREF_PV(dfoptname)
+#if   defined(IP_MTU_DISCOVER)  /* Linux */
+        USE_DF_OPTION(IP_MTU_DISCOVER);
+#elif defined(IP_DONTFRAG)      /* Solaris 11+, FreeBSD */
+        USE_DF_OPTION(IP_DONTFRAG);
+#elif defined(IP_DONTFRAGMENT)  /* Windows */
+        USE_DF_OPTION(IP_DONTFRAGMENT);
+#else
+        USE_DF_OPTION(0);
+#endif
+
+        ttl = IPH_TTL(iph);
+        if (!pxudp->is_mapped) {
+            /* we act as a router hop for non-mapped destinations */
+            LWIP_ASSERT1(ttl > 1);
+            --ttl;
+        }
+
+        if (ttl != pxudp->ttl) {
+            status = setsockopt(pxudp->sock, IPPROTO_IP, IP_TTL,
+                                (char *)&ttl, sizeof(ttl));
+            if (RT_LIKELY(status == 0)) {
+                pxudp->ttl = ttl;
+            }
+            else {
+                DPRINTF(("IP_TTL: %R[sockerr]\n", SOCKERRNO()));
+            }
+        }
+
+        tos = IPH_TOS(iph);
+        if (tos != pxudp->tos) {
+            status = setsockopt(pxudp->sock, IPPROTO_IP, IP_TOS,
+                                (char *)&tos, sizeof(tos));
+            if (RT_LIKELY(status == 0)) {
+                pxudp->tos = tos;
+            }
+            else {
+                DPRINTF(("IP_TOS: %R[sockerr]\n", SOCKERRNO()));
+            }
+        }
+
+        if (dfopt) {
+            df = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0;
+#if defined(IP_MTU_DISCOVER)
+            /* Linux takes a PMTU discovery mode, not a boolean */
+            df = df ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
+#endif
+            if (df != pxudp->df) {
+                status = setsockopt(pxudp->sock, IPPROTO_IP, dfopt,
+                                    (char *)&df, sizeof(df));
+                if (RT_LIKELY(status == 0)) {
+                    pxudp->df = df;
+                }
+                else {
+                    DPRINTF(("%s: %R[sockerr]\n", dfoptname, SOCKERRNO()));
+                }
+            }
+        }
+    }
+    else { /* IPv6 */
+        const struct ip6_hdr *iph = ip6_current_header();
+        int ttl;
+
+        ttl = IP6H_HOPLIM(iph);
+        if (!pxudp->is_mapped) {
+            LWIP_ASSERT1(ttl > 1);
+            --ttl;
+        }
+
+        if (ttl != pxudp->ttl) {
+            status = setsockopt(pxudp->sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
+                                (char *)&ttl, sizeof(ttl));
+            if (RT_LIKELY(status == 0)) {
+                pxudp->ttl = ttl;
+            }
+            else {
+                DPRINTF(("IPV6_UNICAST_HOPS: %R[sockerr]\n", SOCKERRNO()));
+            }
+        }
+    }
+
+    /* DNS: count requests so the pxudp can be retired after the replies */
+    if (pxudp->pcb->local_port == 53) {
+        ++pxudp->count;
+    }
+
+    proxy_sendto(pxudp->sock, p, NULL, 0);
+    pbuf_free(p);
+}
+
+
+/**
+ * Proxy udp_pcbs are expired by timer, which is signaled by passing
+ * NULL pbuf to the udp_recv() callback.  At that point the pcb is
+ * removed from the list of proxy udp pcbs so no new datagrams will be
+ * delivered.
+ *
+ * Removes the pcb and asks the poll manager thread (via a weak
+ * reference on the PXUDP_DEL channel) to stop polling the socket and
+ * schedule deletion of the pxudp.
+ */
+static void
+pxudp_pcb_expired(struct pxudp *pxudp)
+{
+    struct udp_pcb *pcb;
+
+    DPRINTF2(("%s: pxudp %p, pcb %p, sock %d: expired\n",
+              __func__, (void *)pxudp, (void *)pxudp->pcb, pxudp->sock));
+
+    pcb = pxudp_pcb_dissociate(pxudp);
+    if (pcb != NULL) {
+        udp_remove(pcb);
+    }
+
+    pxudp_chan_send_weak(POLLMGR_CHAN_PXUDP_DEL, pxudp);
+}
+
+
+/**
+ * Poll manager callback for the host socket of a pxudp.
+ *
+ * Reads one datagram from the socket, copies it into a pbuf, queues
+ * the pbuf in the inbound mailbox and pokes the lwip thread to
+ * deliver it to the guest.  Socket errors are only logged; events
+ * other than POLLIN and POLLERR schedule deletion of the pxudp.
+ *
+ * @returns POLLIN to keep polling, or the result of
+ *          pxudp_schedule_delete() to deregister.
+ */
+static int
+pxudp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxudp *pxudp;
+    struct pbuf *p;
+    ssize_t nread;
+    err_t error;
+
+    pxudp = (struct pxudp *)handler->data;
+    LWIP_ASSERT1(handler == &pxudp->pmhdl);
+    LWIP_ASSERT1(fd == pxudp->sock);
+    LWIP_UNUSED_ARG(fd);
+
+
+    if (revents & ~(POLLIN|POLLERR)) {
+        DPRINTF(("%s: unexpected revents 0x%x\n", __func__, revents));
+        return pxudp_schedule_delete(pxudp);
+    }
+
+    /*
+     * XXX: AFAICS, there's no way to match the error with the
+     * outgoing datagram that triggered it, since we do non-blocking
+     * sends from lwip thread.
+     */
+    if (revents & POLLERR) {
+        int sockerr = -1;
+        socklen_t optlen = (socklen_t)sizeof(sockerr);
+        int status;
+
+        status = getsockopt(pxudp->sock, SOL_SOCKET,
+                            SO_ERROR, (char *)&sockerr, &optlen);
+        if (status < 0) {
+            DPRINTF(("%s: sock %d: SO_ERROR failed:%R[sockerr]\n",
+                     __func__, pxudp->sock, SOCKERRNO()));
+        }
+        else {
+            DPRINTF(("%s: sock %d: %R[sockerr]\n",
+                     __func__, pxudp->sock, sockerr));
+        }
+    }
+
+    if ((revents & POLLIN) == 0) {
+        return POLLIN;
+    }
+
+#ifdef RT_OS_WINDOWS
+    nread = recv(pxudp->sock, (char *)pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
+#else
+    nread = recv(pxudp->sock, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
+#endif
+    if (nread == SOCKET_ERROR) {
+        DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
+        return POLLIN;
+    }
+
+    p = pbuf_alloc(PBUF_RAW, (u16_t)nread, PBUF_RAM);
+    if (p == NULL) {
+        DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)nread));
+        return POLLIN;
+    }
+
+    error = pbuf_take(p, pollmgr_udpbuf, (u16_t)nread);
+    if (error != ERR_OK) {
+        DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)nread));
+        pbuf_free(p);
+        return POLLIN;
+    }
+
+    /* mailbox full: drop the datagram, this is UDP after all */
+    error = sys_mbox_trypost(&pxudp->inmbox, p);
+    if (error != ERR_OK) {
+        pbuf_free(p);
+        return POLLIN;
+    }
+
+    proxy_lwip_post(&pxudp->msg_inbound);
+
+    return POLLIN;
+}
+
+
+/**
+ * Callback from poll manager to trigger sending to guest.
+ *
+ * Runs on the lwip thread via pxudp::msg_inbound.  Does nothing if
+ * the pcb has already been dissociated.
+ */
+static void
+pxudp_pcb_write_inbound(void *ctx)
+{
+    struct pxudp *pxudp = (struct pxudp *)ctx;
+    LWIP_ASSERT1(pxudp != NULL);
+
+    if (pxudp->pcb == NULL) {
+        return;
+    }
+
+    pxudp_pcb_forward_inbound(pxudp);
+}
+
+
+/**
+ * Take one pbuf from the inbound mailbox and send it to the guest.
+ *
+ * For conversations with reply counting enabled (see
+ * pxudp_pcb_forward_outbound()) the pxudp is expired once the last
+ * expected reply has been delivered.
+ */
+static void
+pxudp_pcb_forward_inbound(struct pxudp *pxudp)
+{
+    struct pbuf *p;
+    u32_t timo;
+    err_t error;
+
+    if (!sys_mbox_valid(&pxudp->inmbox)) {
+        return;
+    }
+
+    timo = sys_mbox_tryfetch(&pxudp->inmbox, (void **)&p);
+    if (timo == SYS_MBOX_EMPTY) {
+        return;
+    }
+
+    error = udp_send(pxudp->pcb, p);
+    if (error != ERR_OK) {
+        /* log the pcb, as the message says, not the pxudp */
+        DPRINTF(("%s: udp_send(pcb %p) err %d\n",
+                 __func__, (void *)pxudp->pcb, error));
+    }
+
+    pbuf_free(p);
+
+    /*
+     * If we enabled counting in pxudp_pcb_forward_outbound() check
+     * that we have (all) the reply(s).
+     */
+    if (pxudp->count > 0) {
+        --pxudp->count;
+        if (pxudp->count == 0) {
+            pxudp_pcb_expired(pxudp);
+        }
+    }
+}
diff --git a/src/VBox/NetworkServices/NAT/rtmon_bsd.c b/src/VBox/NetworkServices/NAT/rtmon_bsd.c
new file mode 100644
index 00000000..05bec3da
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/rtmon_bsd.c
@@ -0,0 +1,126 @@
+/* $Id: rtmon_bsd.c $ */
+/** @file
+ * NAT Network - IPv6 default route monitor for BSD routing sockets.
+ */
+
+/*
+ * Copyright (C) 2013-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "proxy.h" + +#include <sys/types.h> +#include <sys/socket.h> + +#include <net/if_dl.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/ip6.h> + +#include <errno.h> +#include <string.h> +#include <unistd.h> + + +/** + * Query IPv6 routing table - BSD routing sockets version. + * + * We don't actually monitor the routing socket for updates, and + * instead query the kernel each time. + * + * We take a shortcut and don't read the reply to our RTM_GET - if + * there's no default IPv6 route, write(2) will fail with ESRCH + * synchronously. In theory it may fail asynchronously and we should + * wait for the RTM_GET reply and check rt_msghdr::rtm_errno. + * + * KAME code in *BSD maintains internally a list of default routers + * that it learned from RAs, and installs only one of them into the + * routing table (actually, I'm not sure if BSD routing table can + * handle multiple routes to the same destination). One side-effect + * of this is that when manually configured route (e.g. teredo) is + * deleted, the system will lose its default route even when KAME IPv6 + * has default router(s) in its internal list. Next RA will force the + * update, though. + * + * Solaris does expose multiple routes in the routing table and + * replies to RTM_GET with "default default". 
+ *
+ * @returns 1 if the RTM_GET request was accepted (a default IPv6
+ *          route is assumed to exist), 0 if the kernel reported
+ *          ESRCH (no default route), -1 on any other failure.
+ */
+int
+rtmon_get_defaults(void)
+{
+    int rtsock;
+    struct req {
+        struct rt_msghdr rtm;
+        struct sockaddr_in6 dst;
+        struct sockaddr_in6 mask;
+        struct sockaddr_dl ifp;
+    } req;
+    ssize_t nsent;
+    int saved_errno;
+
+    rtsock = socket(PF_ROUTE, SOCK_RAW, AF_INET6);
+    if (rtsock < 0) {
+        DPRINTF0(("rtmon: failed to create routing socket\n"));
+        return -1;
+    }
+
+    /* all-zero dst/mask describe the default route (::/0) */
+    memset(&req, 0, sizeof(req));
+
+    req.rtm.rtm_type = RTM_GET;
+    req.rtm.rtm_version = RTM_VERSION;
+    req.rtm.rtm_msglen = sizeof(req);
+    req.rtm.rtm_seq = 0x12345;
+
+    req.rtm.rtm_flags = RTF_UP;
+    req.rtm.rtm_addrs = RTA_DST | RTA_NETMASK | RTA_IFP;
+
+    req.dst.sin6_family = AF_INET6;
+#if HAVE_SA_LEN
+    req.dst.sin6_len = sizeof(req.dst);
+#endif
+
+    req.mask.sin6_family = AF_INET6;
+#if HAVE_SA_LEN
+    req.mask.sin6_len = sizeof(req.mask);
+#endif
+
+    req.ifp.sdl_family = AF_LINK;
+#if HAVE_SA_LEN
+    req.ifp.sdl_len = sizeof(req.ifp);
+#endif
+
+    nsent = write(rtsock, &req, req.rtm.rtm_msglen);
+    saved_errno = errno;        /* close() may clobber errno */
+
+    /*
+     * The socket is only used for this one request and the reply is
+     * never read, so close it on every path; it used to be leaked on
+     * each call.
+     */
+    close(rtsock);
+
+    if (nsent < 0) {
+        if (saved_errno == ESRCH) {
+            /* there's no default route */
+            return 0;
+        }
+        else {
+            DPRINTF0(("rtmon: failed to send RTM_GET\n"));
+            return -1;
+        }
+    }
+
+    return 1;
+}
diff --git a/src/VBox/NetworkServices/NAT/rtmon_linux.c b/src/VBox/NetworkServices/NAT/rtmon_linux.c
new file mode 100644
index 00000000..1ef37e34
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/rtmon_linux.c
@@ -0,0 +1,259 @@
+/* $Id: rtmon_linux.c $ */
+/** @file
+ * NAT Network - IPv6 default route monitor for Linux netlink.
+ */
+
+/*
+ * Copyright (C) 2013-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +#define LOG_GROUP LOG_GROUP_NAT_SERVICE + +#include "proxy.h" + +#include <sys/types.h> /* must come before linux/netlink */ +#include <sys/socket.h> + +#include <asm/types.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include <errno.h> +#include <string.h> +#include <unistd.h> + + +static int rtmon_check_defaults(const void *buf, size_t len); + + +/** + * Read IPv6 routing table - Linux rtnetlink version. + * + * XXX: TODO: To avoid re-reading the table we should subscribe to + * updates by binding a monitoring NETLINK_ROUTE socket to + * sockaddr_nl::nl_groups = RTMGRP_IPV6_ROUTE. + * + * But that will provide updates only. Documentation is scarce, but + * from what I've seen it seems that to get accurate routing info the + * monitoring socket needs to be created first, then full routing + * table requested (easier to do via spearate socket), then monitoring + * socket polled for input. The first update(s) of the monitoring + * socket may happen before full table is returned, so we can't just + * count the defaults, we need to keep track of their { oif, gw } to + * correctly ignore updates that are reported via monitoring socket, + * but that are already reflected in the full routing table returned + * in response to our request. 
+ *
+ * @returns the value computed by rtmon_check_defaults() for the
+ *          kernel's reply, or -1 if the request could not be made.
+ */
+int
+rtmon_get_defaults(void)
+{
+    int rtsock;
+    ssize_t nsent, ssize;
+    int ndefrts;
+
+    char *buf = NULL;
+    size_t bufsize;
+
+    struct {
+        struct nlmsghdr nh;
+        struct rtmsg rtm;
+        char attrbuf[512];
+    } rtreq;
+
+    memset(&rtreq, 0, sizeof(rtreq));
+    rtreq.nh.nlmsg_type = RTM_GETROUTE;
+    rtreq.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+    rtreq.rtm.rtm_family = AF_INET6;
+    rtreq.rtm.rtm_table = RT_TABLE_MAIN;
+    rtreq.rtm.rtm_protocol = RTPROT_UNSPEC;
+
+    /* no attributes are sent, attrbuf is not part of the message */
+    rtreq.nh.nlmsg_len = NLMSG_SPACE(sizeof(rtreq.rtm));
+
+    bufsize = 1024;
+    ssize = bufsize;
+    for (;;) {
+        char *newbuf;
+        int recverr;
+
+        /* first pass allocates; later passes grow to the size recv() reported */
+        newbuf = (char *)realloc(buf, ssize);
+        if (newbuf == NULL) {
+            DPRINTF0(("rtmon: failed to %sallocate buffer\n",
+                      buf == NULL ? "" : "re"));
+            free(buf);
+            return -1;
+        }
+
+        buf = newbuf;
+        bufsize = ssize;
+
+        /* it's easier to reopen than to flush */
+        rtsock = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+        if (rtsock < 0) {
+            DPRINTF0(("rtmon: failed to create netlink socket: %s", strerror(errno)));
+            free(buf);
+            return -1;
+        }
+
+        nsent = send(rtsock, &rtreq, rtreq.nh.nlmsg_len, 0);
+        if (nsent < 0) {
+            DPRINTF0(("rtmon: RTM_GETROUTE failed: %s", strerror(errno)));
+            close (rtsock);
+            free(buf);
+            return -1;
+        }
+
+        /*
+         * The result is compared against bufsize below to detect a
+         * truncated reply, relying on MSG_TRUNC to report the full
+         * datagram length.
+         */
+        ssize = recv(rtsock, buf, bufsize, MSG_TRUNC);
+        recverr = errno;        /* save before close() can change it */
+        close (rtsock);
+
+        if (ssize < 0) {
+            DPRINTF(("rtmon: failed to read RTM_GETROUTE response: %s",
+                     strerror(recverr)));
+            free(buf);
+            return -1;
+        }
+
+        if ((size_t)ssize <= bufsize) {
+            DPRINTF2(("rtmon: RTM_GETROUTE: %lu bytes\n",
+                      (unsigned long)ssize));
+            break;
+        }
+
+        DPRINTF2(("rtmon: RTM_GETROUTE: truncated %lu to %lu bytes, retrying\n",
+                  (unsigned long)ssize, (unsigned long)bufsize));
+        /* try again with larger buffer */
+    }
+
+    /* NOTE(review): only one recv() is issued per dump - confirm the
+     * whole dump always arrives in a single datagram */
+    ndefrts = rtmon_check_defaults(buf, (size_t)ssize);
+    free(buf);
+
+    if (ndefrts == 0) {
+        DPRINTF(("rtmon: no IPv6 default routes found\n"));
+    }
+    else {
+        DPRINTF(("rtmon: %d IPv6 default route%s found\n",
+                 ndefrts,
+                 ndefrts == 1 || ndefrts == -1 ? "" : "s"));
+    }
+
+    return ndefrts;
+}
+
+
+/**
+ * Scan netlink message in the buffer for IPv6 default route changes.
+ *
+ * Walks the netlink messages in buf and, for each route with a zero
+ * destination prefix length in the main table, adds +1 for
+ * RTM_NEWROUTE and -1 for RTM_DELROUTE.  Scanning stops at NLMSG_DONE
+ * or NLMSG_ERROR.
+ *
+ * @param buf   buffer with netlink messages.
+ * @param len   number of valid bytes in buf.
+ * @returns the net count of default routes added minus removed.
+ */
+static int
+rtmon_check_defaults(const void *buf, size_t len)
+{
+    struct nlmsghdr *nh;
+    int dfltdiff = 0;
+
+    for (nh = (struct nlmsghdr *)buf;
+         NLMSG_OK(nh, len);
+         nh = NLMSG_NEXT(nh, len))
+    {
+        struct rtmsg *rtm;
+        struct rtattr *rta;
+        int attrlen;
+        int delta = 0;
+        const void *gwbuf;
+        size_t gwlen;
+        int oif;
+
+        DPRINTF2(("nlmsg seq %d type %d flags 0x%x\n",
+                  nh->nlmsg_seq, nh->nlmsg_type, nh->nlmsg_flags));
+
+        if (nh->nlmsg_type == NLMSG_DONE) {
+            break;
+        }
+
+        if (nh->nlmsg_type == NLMSG_ERROR) {
+            struct nlmsgerr *ne = (struct nlmsgerr *)NLMSG_DATA(nh);
+            DPRINTF2(("> error %d\n", ne->error));
+            LWIP_UNUSED_ARG(ne);
+            break;
+        }
+
+        if (nh->nlmsg_type < RTM_BASE || RTM_MAX <= nh->nlmsg_type) {
+            /* shouldn't happen */
+            DPRINTF2(("> not an RTM message!\n"));
+            continue;
+        }
+
+
+        rtm = (struct rtmsg *)NLMSG_DATA(nh);
+        attrlen = RTM_PAYLOAD(nh);
+
+        if (nh->nlmsg_type == RTM_NEWROUTE) {
+            delta = +1;
+        }
+        else if (nh->nlmsg_type == RTM_DELROUTE) {
+            delta = -1;
+        }
+        else {
+            /* shouldn't happen */
+            continue;
+        }
+
+        /*
+         * Is this an IPv6 default route in the main table?  (Local
+         * table always has ::/0 reject route, hence the last check).
+         */
+        if (rtm->rtm_family == AF_INET6 /* should always be true */
+            && rtm->rtm_dst_len == 0
+            && rtm->rtm_table == RT_TABLE_MAIN)
+        {
+            dfltdiff += delta;
+        }
+        else {
+            /* some other route change */
+            continue;
+        }
+
+
+        /* gateway and output interface are parsed but not used yet */
+        gwbuf = NULL;
+        gwlen = 0;
+        oif = -1;
+
+        for (rta = RTM_RTA(rtm);
+             RTA_OK(rta, attrlen);
+             rta = RTA_NEXT(rta, attrlen))
+        {
+            if (rta->rta_type == RTA_GATEWAY) {
+                gwbuf = RTA_DATA(rta);
+                gwlen = RTA_PAYLOAD(rta);
+            }
+            else if (rta->rta_type == RTA_OIF) {
+                /* assert RTA_PAYLOAD(rta) == 4 */
+                memcpy(&oif, RTA_DATA(rta), sizeof(oif));
+            }
+        }
+
+        /* XXX: TODO: note that { oif, gw } was added/removed */
+        LWIP_UNUSED_ARG(gwbuf);
+        LWIP_UNUSED_ARG(gwlen);
+        LWIP_UNUSED_ARG(oif);
+    }
+
+    return dfltdiff;
+}
diff --git a/src/VBox/NetworkServices/NAT/rtmon_win.c b/src/VBox/NetworkServices/NAT/rtmon_win.c
new file mode 100644
index 00000000..7ceb431b
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/rtmon_win.c
@@ -0,0 +1,31 @@
+/* $Id: rtmon_win.c $ */
+/** @file
+ * NAT Network - IPv6 default route monitor for Windows.
+ */
+
+/*
+ * Copyright (C) 2013-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+/**
+ * Windows stub: no routing table query is performed here, the
+ * function unconditionally returns 0.
+ */
+int
+rtmon_get_defaults(void) {
+    return 0;
+}
diff --git a/src/VBox/NetworkServices/NAT/tftp.h b/src/VBox/NetworkServices/NAT/tftp.h
new file mode 100644
index 00000000..7e8d67b4
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/tftp.h
@@ -0,0 +1,59 @@
+/* $Id: tftp.h $ */
+/** @file
+ * NAT Network - Definitions for TFTP protocol.
+ */
+
+/*
+ * Copyright (C) 2013-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_tftp_h
+#define VBOX_INCLUDED_SRC_NAT_tftp_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+/* UDP port the TFTP server listens on */
+#define TFTP_SERVER_PORT 69
+
+/* opcodes */
+#define TFTP_RRQ    1
+#define TFTP_WRQ    2
+#define TFTP_DATA   3
+#define TFTP_ACK    4
+#define TFTP_ERROR  5
+/* RFC 2347 */
+#define TFTP_OACK   6
+
+
+/* error codes */
+#define TFTP_EUNDEF     0 /* Not defined, see error message (if any). */
+#define TFTP_ENOENT     1 /* File not found. */
+#define TFTP_EACCESS    2 /* Access violation. */
+#define TFTP_EFBIG      3 /* Disk full or allocation exceeded. */
+#define TFTP_ENOSYS     4 /* Illegal TFTP operation. */
+#define TFTP_ESRCH      5 /* Unknown transfer ID. */
+#define TFTP_EEXIST     6 /* File already exists. */
+#define TFTP_EUSER      7 /* No such user.
*/ +/* RFC 2347 */ +#define TFTP_EONAK 8 /* Option refused. */ + + +#endif /* !VBOX_INCLUDED_SRC_NAT_tftp_h */ diff --git a/src/VBox/NetworkServices/NAT/winpoll.h b/src/VBox/NetworkServices/NAT/winpoll.h new file mode 100644 index 00000000..5223946d --- /dev/null +++ b/src/VBox/NetworkServices/NAT/winpoll.h @@ -0,0 +1,63 @@ +/* $Id: winpoll.h $ */ +/** @file + * NAT Network - poll(2) for winsock, definitions and declarations. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_NAT_winpoll_h +#define VBOX_INCLUDED_SRC_NAT_winpoll_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif +# include <iprt/cdefs.h> +/** + * WinSock2 has definition for POLL* and pollfd, but it defined for _WIN32_WINNT > 0x0600 + * and used in WSAPoll, which has very unclear history. 
+ */
+# if(_WIN32_WINNT < 0x0600)
+/* event bits for pollfd::events and pollfd::revents */
+#  define POLLRDNORM  0x0100
+#  define POLLRDBAND  0x0200
+#  define POLLIN      (POLLRDNORM | POLLRDBAND)
+#  define POLLPRI     0x0400
+
+#  define POLLWRNORM  0x0010
+#  define POLLOUT     (POLLWRNORM)
+#  define POLLWRBAND  0x0020
+
+#  define POLLERR     0x0001
+#  define POLLHUP     0x0002
+#  define POLLNVAL    0x0004
+
+/* fallback definition, used only when _WIN32_WINNT < 0x0600 */
+struct pollfd {
+
+    SOCKET fd;          /* socket to watch */
+    SHORT events;       /* requested events (POLL* bits) */
+    SHORT revents;      /* returned events (POLL* bits) */
+
+};
+#endif
+RT_C_DECLS_BEGIN
+/* poll(2) work-alike for winsock; the implementation is not in this header */
+int RTWinPoll(struct pollfd *pFds, unsigned int nfds, int timeout, int *pNready);
+RT_C_DECLS_END
+#endif /* !VBOX_INCLUDED_SRC_NAT_winpoll_h */
diff --git a/src/VBox/NetworkServices/NAT/winutils.h b/src/VBox/NetworkServices/NAT/winutils.h
new file mode 100644
index 00000000..be34680a
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/winutils.h
@@ -0,0 +1,220 @@
+/* $Id: winutils.h $ */
+/** @file
+ * NAT Network - winsock compatibility shim.
+ */
+
+/*
+ * Copyright (C) 2013-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
#ifndef VBOX_INCLUDED_SRC_NAT_winutils_h
#define VBOX_INCLUDED_SRC_NAT_winutils_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

# include <iprt/cdefs.h>

# ifdef RT_OS_WINDOWS
# include <iprt/win/winsock2.h>
# include <iprt/win/ws2tcpip.h>
# include <mswsock.h>
# include <iprt/win/windows.h>
# include <iprt/err.h>
# include <iprt/net.h>
# include <iprt/log.h>
/**
 * Inclusion of lwip/def.h was added here to avoid conflict of definitions
 * of hton-family functions in LWIP and winsock's headers.
 */
# include <lwip/def.h>

/* Fall back to AF_INET when the platform headers do not define PF_LOCAL. */
# ifndef PF_LOCAL
# define PF_LOCAL AF_INET
# endif

/*
 * err(3)/errx(3) stand-ins.  In DEBUG builds the message is raised as an
 * assertion and the exit code is not used; otherwise the message is printed
 * with DPRINTF0 and the process exits with the given code.
 */
# ifdef DEBUG
# define err(code,...) do { \
    AssertMsgFailed((__VA_ARGS__)); \
    }while(0)
#else
# define err(code,...) do { \
    DPRINTF0((__VA_ARGS__)); \
    ExitProcess(code); \
    }while(0)
#endif
# define errx err
/* Map the GCC-style spellings used by the shared code onto this toolchain. */
# define __func__ __FUNCTION__
# define __attribute__(x) /* IGNORE */

/* Socket error access goes through the Winsock last-error value on Windows. */
# define SOCKERRNO() (WSAGetLastError())
# define SET_SOCKERRNO(error) do { WSASetLastError(error); } while (0)

/**
 * "Windows Sockets Error Codes" obtained with WSAGetLastError().
 * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740668(v=vs.85).aspx
 *
 * This block of error codes from <winsock2.h> conflicts with "POSIX
 * supplement" error codes from <errno.h>, but we don't expect to ever
 * encounter the latter in the proxy code, so redefine them to their
 * unixy names.
 */
# undef EWOULDBLOCK
# define EWOULDBLOCK WSAEWOULDBLOCK
# undef EINPROGRESS
# define EINPROGRESS WSAEINPROGRESS
# undef EALREADY
# define EALREADY WSAEALREADY
# undef ENOTSOCK
# define ENOTSOCK WSAENOTSOCK
# undef EDESTADDRREQ
# define EDESTADDRREQ WSAEDESTADDRREQ
# undef EMSGSIZE
# define EMSGSIZE WSAEMSGSIZE
# undef EPROTOTYPE
# define EPROTOTYPE WSAEPROTOTYPE
# undef ENOPROTOOPT
# define ENOPROTOOPT WSAENOPROTOOPT
# undef EPROTONOSUPPORT
# define EPROTONOSUPPORT WSAEPROTONOSUPPORT
# undef ESOCKTNOSUPPORT
# define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT
# undef EOPNOTSUPP
# define EOPNOTSUPP WSAEOPNOTSUPP
# undef EPFNOSUPPORT
# define EPFNOSUPPORT WSAEPFNOSUPPORT
# undef EAFNOSUPPORT
# define EAFNOSUPPORT WSAEAFNOSUPPORT
# undef EADDRINUSE
# define EADDRINUSE WSAEADDRINUSE
# undef EADDRNOTAVAIL
# define EADDRNOTAVAIL WSAEADDRNOTAVAIL
# undef ENETDOWN
# define ENETDOWN WSAENETDOWN
# undef ENETUNREACH
# define ENETUNREACH WSAENETUNREACH
# undef ENETRESET
# define ENETRESET WSAENETRESET
# undef ECONNABORTED
# define ECONNABORTED WSAECONNABORTED
# undef ECONNRESET
# define ECONNRESET WSAECONNRESET
# undef ENOBUFS
# define ENOBUFS WSAENOBUFS
# undef EISCONN
# define EISCONN WSAEISCONN
# undef ENOTCONN
# define ENOTCONN WSAENOTCONN
# undef ESHUTDOWN
# define ESHUTDOWN WSAESHUTDOWN
# undef ETOOMANYREFS
# define ETOOMANYREFS WSAETOOMANYREFS
# undef ETIMEDOUT
# define ETIMEDOUT WSAETIMEDOUT
# undef ECONNREFUSED
# define ECONNREFUSED WSAECONNREFUSED
# undef ELOOP
# define ELOOP WSAELOOP
# undef ENAMETOOLONG
# define ENAMETOOLONG WSAENAMETOOLONG
# undef EHOSTDOWN
# define EHOSTDOWN WSAEHOSTDOWN
# undef EHOSTUNREACH
# define EHOSTUNREACH WSAEHOSTUNREACH

/**
 * parameters to shutdown (2) with Winsock2
 * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740481(v=vs.85).aspx
 */
# define SHUT_RD SD_RECEIVE
# define SHUT_WR SD_SEND
# define SHUT_RDWR SD_BOTH

typedef ULONG nfds_t;

/* Scatter/gather element: WSABUF on Windows, struct iovec elsewhere. */
typedef WSABUF IOVEC;

/* Accessors hiding the differing member names of WSABUF and struct iovec. */
# define IOVEC_GET_BASE(iov) ((iov).buf)
# define IOVEC_SET_BASE(iov, b) ((iov).buf = (b))

# define IOVEC_GET_LEN(iov) ((iov).len)
# define IOVEC_SET_LEN(iov, l) ((iov).len = (ULONG)(l))

#if _WIN32_WINNT < 0x0600
/* otherwise defined the other way around in ws2def.h */
#define cmsghdr _WSACMSGHDR

#undef CMSG_DATA /* wincrypt.h can byte my shiny metal #undef */
#define CMSG_DATA WSA_CMSG_DATA
#define CMSG_LEN WSA_CMSG_LEN
#define CMSG_SPACE WSA_CMSG_SPACE

#define CMSG_FIRSTHDR WSA_CMSG_FIRSTHDR
#define CMSG_NXTHDR WSA_CMSG_NXTHDR
#endif /* _WIN32_WINNT < 0x0600 - provide un-uglified CMSG names */

RT_C_DECLS_BEGIN
/*
 * socketpair(2) replacement for Windows (implemented elsewhere).
 * NOTE(review): presumably fills socket_vector with two connected sockets -
 * confirm against the implementation.
 */
int RTWinSocketPair(int domain, int type, int protocol, SOCKET socket_vector[2]);
RT_C_DECLS_END

# else /* !RT_OS_WINDOWS */

# include <errno.h>
# include <unistd.h>

/* Give the POSIX side the Winsock spellings used throughout the proxy code. */
# define SOCKET int
# define INVALID_SOCKET (-1)
# define SOCKET_ERROR (-1)

# define SOCKERRNO() (errno)
# define SET_SOCKERRNO(error) do { errno = (error); } while (0)

# define closesocket(s) close(s)
# define ioctlsocket(s, req, arg) ioctl((s), (req), (arg))

typedef struct iovec IOVEC;

# define IOVEC_GET_BASE(iov) ((iov).iov_base)
# define IOVEC_SET_BASE(iov, b) ((iov).iov_base = (b))

# define IOVEC_GET_LEN(iov) ((iov).iov_len)
# define IOVEC_SET_LEN(iov, l) ((iov).iov_len = (l))
# endif

/**
 * Tells whether a socket error code is one of the transient conditions.
 *
 * On non-Windows hosts these are EWOULDBLOCK, EAGAIN (only tested separately
 * when it differs from EWOULDBLOCK), EINTR, ENOBUFS and ENOMEM.  On Windows
 * they are WSAEWOULDBLOCK, WSAEINTR and WSAENOBUFS.
 *
 * @returns Non-zero if @a error is in the set above, zero otherwise.
 * @param   error   The socket error code, e.g. from SOCKERRNO().
 */
DECLINLINE(int)
proxy_error_is_transient(int error)
{
# if !defined(RT_OS_WINDOWS)
    return error == EWOULDBLOCK
# if EAGAIN != EWOULDBLOCK
        || error == EAGAIN
# endif
        || error == EINTR
        || error == ENOBUFS
        || error == ENOMEM;
# else
    return error == WSAEWOULDBLOCK
        || error == WSAEINTR /* NB: we don't redefine EINTR above */
        || error == WSAENOBUFS;
# endif
}

#endif /* !VBOX_INCLUDED_SRC_NAT_winutils_h */
-0,0 +1,575 @@ +/* $Id: IntNetIf.cpp $ */ +/** @file + * IntNetIfCtx - Abstract API implementing an IntNet connection using the R0 support driver or some R3 IPC variant. + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) +# if defined(RT_OS_DARWIN) +# include <xpc/xpc.h> /* This needs to be here because it drags PVM in and cdefs.h needs to undefine it... */ +# else +# error "R3 internal networking not implemented for this platform yet!" 
/**
 * Internal network interface context instance data.
 *
 * One instance is allocated per IntNetR3IfCreateEx() call and freed by
 * IntNetR3IfDestroy().  The members guarded by VBOX_WITH_INTNET_SERVICE_IN_R3
 * only exist when the ring-3 switch service transport is compiled in.
 */
typedef struct INTNETIFCTXINT
{
    /** The support driver session handle. */
    PSUPDRVSESSION pSupDrvSession;
    /** Interface handle. */
    INTNETIFHANDLE hIf;
    /** The internal network buffer. */
    PINTNETBUF pBuf;
#if defined (VBOX_WITH_INTNET_SERVICE_IN_R3)
    /** Flag whether this interface is using the internal network switch in userspace path. */
    bool fIntNetR3Svc;
    /** Receive event semaphore, signalled by the XPC event handler on non-error events. */
    RTSEMEVENT hEvtRecv;
# if defined(RT_OS_DARWIN)
    /** XPC connection handle to the R3 internal network switch service. */
    xpc_connection_t hXpcCon;
    /** Size of the communication buffer in bytes (as returned by xpc_shmem_map). */
    size_t cbBuf;
# endif
#endif
} INTNETIFCTXINT;
*/ +typedef INTNETIFCTXINT *PINTNETIFCTXINT; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ + +/** + * Calls the internal networking switch service living in either R0 or in another R3 process. + * + * @returns VBox status code. + * @param pThis The internal network driver instance data. + * @param uOperation The operation to execute. + * @param pReqHdr Pointer to the request header. + */ +static int intnetR3IfCallSvc(PINTNETIFCTXINT pThis, uint32_t uOperation, PSUPVMMR0REQHDR pReqHdr) +{ +#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) + if (pThis->fIntNetR3Svc) + { +# if defined(RT_OS_DARWIN) + size_t cbReq = pReqHdr->cbReq; + xpc_object_t hObj = xpc_dictionary_create(NULL, NULL, 0); + xpc_dictionary_set_uint64(hObj, "req-id", uOperation); + xpc_dictionary_set_data(hObj, "req", pReqHdr, pReqHdr->cbReq); + xpc_object_t hObjReply = xpc_connection_send_message_with_reply_sync(pThis->hXpcCon, hObj); + xpc_release(hObj); + + int rc = (int)xpc_dictionary_get_int64(hObjReply, "rc"); + + size_t cbReply = 0; + const void *pvData = xpc_dictionary_get_data(hObjReply, "reply", &cbReply); + AssertRelease(cbReply == cbReq); + memcpy(pReqHdr, pvData, cbReq); + xpc_release(hObjReply); + + return rc; +# endif + } + else +#else + RT_NOREF(pThis); +#endif + return SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, uOperation, 0, pReqHdr); +} + + +#if defined(RT_OS_DARWIN) && defined(VBOX_WITH_INTNET_SERVICE_IN_R3) +/** + * Calls the internal networking switch service living in either R0 or in another R3 process. + * + * @returns VBox status code. + * @param pThis The internal network driver instance data. + * @param uOperation The operation to execute. + * @param pReqHdr Pointer to the request header. 
+ */ +static int intnetR3IfCallSvcAsync(PINTNETIFCTXINT pThis, uint32_t uOperation, PSUPVMMR0REQHDR pReqHdr) +{ + if (pThis->fIntNetR3Svc) + { + xpc_object_t hObj = xpc_dictionary_create(NULL, NULL, 0); + xpc_dictionary_set_uint64(hObj, "req-id", uOperation); + xpc_dictionary_set_data(hObj, "req", pReqHdr, pReqHdr->cbReq); + xpc_connection_send_message(pThis->hXpcCon, hObj); + return VINF_SUCCESS; + } + else + return SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, uOperation, 0, pReqHdr); +} +#endif + + +/** + * Map the ring buffer pointer into this process R3 address space. + * + * @returns VBox status code. + * @param pThis The internal network driver instance data. + */ +static int intnetR3IfMapBufferPointers(PINTNETIFCTXINT pThis) +{ + int rc = VINF_SUCCESS; + + INTNETIFGETBUFFERPTRSREQ GetBufferPtrsReq; + GetBufferPtrsReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + GetBufferPtrsReq.Hdr.cbReq = sizeof(GetBufferPtrsReq); + GetBufferPtrsReq.pSession = pThis->pSupDrvSession; + GetBufferPtrsReq.hIf = pThis->hIf; + GetBufferPtrsReq.pRing3Buf = NULL; + GetBufferPtrsReq.pRing0Buf = NIL_RTR0PTR; + +#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) + if (pThis->fIntNetR3Svc) + { +#if defined(RT_OS_DARWIN) + xpc_object_t hObj = xpc_dictionary_create(NULL, NULL, 0); + xpc_dictionary_set_uint64(hObj, "req-id", VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS); + xpc_dictionary_set_data(hObj, "req", &GetBufferPtrsReq, sizeof(GetBufferPtrsReq)); + xpc_object_t hObjReply = xpc_connection_send_message_with_reply_sync(pThis->hXpcCon, hObj); + xpc_release(hObj); + + rc = (int)xpc_dictionary_get_int64(hObjReply, "rc"); + if (RT_SUCCESS(rc)) + { + /* Get the shared memory object. 
*/ + xpc_object_t hObjShMem = xpc_dictionary_get_value(hObjReply, "buf-ptr"); + size_t cbMem = xpc_shmem_map(hObjShMem, (void **)&pThis->pBuf); + if (!cbMem) + rc = VERR_NO_MEMORY; + else + pThis->cbBuf = cbMem; + } + xpc_release(hObjReply); +#endif + } + else +#endif + { + rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS, 0 /*u64Arg*/, &GetBufferPtrsReq.Hdr); + if (RT_SUCCESS(rc)) + { + AssertRelease(RT_VALID_PTR(GetBufferPtrsReq.pRing3Buf)); + pThis->pBuf = GetBufferPtrsReq.pRing3Buf; + } + } + + return rc; +} + + +static void intnetR3IfClose(PINTNETIFCTXINT pThis) +{ + if (pThis->hIf != INTNET_HANDLE_INVALID) + { + INTNETIFCLOSEREQ CloseReq; + CloseReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + CloseReq.Hdr.cbReq = sizeof(CloseReq); + CloseReq.pSession = pThis->pSupDrvSession; + CloseReq.hIf = pThis->hIf; + + pThis->hIf = INTNET_HANDLE_INVALID; + int rc = intnetR3IfCallSvc(pThis, VMMR0_DO_INTNET_IF_CLOSE, &CloseReq.Hdr); + AssertRC(rc); + } +} + + +DECLHIDDEN(int) IntNetR3IfCreate(PINTNETIFCTX phIfCtx, const char *pszNetwork) +{ + return IntNetR3IfCreateEx(phIfCtx, pszNetwork, kIntNetTrunkType_WhateverNone, "", + _128K /*cbSend*/, _256K /*cbRecv*/, 0 /*fFlags*/); +} + + +DECLHIDDEN(int) IntNetR3IfCreateEx(PINTNETIFCTX phIfCtx, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType, + const char *pszTrunk, uint32_t cbSend, uint32_t cbRecv, uint32_t fFlags) +{ + AssertPtrReturn(phIfCtx, VERR_INVALID_POINTER); + AssertPtrReturn(pszNetwork, VERR_INVALID_POINTER); + AssertPtrReturn(pszTrunk, VERR_INVALID_POINTER); + + PSUPDRVSESSION pSession = NIL_RTR0PTR; + int rc = SUPR3Init(&pSession); + if (RT_SUCCESS(rc)) + { + PINTNETIFCTXINT pThis = (PINTNETIFCTXINT)RTMemAllocZ(sizeof(*pThis)); + if (RT_LIKELY(pThis)) + { + pThis->pSupDrvSession = pSession; +#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) + pThis->hEvtRecv = NIL_RTSEMEVENT; +#endif + + /* Driverless operation needs support for running the internal network switch using IPC. 
*/ + if (SUPR3IsDriverless()) + { +#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) +# if defined(RT_OS_DARWIN) + xpc_connection_t hXpcCon = xpc_connection_create(INTNET_R3_SVC_NAME, NULL); + xpc_connection_set_event_handler(hXpcCon, ^(xpc_object_t hObj) { + if (xpc_get_type(hObj) == XPC_TYPE_ERROR) + { + /** @todo Error handling - reconnecting. */ + } + else + { + /* Out of band messages should only come when there is something to receive. */ + RTSemEventSignal(pThis->hEvtRecv); + } + }); + + xpc_connection_resume(hXpcCon); + pThis->hXpcCon = hXpcCon; +# endif + pThis->fIntNetR3Svc = true; + rc = RTSemEventCreate(&pThis->hEvtRecv); +#else + rc = VERR_SUP_DRIVERLESS; +#endif + } + else + { + /* Need to load VMMR0.r0 containing the network switching code. */ + char szPathVMMR0[RTPATH_MAX]; + + rc = RTPathExecDir(szPathVMMR0, sizeof(szPathVMMR0)); + if (RT_SUCCESS(rc)) + { + rc = RTPathAppend(szPathVMMR0, sizeof(szPathVMMR0), "VMMR0.r0"); + if (RT_SUCCESS(rc)) + rc = SUPR3LoadVMM(szPathVMMR0, /* :pErrInfo */ NULL); + } + } + + if (RT_SUCCESS(rc)) + { + /* Open the interface. 
*/ + INTNETOPENREQ OpenReq; + RT_ZERO(OpenReq); + + OpenReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + OpenReq.Hdr.cbReq = sizeof(OpenReq); + OpenReq.pSession = pThis->pSupDrvSession; + OpenReq.enmTrunkType = enmTrunkType; + OpenReq.fFlags = fFlags; + OpenReq.cbSend = cbSend; + OpenReq.cbRecv = cbRecv; + OpenReq.hIf = INTNET_HANDLE_INVALID; + + rc = RTStrCopy(OpenReq.szNetwork, sizeof(OpenReq.szNetwork), pszNetwork); + if (RT_SUCCESS(rc)) + rc = RTStrCopy(OpenReq.szTrunk, sizeof(OpenReq.szTrunk), pszTrunk); + if (RT_SUCCESS(rc)) + { + rc = intnetR3IfCallSvc(pThis, VMMR0_DO_INTNET_OPEN, &OpenReq.Hdr); + if (RT_SUCCESS(rc)) + { + pThis->hIf = OpenReq.hIf; + + rc = intnetR3IfMapBufferPointers(pThis); + if (RT_SUCCESS(rc)) + { + *phIfCtx = pThis; + return VINF_SUCCESS; + } + } + + intnetR3IfClose(pThis); + } + } + +#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) + if (pThis->fIntNetR3Svc) + { +# if defined(RT_OS_DARWIN) + if (pThis->hXpcCon) + xpc_connection_cancel(pThis->hXpcCon); + pThis->hXpcCon = NULL; +# endif + + if (pThis->hEvtRecv != NIL_RTSEMEVENT) + RTSemEventDestroy(pThis->hEvtRecv); + } +#endif + + RTMemFree(pThis); + } + + SUPR3Term(); + } + + return rc; +} + + +DECLHIDDEN(int) IntNetR3IfDestroy(INTNETIFCTX hIfCtx) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + + intnetR3IfClose(pThis); + +#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) + if (pThis->fIntNetR3Svc) + { +# if defined(RT_OS_DARWIN) + /* Unmap the shared buffer. 
*/ + munmap(pThis->pBuf, pThis->cbBuf); + xpc_connection_cancel(pThis->hXpcCon); + pThis->hXpcCon = NULL; +# endif + RTSemEventDestroy(pThis->hEvtRecv); + pThis->fIntNetR3Svc = false; + } +#endif + + RTMemFree(pThis); + return VINF_SUCCESS; +} + + +DECLHIDDEN(int) IntNetR3IfQueryBufferPtr(INTNETIFCTX hIfCtx, PINTNETBUF *ppIfBuf) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + AssertPtrReturn(ppIfBuf, VERR_INVALID_POINTER); + + *ppIfBuf = pThis->pBuf; + return VINF_SUCCESS; +} + + +DECLHIDDEN(int) IntNetR3IfSetActive(INTNETIFCTX hIfCtx, bool fActive) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + + INTNETIFSETACTIVEREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.pSession = pThis->pSupDrvSession; + Req.hIf = pThis->hIf; + Req.fActive = fActive; + return intnetR3IfCallSvc(pThis, VMMR0_DO_INTNET_IF_SET_ACTIVE, &Req.Hdr); +} + + +DECLHIDDEN(int) IntNetR3IfSetPromiscuous(INTNETIFCTX hIfCtx, bool fPromiscuous) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + + INTNETIFSETPROMISCUOUSMODEREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.pSession = pThis->pSupDrvSession; + Req.hIf = pThis->hIf; + Req.fPromiscuous = fPromiscuous; + return intnetR3IfCallSvc(pThis, VMMR0_DO_INTNET_IF_SET_PROMISCUOUS_MODE, &Req.Hdr); +} + + +DECLHIDDEN(int) IntNetR3IfSend(INTNETIFCTX hIfCtx) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + + INTNETIFSENDREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.pSession = pThis->pSupDrvSession; + Req.hIf = pThis->hIf; + return intnetR3IfCallSvc(pThis, VMMR0_DO_INTNET_IF_SEND, &Req.Hdr); +} + + +DECLHIDDEN(int) IntNetR3IfWait(INTNETIFCTX hIfCtx, uint32_t cMillies) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + + int rc = VINF_SUCCESS; + INTNETIFWAITREQ 
WaitReq; + WaitReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + WaitReq.Hdr.cbReq = sizeof(WaitReq); + WaitReq.pSession = pThis->pSupDrvSession; + WaitReq.hIf = pThis->hIf; + WaitReq.cMillies = cMillies; +#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) + if (pThis->fIntNetR3Svc) + { + /* Send an asynchronous message. */ + rc = intnetR3IfCallSvcAsync(pThis, VMMR0_DO_INTNET_IF_WAIT, &WaitReq.Hdr); + if (RT_SUCCESS(rc)) + { + /* Wait on the receive semaphore. */ + rc = RTSemEventWait(pThis->hEvtRecv, cMillies); + } + } + else +#endif + rc = intnetR3IfCallSvc(pThis, VMMR0_DO_INTNET_IF_WAIT, &WaitReq.Hdr); + + return rc; +} + + +DECLHIDDEN(int) IntNetR3IfWaitAbort(INTNETIFCTX hIfCtx) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + + INTNETIFABORTWAITREQ AbortWaitReq; + AbortWaitReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + AbortWaitReq.Hdr.cbReq = sizeof(AbortWaitReq); + AbortWaitReq.pSession = pThis->pSupDrvSession; + AbortWaitReq.hIf = pThis->hIf; + AbortWaitReq.fNoMoreWaits = true; + return intnetR3IfCallSvc(pThis, VMMR0_DO_INTNET_IF_ABORT_WAIT, &AbortWaitReq.Hdr); +} + + +DECLHIDDEN(int) IntNetR3IfPumpPkts(INTNETIFCTX hIfCtx, PFNINPUT pfnInput, void *pvUser, + PFNINPUTGSO pfnInputGso, void *pvUserGso) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + AssertPtrReturn(pfnInput, VERR_INVALID_POINTER); + + int rc; + for (;;) + { + rc = IntNetR3IfWait(hIfCtx, RT_INDEFINITE_WAIT); + if (RT_SUCCESS(rc) || rc == VERR_INTERRUPTED || rc == VERR_TIMEOUT) + { + PCINTNETHDR pHdr = IntNetRingGetNextFrameToRead(&pThis->pBuf->Recv); + while (pHdr) + { + const uint8_t u8Type = pHdr->u8Type; + void *pvSegFrame; + uint32_t cbSegFrame; + + if (u8Type == INTNETHDR_TYPE_FRAME) + { + pvSegFrame = IntNetHdrGetFramePtr(pHdr, pThis->pBuf); + cbSegFrame = pHdr->cbFrame; + + /* pass the frame to the user callback */ + pfnInput(pvUser, pvSegFrame, cbSegFrame); + } + else if (u8Type == INTNETHDR_TYPE_GSO) + { + size_t cbGso = 
pHdr->cbFrame; + size_t cbFrame = cbGso - sizeof(PDMNETWORKGSO); + + PCPDMNETWORKGSO pcGso = IntNetHdrGetGsoContext(pHdr, pThis->pBuf); + if (PDMNetGsoIsValid(pcGso, cbGso, cbFrame)) + { + if (pfnInputGso != NULL) + { + /* pass the frame to the user GSO input callback if set */ + pfnInputGso(pvUserGso, pcGso, (uint32_t)cbFrame); + } + else + { + const uint32_t cSegs = PDMNetGsoCalcSegmentCount(pcGso, cbFrame); + for (uint32_t i = 0; i < cSegs; ++i) + { + uint8_t abHdrScratch[256]; + pvSegFrame = PDMNetGsoCarveSegmentQD(pcGso, (uint8_t *)(pcGso + 1), cbFrame, + abHdrScratch, + i, cSegs, + &cbSegFrame); + + /* pass carved frames to the user input callback */ + pfnInput(pvUser, pvSegFrame, (uint32_t)cbSegFrame); + } + } + } + } + + /* advance to the next input frame */ + IntNetRingSkipFrame(&pThis->pBuf->Recv); + pHdr = IntNetRingGetNextFrameToRead(&pThis->pBuf->Recv); + } + } + else + break; + } + return rc; +} + + +DECLHIDDEN(int) IntNetR3IfQueryOutputFrame(INTNETIFCTX hIfCtx, uint32_t cbFrame, PINTNETFRAME pFrame) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + + return IntNetRingAllocateFrame(&pThis->pBuf->Send, cbFrame, &pFrame->pHdr, &pFrame->pvFrame); +} + + +DECLHIDDEN(int) IntNetR3IfOutputFrameCommit(INTNETIFCTX hIfCtx, PCINTNETFRAME pFrame) +{ + PINTNETIFCTXINT pThis = hIfCtx; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + + IntNetRingCommitFrame(&pThis->pBuf->Send, pFrame->pHdr); + return IntNetR3IfSend(hIfCtx); +} diff --git a/src/VBox/NetworkServices/NetLib/IntNetIf.h b/src/VBox/NetworkServices/NetLib/IntNetIf.h new file mode 100644 index 00000000..7e8c0063 --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/IntNetIf.h @@ -0,0 +1,112 @@ +/* $Id: IntNetIf.h $ */ +/** @file + * IntNetIf - Convenience class implementing an IntNet connection. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. 
#ifndef VBOX_INCLUDED_SRC_NetLib_IntNetIf_h
#define VBOX_INCLUDED_SRC_NetLib_IntNetIf_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include <iprt/cdefs.h>

#include <iprt/initterm.h>
#include <iprt/cpp/ministring.h>

#include <VBox/sup.h>
#include <VBox/vmm/vmm.h>
#include <VBox/intnet.h>



/**
 * Low-level internal network access helpers to hide away the different variants (R0 SUP or R3 XPC on macOS).
 */
/** Internal networking interface context handle. */
typedef struct INTNETIFCTXINT *INTNETIFCTX;
/** Pointer to an internal networking interface context handle. */
typedef INTNETIFCTX *PINTNETIFCTX;

/**
 * User input callback function.
 *
 * @param   pvUser      The user specified argument.
 * @param   pvFrame     The pointer to the frame data.
 * @param   cbFrame     The length of the frame data.
 */
typedef DECLCALLBACKTYPE(void, FNINPUT,(void *pvUser, void *pvFrame, uint32_t cbFrame));

/** Pointer to the user input callback function. */
typedef FNINPUT *PFNINPUT;

/**
 * User GSO input callback function.
 *
 * @param   pvUser      The user specified argument.
 * @param   pcGso       The pointer to the GSO context.
 * @param   cbFrame     The length of the GSO data.
 */
typedef DECLCALLBACKTYPE(void, FNINPUTGSO,(void *pvUser, PCPDMNETWORKGSO pcGso, uint32_t cbFrame));

/** Pointer to the user GSO input callback function. */
typedef FNINPUTGSO *PFNINPUTGSO;


/**
 * An output frame in the send ring buffer.
 *
 * Obtained with IntNetR3IfQueryOutputFrame(). Caller should copy frame
 * contents to pvFrame and pass the frame structure to IntNetR3IfOutputFrameCommit()
 * to be sent to the network.
 */
typedef struct INTNETFRAME
{
    /** The internal network frame header. */
    PINTNETHDR pHdr;
    /** The actual frame data. */
    void *pvFrame;
} INTNETFRAME;
/** Pointer to an output frame. */
typedef INTNETFRAME *PINTNETFRAME;
/** Pointer to a const output frame. */
typedef const INTNETFRAME *PCINTNETFRAME;


/* Interface lifetime: create (default or explicit parameters) and destroy. */
DECLHIDDEN(int) IntNetR3IfCreate(PINTNETIFCTX phIfCtx, const char *pszNetwork);
DECLHIDDEN(int) IntNetR3IfCreateEx(PINTNETIFCTX phIfCtx, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
                                   const char *pszTrunk, uint32_t cbSend, uint32_t cbRecv, uint32_t fFlags);
DECLHIDDEN(int) IntNetR3IfDestroy(INTNETIFCTX hIfCtx);
/* Interface state, buffer access and low-level send/wait requests. */
DECLHIDDEN(int) IntNetR3IfQueryBufferPtr(INTNETIFCTX hIfCtx, PINTNETBUF *ppIfBuf);
DECLHIDDEN(int) IntNetR3IfSetActive(INTNETIFCTX hIfCtx, bool fActive);
DECLHIDDEN(int) IntNetR3IfSetPromiscuous(INTNETIFCTX hIfCtx, bool fPromiscuous);
DECLHIDDEN(int) IntNetR3IfSend(INTNETIFCTX hIfCtx);
DECLHIDDEN(int) IntNetR3IfWait(INTNETIFCTX hIfCtx, uint32_t cMillies);
DECLHIDDEN(int) IntNetR3IfWaitAbort(INTNETIFCTX hIfCtx);

/* Receive pump and output frame helpers. */
DECLHIDDEN(int) IntNetR3IfPumpPkts(INTNETIFCTX hIfCtx, PFNINPUT pfnInput, void *pvUser,
                                   PFNINPUTGSO pfnInputGso, void *pvUserGso);
DECLHIDDEN(int) IntNetR3IfQueryOutputFrame(INTNETIFCTX hIfCtx, uint32_t cbFrame, PINTNETFRAME pFrame);
DECLHIDDEN(int) IntNetR3IfOutputFrameCommit(INTNETIFCTX hIfCtx, PCINTNETFRAME pFrame);

#endif /* !VBOX_INCLUDED_SRC_NetLib_IntNetIf_h */
b/src/VBox/NetworkServices/NetLib/Makefile.kup diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp new file mode 100644 index 00000000..510685c9 --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp @@ -0,0 +1,165 @@ +/* $Id: VBoxNetARP.cpp $ */ +/** @file + * VBoxNetARP - IntNet ARP Client Routines. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DEFAULT +#include "VBoxNetLib.h" +#include <iprt/string.h> +#include <VBox/intnetinline.h> +#include <VBox/log.h> + + +/** + * Deal with ARP queries. + * + * @returns true if ARP. + * + * @param pSession The support driver session. + * @param hIf The internal network interface handle. + * @param pBuf The internal network interface buffer. + * @param pMacAddr Our MAC address. + * @param IPv4Addr Our IPv4 address. 
+ */ +bool VBoxNetArpHandleIt(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf, PCRTMAC pMacAddr, RTNETADDRIPV4 IPv4Addr) +{ + /* + * Valid IntNet Ethernet frame? Skip GSO, no ARP in there. + */ + PCINTNETHDR pHdr = IntNetRingGetNextFrameToRead(&pBuf->Recv); + if ( !pHdr + || pHdr->u8Type != INTNETHDR_TYPE_FRAME) + return false; + + size_t cbFrame = pHdr->cbFrame; + const void *pvFrame = IntNetHdrGetFramePtr(pHdr, pBuf); + PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvFrame; + + /* + * Arp frame? + */ + if (pEthHdr->EtherType != RT_H2N_U16_C(RTNET_ETHERTYPE_ARP)) + return false; + if ( ( pEthHdr->DstMac.au16[0] != 0xffff + || pEthHdr->DstMac.au16[1] != 0xffff + || pEthHdr->DstMac.au16[2] != 0xffff) + && ( pEthHdr->DstMac.au16[0] != pMacAddr->au16[0] + || pEthHdr->DstMac.au16[1] != pMacAddr->au16[1] + || pEthHdr->DstMac.au16[2] != pMacAddr->au16[2]) + ) + return false; + if (cbFrame < sizeof(RTNETARPIPV4) + sizeof(RTNETETHERHDR)) + return false; + + PCRTNETARPHDR pArpHdr = (PCRTNETARPHDR)(pEthHdr + 1); + if (pArpHdr->ar_htype != RT_H2N_U16_C(RTNET_ARP_ETHER)) + return false; + if (pArpHdr->ar_hlen != sizeof(RTMAC)) + return false; + if (pArpHdr->ar_ptype != RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4)) + return false; + if (pArpHdr->ar_plen != sizeof(RTNETADDRIPV4)) + return false; + + /* It's ARP, alright. Anything we need to do something about. */ + PCRTNETARPIPV4 pArp = (PCRTNETARPIPV4)pArpHdr; + switch (pArp->Hdr.ar_oper) + { + case RT_H2N_U16_C(RTNET_ARPOP_REQUEST): + case RT_H2N_U16_C(RTNET_ARPOP_REVREQUEST): + case RT_H2N_U16_C(RTNET_ARPOP_INVREQUEST): + break; + default: + return true; + } + + /* + * Deal with the queries. + */ + RTNETARPIPV4 Reply; + switch (pArp->Hdr.ar_oper) + { + /* 'Who has ar_tpa? Tell ar_spa.' 
*/ + case RT_H2N_U16_C(RTNET_ARPOP_REQUEST): + if (pArp->ar_tpa.u != IPv4Addr.u) + return true; + Reply.Hdr.ar_oper = RT_H2N_U16_C(RTNET_ARPOP_REPLY); + break; + + case RT_H2N_U16_C(RTNET_ARPOP_REVREQUEST): + if ( pArp->ar_tha.au16[0] != pMacAddr->au16[0] + || pArp->ar_tha.au16[1] != pMacAddr->au16[1] + || pArp->ar_tha.au16[2] != pMacAddr->au16[2]) + return true; + Reply.Hdr.ar_oper = RT_H2N_U16_C(RTNET_ARPOP_REVREPLY); + break; + + case RT_H2N_U16_C(RTNET_ARPOP_INVREQUEST): + /** @todo RTNET_ARPOP_INVREQUEST */ + return true; + //Reply.Hdr.ar_oper = RT_H2N_U16_C(RTNET_ARPOP_INVREPLY); + //break; + } + + /* + * Complete the reply and send it. + */ + Reply.Hdr.ar_htype = RT_H2N_U16_C(RTNET_ARP_ETHER); + Reply.Hdr.ar_ptype = RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4); + Reply.Hdr.ar_hlen = sizeof(RTMAC); + Reply.Hdr.ar_plen = sizeof(RTNETADDRIPV4); + Reply.ar_sha = *pMacAddr; + Reply.ar_spa = IPv4Addr; + Reply.ar_tha = pArp->ar_sha; + Reply.ar_tpa = pArp->ar_spa; + + + RTNETETHERHDR EthHdr; + EthHdr.DstMac = pArp->ar_sha; + EthHdr.SrcMac = *pMacAddr; + EthHdr.EtherType = RT_H2N_U16_C(RTNET_ETHERTYPE_ARP); + + uint8_t abTrailer[60 - sizeof(Reply) - sizeof(EthHdr)]; + RT_ZERO(abTrailer); + + INTNETSEG aSegs[3]; + aSegs[0].cb = sizeof(EthHdr); + aSegs[0].pv = &EthHdr; + + aSegs[1].pv = &Reply; + aSegs[1].cb = sizeof(Reply); + + aSegs[2].pv = &abTrailer[0]; + aSegs[2].cb = sizeof(abTrailer); + + VBoxNetIntIfSend(pSession, hIf, pBuf, RT_ELEMENTS(aSegs), &aSegs[0], true /* fFlush */); + + return true; +} + diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp new file mode 100644 index 00000000..1700c0d1 --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp @@ -0,0 +1,858 @@ +/* $Id: VBoxNetBaseService.cpp $ */ +/** @file + * VBoxNetBaseService - common services for VBoxNetDHCP and VBoxNetNAT. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. 
+ * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_NET_SERVICE + +#include <VBox/com/com.h> +#include <VBox/com/listeners.h> +#include <VBox/com/string.h> +#include <VBox/com/Guid.h> +#include <VBox/com/array.h> +#include <VBox/com/ErrorInfo.h> +#include <VBox/com/errorprint.h> +#include <VBox/com/VirtualBox.h> +#include <VBox/com/NativeEventQueue.h> + +#include <iprt/alloca.h> +#include <iprt/buildconfig.h> +#include <iprt/err.h> +#include <iprt/net.h> /* must come before getopt.h. 
*/ +#include <iprt/getopt.h> +#include <iprt/initterm.h> +#include <iprt/param.h> +#include <iprt/path.h> +#include <iprt/process.h> +#include <iprt/stream.h> +#include <iprt/string.h> +#include <iprt/time.h> +#include <iprt/thread.h> +#include <iprt/mem.h> +#include <iprt/message.h> + +#include <VBox/sup.h> +#include <VBox/intnet.h> +#include <VBox/intnetinline.h> +#include <VBox/vmm/vmm.h> +#include <VBox/version.h> + +#include <vector> +#include <iprt/sanitized/string> + +#include <VBox/err.h> +#include <VBox/log.h> + +#include "VBoxNetLib.h" +#include "VBoxNetBaseService.h" + +#ifdef RT_OS_WINDOWS /* WinMain */ +# include <iprt/win/windows.h> +# include <stdlib.h> +#endif + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +struct VBoxNetBaseService::Data +{ + Data(const std::string& aServiceName, const std::string& aNetworkName): + m_ServiceName(aServiceName), + m_NetworkName(aNetworkName), + m_enmTrunkType(kIntNetTrunkType_WhateverNone), + m_pSession(NIL_RTR0PTR), + m_cbSendBuf(128 * _1K), + m_cbRecvBuf(256 * _1K), + m_hIf(INTNET_HANDLE_INVALID), + m_pIfBuf(NULL), + m_cVerbosity(0), + m_fNeedMain(false), + m_EventQ(NULL), + m_hThrRecv(NIL_RTTHREAD), + fShutdown(false) + { + int rc = RTCritSectInit(&m_csThis); + AssertRC(rc); + }; + + std::string m_ServiceName; + std::string m_NetworkName; + std::string m_TrunkName; + INTNETTRUNKTYPE m_enmTrunkType; + + RTMAC m_MacAddress; + RTNETADDRIPV4 m_Ipv4Address; + RTNETADDRIPV4 m_Ipv4Netmask; + + PSUPDRVSESSION m_pSession; + uint32_t m_cbSendBuf; + uint32_t m_cbRecvBuf; + INTNETIFHANDLE m_hIf; /**< The handle to the network interface. */ + PINTNETBUF m_pIfBuf; /**< Interface buffer. 
*/ + + std::vector<PCRTGETOPTDEF> m_vecOptionDefs; + + int32_t m_cVerbosity; + + /* cs for syncing */ + RTCRITSECT m_csThis; + + /* Controls whether service will connect SVC for runtime needs */ + bool m_fNeedMain; + /* Event Queue */ + com::NativeEventQueue *m_EventQ; + + /** receiving thread, used only if main is used */ + RTTHREAD m_hThrRecv; + + bool fShutdown; + static DECLCALLBACK(int) recvLoop(RTTHREAD, void *); +}; + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/* Commonly used options for network configuration */ +static RTGETOPTDEF g_aGetOptDef[] = +{ + { "--name", 'N', RTGETOPT_REQ_STRING }, + { "--network", 'n', RTGETOPT_REQ_STRING }, + { "--trunk-name", 't', RTGETOPT_REQ_STRING }, + { "--trunk-type", 'T', RTGETOPT_REQ_STRING }, + { "--mac-address", 'a', RTGETOPT_REQ_MACADDR }, + { "--ip-address", 'i', RTGETOPT_REQ_IPV4ADDR }, + { "--netmask", 'm', RTGETOPT_REQ_IPV4ADDR }, + { "--verbose", 'v', RTGETOPT_REQ_NOTHING }, + { "--need-main", 'M', RTGETOPT_REQ_BOOL }, +}; + + +DECLCALLBACK(int) VBoxNetBaseService::Data::recvLoop(RTTHREAD, void *pvUser) +{ + VBoxNetBaseService *pThis = static_cast<VBoxNetBaseService *>(pvUser); + + HRESULT hrc = com::Initialize(); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + pThis->doReceiveLoop(); + + return VINF_SUCCESS; +} + + +VBoxNetBaseService::VBoxNetBaseService(const std::string& aName, const std::string& aNetworkName):m(NULL) +{ + m = new VBoxNetBaseService::Data(aName, aNetworkName); + + for(unsigned int i = 0; i < RT_ELEMENTS(g_aGetOptDef); ++i) + m->m_vecOptionDefs.push_back(&g_aGetOptDef[i]); +} + + +VBoxNetBaseService::~VBoxNetBaseService() +{ + /* + * Close the interface connection. 
+ */ + if (m) + { + shutdown(); + if (m->m_hIf != INTNET_HANDLE_INVALID) + { + INTNETIFCLOSEREQ CloseReq; + CloseReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + CloseReq.Hdr.cbReq = sizeof(CloseReq); + CloseReq.pSession = m->m_pSession; + CloseReq.hIf = m->m_hIf; + m->m_hIf = INTNET_HANDLE_INVALID; + int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_CLOSE, 0, &CloseReq.Hdr); + AssertRC(rc); + } + + if (m->m_pSession != NIL_RTR0PTR) + { + SUPR3Term(false /*fForced*/); + m->m_pSession = NIL_RTR0PTR; + } + + RTCritSectDelete(&m->m_csThis); + + delete m; + m = NULL; + } +} + + +int VBoxNetBaseService::init() +{ + if (isMainNeeded()) + { + HRESULT hrc = com::Initialize(); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + hrc = virtualboxClient.createInprocObject(CLSID_VirtualBoxClient); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + hrc = virtualboxClient->COMGETTER(VirtualBox)(virtualbox.asOutParam()); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + } + + return VINF_SUCCESS; +} + + +bool VBoxNetBaseService::isMainNeeded() const +{ + return m->m_fNeedMain; +} + + +int VBoxNetBaseService::run() +{ + /** + * If the child class needs Main we start the receving thread which calls + * doReceiveLoop and enter to event polling loop. For other clients we do + * receiving on the current (main) thread. + */ + if (isMainNeeded()) + return startReceiveThreadAndEnterEventLoop(); + + doReceiveLoop(); + return VINF_SUCCESS; +} + +/** + * Parse the arguments. + * + * @returns 0 on success, fully bitched exit code on failure. + * + * @param argc Argument count. + * @param argv Argument vector. + * + * @todo r=bird: The --help and --version options shall not return a + * non-zero exit code. So, this method need to grow some + * complexity. 
I'm to blame for that blunder :/ + */ +int VBoxNetBaseService::parseArgs(int argc, char **argv) +{ + + RTGETOPTSTATE State; + PRTGETOPTDEF paOptionArray = getOptionsPtr(); + int rc = RTGetOptInit(&State, argc, argv, paOptionArray, m->m_vecOptionDefs.size(), 0, 0 /*fFlags*/); + AssertRCReturn(rc, 49); +#if 0 + /* default initialization */ + m_enmTrunkType = kIntNetTrunkType_WhateverNone; +#endif + Log2(("BaseService: parseArgs enter\n")); + + for (;;) + { + RTGETOPTUNION Val; + rc = RTGetOpt(&State, &Val); + if (!rc) + break; + switch (rc) + { + case 'N': // --name + m->m_ServiceName = Val.psz; + break; + + case 'n': // --network + m->m_NetworkName = Val.psz; + break; + + case 't': //--trunk-name + m->m_TrunkName = Val.psz; + break; + + case 'T': //--trunk-type + if (!strcmp(Val.psz, "none")) + m->m_enmTrunkType = kIntNetTrunkType_None; + else if (!strcmp(Val.psz, "whatever")) + m->m_enmTrunkType = kIntNetTrunkType_WhateverNone; + else if (!strcmp(Val.psz, "netflt")) + m->m_enmTrunkType = kIntNetTrunkType_NetFlt; + else if (!strcmp(Val.psz, "netadp")) + m->m_enmTrunkType = kIntNetTrunkType_NetAdp; + else if (!strcmp(Val.psz, "srvnat")) + m->m_enmTrunkType = kIntNetTrunkType_SrvNat; + else + { + RTStrmPrintf(g_pStdErr, "Invalid trunk type '%s'\n", Val.psz); + return RTEXITCODE_SYNTAX; + } + break; + + case 'a': // --mac-address + m->m_MacAddress = Val.MacAddr; + break; + + case 'i': // --ip-address + m->m_Ipv4Address = Val.IPv4Addr; + break; + + case 'm': // --netmask + m->m_Ipv4Netmask = Val.IPv4Addr; + break; + + case 'v': // --verbose + m->m_cVerbosity++; + break; + + case 'V': // --version (missed) + RTPrintf("%sr%u\n", RTBldCfgVersion(), RTBldCfgRevision()); + return 1; /** @todo this exit code is wrong, of course. 
:/ */ + + case 'M': // --need-main + m->m_fNeedMain = true; + break; + + case 'h': // --help (missed) + RTPrintf("%s Version %sr%u\n" + "Copyright (C) 2009-" VBOX_C_YEAR " " VBOX_VENDOR "\n" + "\n" + "Usage: %s <options>\n" + "\n" + "Options:\n", + RTProcShortName(), + RTBldCfgVersion(), + RTBldCfgRevision(), + RTProcShortName()); + for (unsigned int i = 0; i < m->m_vecOptionDefs.size(); i++) + RTPrintf(" -%c, %s\n", m->m_vecOptionDefs[i]->iShort, m->m_vecOptionDefs[i]->pszLong); + usage(); /* to print Service Specific usage */ + return 1; /** @todo this exit code is wrong, of course. :/ */ + + default: + { + int rc1 = parseOpt(rc, Val); + if (RT_FAILURE(rc1)) + { + RTEXITCODE rcExit = RTGetOptPrintError(rc, &Val); + RTPrintf("Use --help for more information.\n"); + return rcExit; + } + break; + } + } + } + + RTMemFree(paOptionArray); + return RTEXITCODE_SUCCESS; +} + + +int VBoxNetBaseService::tryGoOnline(void) +{ + /* + * Open the session, load ring-0 and issue the request. + */ + int rc = SUPR3Init(&m->m_pSession); + if (RT_FAILURE(rc)) + { + m->m_pSession = NIL_RTR0PTR; + LogRel(("VBoxNetBaseService: SUPR3Init -> %Rrc\n", rc)); + return rc; + } + + char szPath[RTPATH_MAX]; + rc = RTPathExecDir(szPath, sizeof(szPath) - sizeof("/VMMR0.r0")); + if (RT_FAILURE(rc)) + { + LogRel(("VBoxNetBaseService: RTPathExecDir -> %Rrc\n", rc)); + return rc; + } + + rc = SUPR3LoadVMM(strcat(szPath, "/VMMR0.r0"), NULL); + if (RT_FAILURE(rc)) + { + LogRel(("VBoxNetBaseService: SUPR3LoadVMM(\"%s\") -> %Rrc\n", szPath, rc)); + return rc; + } + + /* + * Create the open request. 
+ */ + PINTNETBUF pBuf; + INTNETOPENREQ OpenReq; + OpenReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + OpenReq.Hdr.cbReq = sizeof(OpenReq); + OpenReq.pSession = m->m_pSession; + RTStrCopy(OpenReq.szNetwork, sizeof(OpenReq.szNetwork), m->m_NetworkName.c_str()); + OpenReq.szNetwork[sizeof(OpenReq.szNetwork) - 1] = '\0'; + RTStrCopy(OpenReq.szTrunk, sizeof(OpenReq.szTrunk), m->m_TrunkName.c_str()); + OpenReq.szTrunk[sizeof(OpenReq.szTrunk) - 1] = '\0'; + OpenReq.enmTrunkType = m->m_enmTrunkType; + OpenReq.fFlags = 0; /** @todo check this */ + OpenReq.cbSend = m->m_cbSendBuf; + OpenReq.cbRecv = m->m_cbRecvBuf; + OpenReq.hIf = INTNET_HANDLE_INVALID; + + /* + * Issue the request. + */ + Log2(("attempting to open/create network \"%s\"...\n", OpenReq.szNetwork)); + rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_OPEN, 0, &OpenReq.Hdr); + if (RT_FAILURE(rc)) + { + Log2(("VBoxNetBaseService: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_OPEN,) failed, rc=%Rrc\n", rc)); + return rc; + } + m->m_hIf = OpenReq.hIf; + Log2(("successfully opened/created \"%s\" - hIf=%#x\n", OpenReq.szNetwork, m->m_hIf)); + + /* + * Get the ring-3 address of the shared interface buffer. + */ + INTNETIFGETBUFFERPTRSREQ GetBufferPtrsReq; + GetBufferPtrsReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + GetBufferPtrsReq.Hdr.cbReq = sizeof(GetBufferPtrsReq); + GetBufferPtrsReq.pSession = m->m_pSession; + GetBufferPtrsReq.hIf = m->m_hIf; + GetBufferPtrsReq.pRing3Buf = NULL; + GetBufferPtrsReq.pRing0Buf = NIL_RTR0PTR; + rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS, 0, &GetBufferPtrsReq.Hdr); + if (RT_FAILURE(rc)) + { + Log2(("VBoxNetBaseService: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS,) failed, rc=%Rrc\n", rc)); + return rc; + } + pBuf = GetBufferPtrsReq.pRing3Buf; + Log2(("pBuf=%p cbBuf=%d cbSend=%d cbRecv=%d\n", + pBuf, pBuf->cbBuf, pBuf->cbSend, pBuf->cbRecv)); + m->m_pIfBuf = pBuf; + + /* + * Activate the interface. 
+ */ + INTNETIFSETACTIVEREQ ActiveReq; + ActiveReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + ActiveReq.Hdr.cbReq = sizeof(ActiveReq); + ActiveReq.pSession = m->m_pSession; + ActiveReq.hIf = m->m_hIf; + ActiveReq.fActive = true; + rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SET_ACTIVE, 0, &ActiveReq.Hdr); + if (RT_SUCCESS(rc)) + return 0; + + /* bail out */ + Log2(("VBoxNetBaseService: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_IF_SET_PROMISCUOUS_MODE,) failed, rc=%Rrc\n", rc)); + + /* ignore this error */ + return VINF_SUCCESS; +} + + +void VBoxNetBaseService::shutdown(void) +{ + syncEnter(); + if (!m->fShutdown) + { + m->fShutdown = true; + if (m->m_hThrRecv != NIL_RTTHREAD) + { + int rc = abortWait(); + AssertRC(rc == VINF_SUCCESS || rc == VERR_SEM_DESTROYED); + rc = m->m_EventQ->interruptEventQueueProcessing(); + if (RT_SUCCESS(rc)) + { + rc = RTThreadWait(m->m_hThrRecv, 60000, NULL); + if (RT_FAILURE(rc)) + Log1WarningFunc(("RTThreadWait(%RTthrd) -> %Rrc\n", m->m_hThrRecv, rc)); + } + else + { + AssertMsgFailed(("interruptEventQueueProcessing() failed\n")); + RTThreadWait(m->m_hThrRecv , 0, NULL); + } + } + } + syncLeave(); +} + + +int VBoxNetBaseService::syncEnter() +{ + return RTCritSectEnter(&m->m_csThis); +} + + +int VBoxNetBaseService::syncLeave() +{ + return RTCritSectLeave(&m->m_csThis); +} + + +int VBoxNetBaseService::waitForIntNetEvent(int cMillis) +{ + INTNETIFWAITREQ WaitReq; + LogFlowFunc(("ENTER:cMillis: %d\n", cMillis)); + WaitReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + WaitReq.Hdr.cbReq = sizeof(WaitReq); + WaitReq.pSession = m->m_pSession; + WaitReq.hIf = m->m_hIf; + WaitReq.cMillies = cMillis; + + int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_WAIT, 0, &WaitReq.Hdr); + LogFlowFuncLeaveRC(rc); + return rc; +} + + +int VBoxNetBaseService::abortWait() +{ + INTNETIFABORTWAITREQ AbortReq; + LogFlowFunc(("ENTER:\n")); + AbortReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + AbortReq.Hdr.cbReq = sizeof(AbortReq); + 
AbortReq.pSession = m->m_pSession; + AbortReq.hIf = m->m_hIf; + AbortReq.fNoMoreWaits = true; + + int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_ABORT_WAIT, 0, &AbortReq.Hdr); + LogFlowFuncLeaveRC(rc); + return rc; +} + + +/* S/G API */ +int VBoxNetBaseService::sendBufferOnWire(PCINTNETSEG paSegs, size_t cSegs, size_t cbFrame) +{ + /* Allocate frame */ + PINTNETHDR pHdr = NULL; + uint8_t *pbFrame = NULL; + int rc = IntNetRingAllocateFrame(&m->m_pIfBuf->Send, (uint32_t)cbFrame, &pHdr, (void **)&pbFrame); + AssertRCReturn(rc, rc); + + /* Now we fill pvFrame with S/G above */ + size_t offFrame = 0; + for (size_t idxSeg = 0; idxSeg < cSegs; ++idxSeg) + { + memcpy(&pbFrame[offFrame], paSegs[idxSeg].pv, paSegs[idxSeg].cb); + offFrame += paSegs[idxSeg].cb; + } + + /* Commit */ + IntNetRingCommitFrameEx(&m->m_pIfBuf->Send, pHdr, cbFrame); + + LogFlowFuncLeaveRC(rc); + return rc; +} + +/** + * forcible ask for send packet on the "wire" + */ +void VBoxNetBaseService::flushWire() +{ + INTNETIFSENDREQ SendReq; + SendReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + SendReq.Hdr.cbReq = sizeof(SendReq); + SendReq.pSession = m->m_pSession; + SendReq.hIf = m->m_hIf; + int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SEND, 0, &SendReq.Hdr); + AssertRCReturnVoid(rc); + LogFlowFuncLeave(); +} + + +int VBoxNetBaseService::hlpUDPBroadcast(unsigned uSrcPort, unsigned uDstPort, + void const *pvData, size_t cbData) const +{ + return VBoxNetUDPBroadcast(m->m_pSession, m->m_hIf, m->m_pIfBuf, + m->m_Ipv4Address, &m->m_MacAddress, uSrcPort, + uDstPort, pvData, cbData); + +} + + +const std::string VBoxNetBaseService::getServiceName() const +{ + return m->m_ServiceName; +} + + +void VBoxNetBaseService::setServiceName(const std::string& aName) +{ + m->m_ServiceName = aName; +} + + +const std::string VBoxNetBaseService::getNetworkName() const +{ + return m->m_NetworkName; +} + + +void VBoxNetBaseService::setNetworkName(const std::string& aName) +{ + 
m->m_NetworkName = aName; +} + + +const RTMAC VBoxNetBaseService::getMacAddress() const +{ + return m->m_MacAddress; +} + + +void VBoxNetBaseService::setMacAddress(const RTMAC& aMac) +{ + m->m_MacAddress = aMac; +} + + +const RTNETADDRIPV4 VBoxNetBaseService::getIpv4Address() const +{ + return m->m_Ipv4Address; +} + + +void VBoxNetBaseService::setIpv4Address(const RTNETADDRIPV4& aAddress) +{ + m->m_Ipv4Address = aAddress; +} + + +const RTNETADDRIPV4 VBoxNetBaseService::getIpv4Netmask() const +{ + return m->m_Ipv4Netmask; +} + + +void VBoxNetBaseService::setIpv4Netmask(const RTNETADDRIPV4& aNetmask) +{ + m->m_Ipv4Netmask = aNetmask; +} + + +uint32_t VBoxNetBaseService::getSendBufSize() const +{ + return m->m_cbSendBuf; +} + + +void VBoxNetBaseService::setSendBufSize(uint32_t cbBuf) +{ + m->m_cbSendBuf = cbBuf; +} + + +uint32_t VBoxNetBaseService::getRecvBufSize() const +{ + return m->m_cbRecvBuf; +} + + +void VBoxNetBaseService::setRecvBufSize(uint32_t cbBuf) +{ + m->m_cbRecvBuf = cbBuf; +} + + +int32_t VBoxNetBaseService::getVerbosityLevel() const +{ + return m->m_cVerbosity; +} + + +void VBoxNetBaseService::setVerbosityLevel(int32_t aVerbosity) +{ + m->m_cVerbosity = aVerbosity; +} + + +void VBoxNetBaseService::addCommandLineOption(PCRTGETOPTDEF optDef) +{ + m->m_vecOptionDefs.push_back(optDef); +} + + +void VBoxNetBaseService::doReceiveLoop() +{ + int rc; + /* Well we're ready */ + PINTNETRINGBUF pRingBuf = &m->m_pIfBuf->Recv; + + for (;;) + { + /* + * Wait for a packet to become available. + */ + rc = waitForIntNetEvent(2000); + if (rc == VERR_SEM_DESTROYED) + break; + + if (RT_FAILURE(rc)) + { + if (rc == VERR_TIMEOUT || rc == VERR_INTERRUPTED) + { + /* do we want interrupt anyone ??? */ + continue; + } + LogRel(("VBoxNetBaseService: waitForIntNetEvent returned %Rrc\n", rc)); + AssertRCReturnVoid(rc); + } + + /* + * Process the receive buffer. 
+ */ + PCINTNETHDR pHdr; + while ((pHdr = IntNetRingGetNextFrameToRead(pRingBuf)) != NULL) + { + uint8_t const u8Type = pHdr->u8Type; + size_t cbFrame = pHdr->cbFrame; + switch (u8Type) + { + case INTNETHDR_TYPE_FRAME: + { + void *pvFrame = IntNetHdrGetFramePtr(pHdr, m->m_pIfBuf); + rc = processFrame(pvFrame, cbFrame); + if (RT_FAILURE(rc) && rc == VERR_IGNORED) + { + /* XXX: UDP + ARP for DHCP */ + VBOXNETUDPHDRS Hdrs; + size_t cb; + void *pv = VBoxNetUDPMatch(m->m_pIfBuf, RTNETIPV4_PORT_BOOTPS, &m->m_MacAddress, + VBOXNETUDP_MATCH_UNICAST + | VBOXNETUDP_MATCH_BROADCAST + | VBOXNETUDP_MATCH_CHECKSUM + | (m->m_cVerbosity > 2 ? VBOXNETUDP_MATCH_PRINT_STDERR : 0), + &Hdrs, &cb); + if (pv && cb) + processUDP(pv, cb); + else + VBoxNetArpHandleIt(m->m_pSession, m->m_hIf, m->m_pIfBuf, &m->m_MacAddress, m->m_Ipv4Address); + } + break; + } + case INTNETHDR_TYPE_GSO: + { + PCPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, m->m_pIfBuf); + rc = processGSO(pGso, cbFrame); + if (RT_FAILURE(rc) && rc == VERR_IGNORED) + break; + break; + } + + case INTNETHDR_TYPE_PADDING: + break; + + default: + break; + } + IntNetRingSkipFrame(&m->m_pIfBuf->Recv); + } /* loop */ + } +} + + +int VBoxNetBaseService::startReceiveThreadAndEnterEventLoop() +{ + AssertMsgReturn(isMainNeeded(), ("It's expected that we need Main"), VERR_INTERNAL_ERROR); + + /* start receiving thread */ + int rc = RTThreadCreate(&m->m_hThrRecv, /* thread handle*/ + &VBoxNetBaseService::Data::recvLoop, /* routine */ + this, /* user data */ + 128 * _1K, /* stack size */ + RTTHREADTYPE_IO, /* type */ + RTTHREADFLAGS_WAITABLE, /* flags */ + "RECV"); + AssertRCReturn(rc, rc); + + m->m_EventQ = com::NativeEventQueue::getMainEventQueue(); + AssertPtrReturn(m->m_EventQ, VERR_INTERNAL_ERROR); + + while (!m->fShutdown) + { + rc = m->m_EventQ->processEventQueue(RT_INDEFINITE_WAIT); + if (rc == VERR_INTERRUPTED) + { + LogFlow(("Event queue processing ended with rc=%Rrc\n", rc)); + break; + } + } + + return VINF_SUCCESS; +} + + 
+void VBoxNetBaseService::debugPrint(int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) const +{ + if (iMinLevel <= m->m_cVerbosity) + { + va_list va; + va_start(va, pszFmt); + debugPrintV(iMinLevel, fMsg, pszFmt, va); + va_end(va); + } +} + + +/** + * Print debug message depending on the m_cVerbosity level. + * + * @param iMinLevel The minimum m_cVerbosity level for this message. + * @param fMsg Whether to dump parts for the current service message. + * @param pszFmt The message format string. + * @param va Optional arguments. + */ +void VBoxNetBaseService::debugPrintV(int iMinLevel, bool fMsg, const char *pszFmt, va_list va) const +{ + RT_NOREF(fMsg); + if (iMinLevel <= m->m_cVerbosity) + { + va_list vaCopy; /* This dude is *very* special, thus the copy. */ + va_copy(vaCopy, va); + RTStrmPrintf(g_pStdErr, "%s: %s: %N\n", + RTProcShortName(), + iMinLevel >= 2 ? "debug" : "info", + pszFmt, + &vaCopy); + va_end(vaCopy); + } +} + + +PRTGETOPTDEF VBoxNetBaseService::getOptionsPtr() +{ + PRTGETOPTDEF pOptArray = NULL; + pOptArray = (PRTGETOPTDEF)RTMemAlloc(sizeof(RTGETOPTDEF) * m->m_vecOptionDefs.size()); + if (!pOptArray) + return NULL; + for (unsigned int i = 0; i < m->m_vecOptionDefs.size(); ++i) + { + PCRTGETOPTDEF pOpt = m->m_vecOptionDefs[i]; + memcpy(&pOptArray[i], pOpt, sizeof(*pOpt)); + } + return pOptArray; +} diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h new file mode 100644 index 00000000..4e2c83ce --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h @@ -0,0 +1,159 @@ +/* $Id: VBoxNetBaseService.h $ */ +/** @file + * VBoxNetUDP - IntNet Client Library. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_NetLib_VBoxNetBaseService_h +#define VBOX_INCLUDED_SRC_NetLib_VBoxNetBaseService_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <iprt/critsect.h> + + +class VBoxNetHlpUDPService +{ +public: + virtual ~VBoxNetHlpUDPService() { /* Make VC++ 19.2 happy. */ } + virtual int hlpUDPBroadcast(unsigned uSrcPort, unsigned uDstPort, void const *pvData, size_t cbData) const = 0; +}; + + +class VBoxNetLockee +{ +public: + virtual ~VBoxNetLockee() { /* Make VC++ 19.2 happy. 
*/ } + virtual int syncEnter() = 0; + virtual int syncLeave() = 0; +}; + + +class VBoxNetALock +{ +public: + VBoxNetALock(VBoxNetLockee *a_lck) : m_lck(a_lck) + { + if (m_lck) + m_lck->syncEnter(); + } + + ~VBoxNetALock() + { + if (m_lck) + m_lck->syncLeave(); + } + +private: + VBoxNetLockee *m_lck; +}; + +# ifndef BASE_SERVICES_ONLY +class VBoxNetBaseService : public VBoxNetHlpUDPService, public VBoxNetLockee +{ +public: + VBoxNetBaseService(const std::string& aName, const std::string& aNetworkName); + virtual ~VBoxNetBaseService(); + int parseArgs(int argc, char **argv); + int tryGoOnline(void); + void shutdown(void); + int syncEnter(); + int syncLeave(); + int waitForIntNetEvent(int cMillis); + int abortWait(); + int sendBufferOnWire(PCINTNETSEG paSegs, size_t cSegs, size_t cbBuffer); + void flushWire(); + + virtual int hlpUDPBroadcast(unsigned uSrcPort, unsigned uDstPort, + void const *pvData, size_t cbData) const; + virtual void usage(void) = 0; + virtual int parseOpt(int rc, const RTGETOPTUNION& getOptVal) = 0; + virtual int processFrame(void *, size_t) = 0; + virtual int processGSO(PCPDMNETWORKGSO, size_t) = 0; + virtual int processUDP(void *, size_t) = 0; + + + virtual int init(void); + virtual int run(void); + virtual bool isMainNeeded() const; + +protected: + const std::string getServiceName() const; + void setServiceName(const std::string&); + + const std::string getNetworkName() const; + void setNetworkName(const std::string&); + + const RTMAC getMacAddress() const; + void setMacAddress(const RTMAC&); + + const RTNETADDRIPV4 getIpv4Address() const; + void setIpv4Address(const RTNETADDRIPV4&); + + const RTNETADDRIPV4 getIpv4Netmask() const; + void setIpv4Netmask(const RTNETADDRIPV4&); + + uint32_t getSendBufSize() const; + void setSendBufSize(uint32_t); + + uint32_t getRecvBufSize() const; + void setRecvBufSize(uint32_t); + + int32_t getVerbosityLevel() const; + void setVerbosityLevel(int32_t); + + void addCommandLineOption(PCRTGETOPTDEF); + + /** + * 
Print debug message depending on the m_cVerbosity level. + * + * @param iMinLevel The minimum m_cVerbosity level for this message. + * @param fMsg Whether to dump parts for the current DHCP message. + * @param pszFmt The message format string. + * @param ... Optional arguments. + */ + void debugPrint(int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) const; + virtual void debugPrintV(int32_t iMinLevel, bool fMsg, const char *pszFmt, va_list va) const; + + private: + void doReceiveLoop(); + + /** starts receiving thread and enter event polling loop. */ + int startReceiveThreadAndEnterEventLoop(); + + protected: + /* VirtualBox instance */ + ComPtr<IVirtualBox> virtualbox; + ComPtr<IVirtualBoxClient> virtualboxClient; + + private: + struct Data; + Data *m; + + private: + PRTGETOPTDEF getOptionsPtr(); +}; +# endif +#endif /* !VBOX_INCLUDED_SRC_NetLib_VBoxNetBaseService_h */ diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp new file mode 100644 index 00000000..eaa02dfc --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp @@ -0,0 +1,150 @@ +/* $Id: VBoxNetIntIf.cpp $ */ +/** @file + * VBoxNetIntIf - IntNet Interface Client Routines. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DEFAULT +#include "VBoxNetLib.h" +#include <VBox/intnet.h> +#include <VBox/intnetinline.h> +#include <VBox/sup.h> +#include <VBox/vmm/vmm.h> +#include <iprt/errcore.h> +#include <VBox/log.h> + +#include <iprt/string.h> + + + +/** + * Flushes the send buffer. + * + * @returns VBox status code. + * @param pSession The support driver session. + * @param hIf The interface handle to flush. + */ +int VBoxNetIntIfFlush(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf) +{ + INTNETIFSENDREQ SendReq; + SendReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + SendReq.Hdr.cbReq = sizeof(SendReq); + SendReq.pSession = pSession; + SendReq.hIf = hIf; + return SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SEND, 0, &SendReq.Hdr); +} + + +/** + * Copys the SG segments into the specified fram. + * + * @param pvFrame The frame buffer. + * @param cSegs The number of segments. + * @param paSegs The segments. + */ +static void vboxnetIntIfCopySG(void *pvFrame, size_t cSegs, PCINTNETSEG paSegs) +{ + uint8_t *pbDst = (uint8_t *)pvFrame; + for (size_t iSeg = 0; iSeg < cSegs; iSeg++) + { + memcpy(pbDst, paSegs[iSeg].pv, paSegs[iSeg].cb); + pbDst += paSegs[iSeg].cb; + } +} + + +/** + * Writes a frame packet to the buffer. + * + * @returns VBox status code. + * @param pBuf The buffer. + * @param pRingBuf The ring buffer to read from. + * @param cSegs The number of segments. + * @param paSegs The segments. 
+ */ +int VBoxNetIntIfRingWriteFrame(PINTNETBUF pBuf, PINTNETRINGBUF pRingBuf, size_t cSegs, PCINTNETSEG paSegs) +{ + RT_NOREF(pBuf); + + /* + * Validate input. + */ + AssertPtr(pBuf); + AssertPtr(pRingBuf); + AssertPtr(paSegs); + Assert(cSegs > 0); + + /* + * Calc frame size. + */ + uint32_t cbFrame = 0; + for (size_t iSeg = 0; iSeg < cSegs; iSeg++) + cbFrame += paSegs[iSeg].cb; + Assert(cbFrame >= sizeof(RTMAC) * 2); + + /* + * Allocate a frame, copy the data and commit it. + */ + PINTNETHDR pHdr = NULL; + void *pvFrame = NULL; + int rc = IntNetRingAllocateFrame(pRingBuf, cbFrame, &pHdr, &pvFrame); + if (RT_SUCCESS(rc)) + { + vboxnetIntIfCopySG(pvFrame, cSegs, paSegs); + IntNetRingCommitFrame(pRingBuf, pHdr); + return VINF_SUCCESS; + } + + return rc; +} + + +/** + * Sends a frame + * + * @returns VBox status code. + * @param pSession The support driver session. + * @param hIf The interface handle. + * @param pBuf The interface buffer. + * @param cSegs The number of segments. + * @param paSegs The segments. + * @param fFlush Whether to flush the write. + */ +int VBoxNetIntIfSend(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf, + size_t cSegs, PCINTNETSEG paSegs, bool fFlush) +{ + int rc = VBoxNetIntIfRingWriteFrame(pBuf, &pBuf->Send, cSegs, paSegs); + if (rc == VERR_BUFFER_OVERFLOW) + { + VBoxNetIntIfFlush(pSession, hIf); + rc = VBoxNetIntIfRingWriteFrame(pBuf, &pBuf->Send, cSegs, paSegs); + } + if (RT_SUCCESS(rc) && fFlush) + rc = VBoxNetIntIfFlush(pSession, hIf); + return rc; +} diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetLib.h b/src/VBox/NetworkServices/NetLib/VBoxNetLib.h new file mode 100644 index 00000000..5c92dacb --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/VBoxNetLib.h @@ -0,0 +1,82 @@ +/* $Id: VBoxNetLib.h $ */ +/** @file + * VBoxNetUDP - IntNet Client Library. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. 
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VBOX_INCLUDED_SRC_NetLib_VBoxNetLib_h
#define VBOX_INCLUDED_SRC_NetLib_VBoxNetLib_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include <iprt/net.h>
#include <VBox/intnet.h>

RT_C_DECLS_BEGIN


/**
 * Header pointers optionally returned by VBoxNetUDPMatch.
 */
typedef struct VBOXNETUDPHDRS
{
    PCRTNETETHERHDR     pEth;           /**< Pointer to the ethernet header. */
    PCRTNETIPV4         pIpv4;          /**< Pointer to the IPV4 header if IPV4 packet. */
    PCRTNETUDP          pUdp;           /**< Pointer to the UDP header. */
} VBOXNETUDPHDRS;
/** Pointer to a VBOXNETUDPHDRS structure. */
typedef VBOXNETUDPHDRS *PVBOXNETUDPHDRS;


/** @name VBoxNetUDPMatch flags.
 * Bit flags for the fFlags argument of VBoxNetUDPMatch; they may be OR'ed
 * together.  NOTE(review): the exact matching rules behind each flag live in
 * the VBoxNetUDPMatch implementation, which is not part of this header.
 * @{ */
#define VBOXNETUDP_MATCH_UNICAST            RT_BIT_32(0)
#define VBOXNETUDP_MATCH_BROADCAST          RT_BIT_32(1)
#define VBOXNETUDP_MATCH_CHECKSUM           RT_BIT_32(2)
#define VBOXNETUDP_MATCH_REQUIRE_CHECKSUM   RT_BIT_32(3)
#define VBOXNETUDP_MATCH_PRINT_STDERR       RT_BIT_32(31)
/** @} */

/* UDP helpers. */
void *  VBoxNetUDPMatch(PINTNETBUF pBuf, unsigned uDstPort, PCRTMAC pDstMac, uint32_t fFlags, PVBOXNETUDPHDRS pHdrs, size_t *pcb);
int     VBoxNetUDPUnicast(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf,
                          RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC SrcMacAddr, unsigned uSrcPort,
                          RTNETADDRIPV4 DstIPv4Addr, PCRTMAC DstMacAddr, unsigned uDstPort,
                          void const *pvData, size_t cbData);
int     VBoxNetUDPBroadcast(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf,
                            RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC SrcMacAddr, unsigned uSrcPort,
                            unsigned uDstPort,
                            void const *pvData, size_t cbData);

/* ARP helper. */
bool    VBoxNetArpHandleIt(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf, PCRTMAC pMacAddr, RTNETADDRIPV4 IPv4Addr);

/* Internal network interface helpers (VBoxNetIntIf.cpp). */
int     VBoxNetIntIfFlush(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf);
int     VBoxNetIntIfRingWriteFrame(PINTNETBUF pBuf, PINTNETRINGBUF pRingBuf, size_t cSegs, PCINTNETSEG paSegs);
int     VBoxNetIntIfSend(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf, size_t cSegs, PCINTNETSEG paSegs, bool fFlush);


RT_C_DECLS_END

#endif /* !VBOX_INCLUDED_SRC_NetLib_VBoxNetLib_h */

diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp
new file mode 100644
index 00000000..c60d6860
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp
@@ -0,0 +1,382 @@
/* $Id: VBoxNetPortForwardString.cpp $ */
/** @file
 * VBoxNetPortForwardString - Routines for managing port-forward strings.
 */

/*
 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#ifndef RT_OS_WINDOWS +# include <netinet/in.h> +#else +# include <iprt/win/winsock2.h> +# include <Ws2ipdef.h> +#endif + +#include <iprt/cdefs.h> +#include <iprt/cidr.h> +#include <iprt/ctype.h> +#include <iprt/errcore.h> +#include <iprt/getopt.h> +#include <iprt/net.h> +#include <iprt/param.h> +#include <iprt/path.h> +#include <iprt/stream.h> +#include <iprt/string.h> + +#include <VBox/log.h> + +#include "VBoxPortForwardString.h" + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +#define PF_FIELD_SEPARATOR ':' +#define PF_ADDRESS_FIELD_STARTS '[' +#define PF_ADDRESS_FIELD_ENDS ']' + +#define PF_STR_FIELD_SEPARATOR ":" +#define PF_STR_ADDRESS_FIELD_STARTS "[" +#define PF_STR_ADDRESS_FIELD_ENDS "]" + + +static int netPfStrAddressParse(char *pszRaw, size_t cchRaw, + char 
*pszAddress, int cbAddress, + bool fEmptyAcceptable) +{ + size_t cchField = 0; + + AssertPtrReturn(pszRaw, -1); + AssertPtrReturn(pszAddress, -1); + AssertReturn(pszRaw[0] == PF_ADDRESS_FIELD_STARTS, -1); + + if (pszRaw[0] == PF_ADDRESS_FIELD_STARTS) + { + /* shift pszRaw to next symbol */ + pszRaw++; + cchRaw--; + + + /* we shouldn't face with ending here */ + AssertReturn(cchRaw > 0, VERR_INVALID_PARAMETER); + + char *pszEndOfAddress = RTStrStr(pszRaw, PF_STR_ADDRESS_FIELD_ENDS); + + /* no pair closing sign */ + AssertPtrReturn(pszEndOfAddress, VERR_INVALID_PARAMETER); + + cchField = pszEndOfAddress - pszRaw; + + /* field should be less then the rest of the string */ + AssertReturn(cchField < cchRaw, VERR_INVALID_PARAMETER); + + if (cchField != 0) + RTStrCopy(pszAddress, RT_MIN(cchField + 1, (size_t)cbAddress), pszRaw); + else if (!fEmptyAcceptable) + return -1; + } + + AssertReturn(pszRaw[cchField] == PF_ADDRESS_FIELD_ENDS, -1); + + return (int)cchField + 2; /* length of the field and closing braces */ +} + + +/** + * Parses a port something. + * + * @returns Offset relative to @a pszRaw of the end of the port field. + * -1 on failure. + * @param pszRaw The zero terminated string to parse. Points a field + * separator. + * @param pu16Port Where to store the port number on success. 
+ */ +static int netPfStrPortParse(char *pszRaw, uint16_t *pu16Port) +{ +#if 1 + AssertPtrReturn(pszRaw, -1); + AssertPtrReturn(pu16Port, -1); + AssertReturn(pszRaw[0] == PF_FIELD_SEPARATOR, -1); + + char *pszNext = NULL; + int rc = RTStrToUInt16Ex(&pszRaw[1], &pszNext, 0, pu16Port); + if (rc == VWRN_TRAILING_CHARS) + AssertReturn(*pszNext == PF_FIELD_SEPARATOR, -1); + else if (rc == VINF_SUCCESS) + Assert(*pszNext == '\0'); + else + AssertMsgFailedReturn(("rc=%Rrc\n", rc), -1); + if (*pu16Port == 0) + return -1; + return (int)(pszNext - pszRaw); + +#else /* The same code, just a little more verbose: */ + char *pszEndOfPort = NULL; + uint16_t u16Port = 0; + int idxRaw = 1; /* we increment pszRaw after checks. */ + int cbRest = 0; + size_t cbPort = 0; + + AssertPtrReturn(pszRaw, -1); + AssertPtrReturn(pu16Port, -1); + AssertReturn(pszRaw[0] == PF_FIELD_SEPARATOR, -1); + + pszRaw++; /* skip field separator */ + cchRaw --; + + char *pszEndOfPort = RTStrStr(pszRaw, ":"); + if (!pszEndOfPort) + { + cbRest = strlen(pszRaw); + + Assert(cchRaw == cbRest); + + /* XXX: Assumption that if string is too big, it will be reported by + * RTStrToUint16. 
+ */ + if (cbRest > 0) + { + pszEndOfPort = pszRaw + cbRest; + cbPort = cbRest; + } + else + return -1; + } + else + cbPort = pszEndOfPort - pszRaw; + + + idxRaw += cbPort; + + Assert(cbRest || pszRaw[idxRaw - 1] == PF_FIELD_SEPARATOR); /* we are 1 char ahead */ + + char szPort[10]; + RT_ZERO(szPort); + + Assert(idxRaw > 0); + RTStrCopy(szPort, RT_MIN(sizeof(szPort), (size_t)(cbPort) + 1), pszRaw); + + if (!(u16Port = RTStrToUInt16(szPort))) + return -1; + + *pu16Port = u16Port; + + return idxRaw; +#endif +} + + +static int netPfStrAddressPortPairParse(char *pszRaw, size_t cchRaw, + char *pszAddress, int cbAddress, + bool fEmptyAddressAcceptable, + uint16_t *pu16Port) +{ + int idxRaw = 0; + int idxRawTotal = 0; + + AssertPtrReturn(pszRaw, -1); + AssertPtrReturn(pszAddress, -1); + AssertPtrReturn(pu16Port, -2); + + /* XXX: Here we should check 0 - ':' and 1 - '[' */ + Assert( pszRaw[0] == PF_FIELD_SEPARATOR + && pszRaw[1] == PF_ADDRESS_FIELD_STARTS); + + pszRaw++; /* field separator skip */ + cchRaw--; + AssertReturn(cchRaw > 0, VERR_INVALID_PARAMETER); + + idxRaw = 0; + + if (pszRaw[0] == PF_ADDRESS_FIELD_STARTS) + { + idxRaw += netPfStrAddressParse(pszRaw, + cchRaw - idxRaw, + pszAddress, + cbAddress, + fEmptyAddressAcceptable); + if (idxRaw == -1) + return -1; + + Assert(pszRaw[idxRaw] == PF_FIELD_SEPARATOR); + } + else return -1; + + pszRaw += idxRaw; + idxRawTotal += idxRaw; + cchRaw -= idxRaw; + + AssertReturn(cchRaw > 0, VERR_INVALID_PARAMETER); + + idxRaw = 0; + + Assert(pszRaw[0] == PF_FIELD_SEPARATOR); + + if (pszRaw[0] == PF_FIELD_SEPARATOR) + { + idxRaw = netPfStrPortParse(pszRaw, pu16Port); + + Assert(strlen(&pszRaw[idxRaw]) == 0 || pszRaw[idxRaw] == PF_FIELD_SEPARATOR); + + if (idxRaw == -1) + return -2; + + idxRawTotal += idxRaw; + + return idxRawTotal + 1; + } + else return -1; /* trailing garbage in the address */ +} + +/* XXX: Having fIPv6 we might emprove adress verification comparing address length + * with INET[6]_ADDRLEN + * + */ +int 
netPfStrToPf(const char *pcszStrPortForward, bool fIPv6, PPORTFORWARDRULE pPfr) +{ +/** r=bird: Redo from scratch? This is very hard to read. And it's going about + * things in a very complicated, potentially leaky (pszRaw) fashion. */ + + int proto; + uint16_t u16HostPort; + uint16_t u16GuestPort; + bool fTcpProto = false; + + int idxRaw = 0; + int cbToken = 0; + + AssertPtrReturn(pcszStrPortForward, VERR_INVALID_PARAMETER); + AssertPtrReturn(pPfr, VERR_INVALID_PARAMETER); + + RT_ZERO(*pPfr); + + char *pszHostAddr = &pPfr->szPfrHostAddr[0]; + char *pszGuestAddr = &pPfr->szPfrGuestAddr[0]; + char *pszName = &pPfr->szPfrName[0]; + + size_t cchRaw = strlen(pcszStrPortForward); + + /* Minimal rule ":tcp:[]:0:[]:0" has got lenght 14 */ + AssertReturn(cchRaw > 14, VERR_INVALID_PARAMETER); + + char *pszRaw = RTStrDup(pcszStrPortForward); + AssertReturn(pszRaw, VERR_NO_MEMORY); + + char *pszRawBegin = pszRaw; + + /* name */ + if (pszRaw[idxRaw] == PF_FIELD_SEPARATOR) + idxRaw = 1; /* begin of the next segment */ + else + { + char *pszEndOfName = RTStrStr(pszRaw + 1, PF_STR_FIELD_SEPARATOR); + if (!pszEndOfName) + goto invalid_parameter; + + cbToken = pszEndOfName - pszRaw; /* don't take : into account */ + /* XXX it's unacceptable to have only name entry in PF */ + AssertReturn(cbToken < (ssize_t)cchRaw, VERR_INVALID_PARAMETER); + + if ( cbToken < 0 + || (size_t)cbToken >= PF_NAMELEN) + goto invalid_parameter; + + RTStrCopy(pszName, + RT_MIN((size_t)cbToken + 1, PF_NAMELEN), + pszRaw); + pszRaw += cbToken; /* move to separator */ + cchRaw -= cbToken; + } + + AssertReturn(pszRaw[0] == PF_FIELD_SEPARATOR, VERR_INVALID_PARAMETER); + /* protocol */ + + pszRaw++; /* skip separator */ + cchRaw--; + idxRaw = 0; + + if ( ( (fTcpProto = (RTStrNICmp(pszRaw, "tcp", 3) == 0)) + || RTStrNICmp(pszRaw, "udp", 3) == 0) + && pszRaw[3] == PF_FIELD_SEPARATOR) + { + proto = (fTcpProto ? 
IPPROTO_TCP : IPPROTO_UDP); + idxRaw = 3; + } + else + goto invalid_parameter; + + pszRaw += idxRaw; + cchRaw -= idxRaw; + + idxRaw = netPfStrAddressPortPairParse(pszRaw, cchRaw, + pszHostAddr, INET6_ADDRSTRLEN, + true, &u16HostPort); + if (idxRaw < 0) + return VERR_INVALID_PARAMETER; + + pszRaw += idxRaw; + cchRaw -= idxRaw; + + Assert(pszRaw[0] == PF_FIELD_SEPARATOR); + + idxRaw = netPfStrAddressPortPairParse(pszRaw, cchRaw, + pszGuestAddr, INET6_ADDRSTRLEN, + false, &u16GuestPort); + + if (idxRaw < 0) + goto invalid_parameter; + + /* XXX: fill the rule */ + pPfr->fPfrIPv6 = fIPv6; + pPfr->iPfrProto = proto; + + pPfr->u16PfrHostPort = u16HostPort; + + if (*pszGuestAddr == '\0') + goto invalid_parameter; /* guest address should be defined */ + + pPfr->u16PfrGuestPort = u16GuestPort; + + Log(("name: %s\n" + "proto: %d\n" + "host address: %s\n" + "host port: %d\n" + "guest address: %s\n" + "guest port:%d\n", + pszName, proto, + pszHostAddr, u16HostPort, + pszGuestAddr, u16GuestPort)); + + RTStrFree(pszRawBegin); + return VINF_SUCCESS; + +invalid_parameter: + RTStrFree(pszRawBegin); + if (pPfr) + RT_ZERO(*pPfr); + return VERR_INVALID_PARAMETER; +} diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp new file mode 100644 index 00000000..0981ccec --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp @@ -0,0 +1,314 @@ +/* $Id: VBoxNetUDP.cpp $ */ +/** @file + * VBoxNetUDP - IntNet UDP Client Routines. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DEFAULT +#include "VBoxNetLib.h" +#include <iprt/stream.h> +#include <iprt/string.h> +#include <iprt/rand.h> +#include <VBox/log.h> +#include <VBox/vmm/pdmnetinline.h> +#include <VBox/intnetinline.h> + + +/** + * Checks if the head of the receive ring is a UDP packet matching the given + * criteria. + * + * @returns Pointer to the data if it matches. + * @param pBuf The IntNet buffers. + * @param uDstPort The destination port to match. + * @param pDstMac The destination address to match if + * VBOXNETUDP_MATCH_UNICAST is specied. + * @param fFlags Flags indicating what to match and some debug stuff. + * See VBOXNETUDP_MATCH_*. + * @param pHdrs Where to return the pointers to the headers. + * Optional. + * @param pcb Where to return the size of the data on success. + */ +void *VBoxNetUDPMatch(PINTNETBUF pBuf, unsigned uDstPort, PCRTMAC pDstMac, uint32_t fFlags, PVBOXNETUDPHDRS pHdrs, size_t *pcb) +{ + /* + * Clear return values so we can return easier on mismatch. + */ + *pcb = 0; + if (pHdrs) + { + pHdrs->pEth = NULL; + pHdrs->pIpv4 = NULL; + pHdrs->pUdp = NULL; + } + + /* + * Valid IntNet Ethernet frame? 
+ */ + PCINTNETHDR pHdr = IntNetRingGetNextFrameToRead(&pBuf->Recv); + if ( !pHdr + || ( pHdr->u8Type != INTNETHDR_TYPE_FRAME + && pHdr->u8Type != INTNETHDR_TYPE_GSO)) + return NULL; + + size_t cbFrame = pHdr->cbFrame; + const void *pvFrame = IntNetHdrGetFramePtr(pHdr, pBuf); + PCPDMNETWORKGSO pGso = NULL; + if (pHdr->u8Type == INTNETHDR_TYPE_GSO) + { + pGso = (PCPDMNETWORKGSO)pvFrame; + if (!PDMNetGsoIsValid(pGso, cbFrame, cbFrame - sizeof(*pGso))) + return NULL; + /** @todo IPv6 UDP support, goes for this entire function really. Not really + * important yet since this is currently only used by the DHCP server. */ + if (pGso->u8Type != PDMNETWORKGSOTYPE_IPV4_UDP) + return NULL; + pvFrame = pGso + 1; + cbFrame -= sizeof(*pGso); + } + + PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvFrame; + if (pHdrs) + pHdrs->pEth = pEthHdr; + +#ifdef IN_RING3 + /* Dump if to stderr/log if that's wanted. */ + if (fFlags & VBOXNETUDP_MATCH_PRINT_STDERR) + { + RTStrmPrintf(g_pStdErr, "frame: cb=%04x dst=%.6Rhxs src=%.6Rhxs type=%04x%s\n", + cbFrame, &pEthHdr->DstMac, &pEthHdr->SrcMac, RT_BE2H_U16(pEthHdr->EtherType), + !memcmp(&pEthHdr->DstMac, pDstMac, sizeof(*pDstMac)) ? " Mine!" : ""); + } +#endif + + /* + * Ethernet matching. + */ + + /* Ethernet min frame size. */ + if (cbFrame < 64) + return NULL; + + /* Match Ethertype: IPV4? */ + /** @todo VLAN tagging? */ + if (pEthHdr->EtherType != RT_H2BE_U16_C(RTNET_ETHERTYPE_IPV4)) + return NULL; + + /* Match destination address (ethernet) */ + if ( ( !(fFlags & VBOXNETUDP_MATCH_UNICAST) + || memcmp(&pEthHdr->DstMac, pDstMac, sizeof(pEthHdr->DstMac))) + && ( !(fFlags & VBOXNETUDP_MATCH_BROADCAST) + || pEthHdr->DstMac.au16[0] != 0xffff + || pEthHdr->DstMac.au16[1] != 0xffff + || pEthHdr->DstMac.au16[2] != 0xffff)) + return NULL; + + /* + * If we're working on a GSO frame, we need to make sure the length fields + * are set correctly (they are usually set to 0). 
+ */ + if (pGso) + PDMNetGsoPrepForDirectUse(pGso, (void *)pvFrame, cbFrame, PDMNETCSUMTYPE_NONE); + + /* + * IP validation and matching. + */ + PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)(pEthHdr + 1); + if (pHdrs) + pHdrs->pIpv4 = pIpHdr; + + /* Protocol: UDP */ + if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP) + return NULL; + + /* Valid IPv4 header? */ + size_t const offIpHdr = (uintptr_t)pIpHdr - (uintptr_t)pEthHdr; + if (!RTNetIPv4IsHdrValid(pIpHdr, cbFrame - offIpHdr, cbFrame - offIpHdr, !pGso /*fChecksum*/)) + return NULL; + + /* + * UDP matching and validation. + */ + PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint32_t *)pIpHdr + pIpHdr->ip_hl); + if (pHdrs) + pHdrs->pUdp = pUdpHdr; + + /* Destination port */ + if (RT_BE2H_U16(pUdpHdr->uh_dport) != uDstPort) + return NULL; + + if (!pGso) + { + /* Validate the UDP header according to flags. */ + size_t offUdpHdr = (uintptr_t)pUdpHdr - (uintptr_t)pEthHdr; + if (fFlags & (VBOXNETUDP_MATCH_CHECKSUM | VBOXNETUDP_MATCH_REQUIRE_CHECKSUM)) + { + if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbFrame - offUdpHdr, true /*fChecksum*/)) + return NULL; + if ( (fFlags & VBOXNETUDP_MATCH_REQUIRE_CHECKSUM) + && !pUdpHdr->uh_sum) + return NULL; + } + else + { + if (!RTNetIPv4IsUDPSizeValid(pIpHdr, pUdpHdr, cbFrame - offUdpHdr)) + return NULL; + } + } + + /* + * We've got a match! + */ + *pcb = RT_N2H_U16(pUdpHdr->uh_ulen) - sizeof(*pUdpHdr); + return (void *)(pUdpHdr + 1); +} + + +/** Internal worker for VBoxNetUDPUnicast and VBoxNetUDPBroadcast. 
*/ +static int vboxnetudpSend(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf, + RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC pSrcMacAddr, unsigned uSrcPort, + RTNETADDRIPV4 DstIPv4Addr, PCRTMAC pDstMacAddr, unsigned uDstPort, + void const *pvData, size_t cbData) +{ + INTNETSEG aSegs[4]; + + /* the Ethernet header */ + RTNETETHERHDR EtherHdr; + EtherHdr.DstMac = *pDstMacAddr; + EtherHdr.SrcMac = *pSrcMacAddr; + EtherHdr.EtherType = RT_H2BE_U16_C(RTNET_ETHERTYPE_IPV4); + + aSegs[0].pv = &EtherHdr; + aSegs[0].cb = sizeof(EtherHdr); + aSegs[0].Phys = NIL_RTHCPHYS; + + /* the IP header */ + RTNETIPV4 IpHdr; + unsigned cbIdHdr = RT_UOFFSETOF(RTNETIPV4, ip_options); + IpHdr.ip_v = 4; + IpHdr.ip_hl = cbIdHdr >> 2; + IpHdr.ip_tos = 0; + IpHdr.ip_len = RT_H2BE_U16((uint16_t)(cbData + sizeof(RTNETUDP) + cbIdHdr)); + IpHdr.ip_id = (uint16_t)RTRandU32(); + IpHdr.ip_off = 0; + IpHdr.ip_ttl = 255; + IpHdr.ip_p = RTNETIPV4_PROT_UDP; + IpHdr.ip_sum = 0; + IpHdr.ip_src = SrcIPv4Addr; + IpHdr.ip_dst = DstIPv4Addr; + IpHdr.ip_sum = RTNetIPv4HdrChecksum(&IpHdr); + + aSegs[1].pv = &IpHdr; + aSegs[1].cb = cbIdHdr; + aSegs[1].Phys = NIL_RTHCPHYS; + + + /* the UDP bit */ + RTNETUDP UdpHdr; + UdpHdr.uh_sport = RT_H2BE_U16(uSrcPort); + UdpHdr.uh_dport = RT_H2BE_U16(uDstPort); + UdpHdr.uh_ulen = RT_H2BE_U16((uint16_t)(cbData + sizeof(RTNETUDP))); +#if 0 + UdpHdr.uh_sum = 0; /* pretend checksumming is disabled */ +#else + UdpHdr.uh_sum = RTNetIPv4UDPChecksum(&IpHdr, &UdpHdr, pvData); +#endif + + aSegs[2].pv = &UdpHdr; + aSegs[2].cb = sizeof(UdpHdr); + aSegs[2].Phys = NIL_RTHCPHYS; + + /* the payload */ + aSegs[3].pv = (void *)pvData; + aSegs[3].cb = (uint32_t)cbData; + aSegs[3].Phys = NIL_RTHCPHYS; + + + /* send it */ + return VBoxNetIntIfSend(pSession, hIf, pBuf, RT_ELEMENTS(aSegs), &aSegs[0], true /* fFlush */); +} + + +/** + * Sends an unicast UDP packet. + * + * @returns VBox status code. + * @param pSession The support driver session handle. + * @param hIf The interface handle. 
 * @param   pBuf            The interface buffer.
 * @param   SrcIPv4Addr     The source IPv4 address.
 * @param   pSrcMacAddr     The source MAC address.
 * @param   uSrcPort        The source port number.
 * @param   DstIPv4Addr     The destination IPv4 address. Can be broadcast.
 * @param   pDstMacAddr     The destination MAC address.
 * @param   uDstPort        The destination port number.
 * @param   pvData          The data payload.
 * @param   cbData          The size of the data payload.
 */
int VBoxNetUDPUnicast(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf,
                      RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC pSrcMacAddr, unsigned uSrcPort,
                      RTNETADDRIPV4 DstIPv4Addr, PCRTMAC pDstMacAddr, unsigned uDstPort,
                      void const *pvData, size_t cbData)
{
    /* Thin wrapper: all arguments are passed unchanged to the shared send worker. */
    return vboxnetudpSend(pSession, hIf, pBuf,
                          SrcIPv4Addr, pSrcMacAddr, uSrcPort,
                          DstIPv4Addr, pDstMacAddr, uDstPort,
                          pvData, cbData);
}


/**
 * Sends a broadcast UDP packet.
 *
 * @returns VBox status code.
 * @param   pSession        The support driver session handle.
 * @param   hIf             The interface handle.
 * @param   pBuf            The interface buffer.
 * @param   SrcIPv4Addr     The source IPv4 address.
 * @param   pSrcMacAddr     The source MAC address.
 * @param   uSrcPort        The source port number.
 * @param   uDstPort        The destination port number.
 * @param   pvData          The data payload.
 * @param   cbData          The size of the data payload.
+ */ +int VBoxNetUDPBroadcast(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf, + RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC pSrcMacAddr, unsigned uSrcPort, + unsigned uDstPort, + void const *pvData, size_t cbData) +{ + RTNETADDRIPV4 IPv4AddrBrdCast; + IPv4AddrBrdCast.u = UINT32_C(0xffffffff); + RTMAC MacBrdCast; + MacBrdCast.au16[0] = MacBrdCast.au16[1] = MacBrdCast.au16[2] = UINT16_C(0xffff); + + return vboxnetudpSend(pSession, hIf, pBuf, + SrcIPv4Addr, pSrcMacAddr, uSrcPort, + IPv4AddrBrdCast, &MacBrdCast, uDstPort, + pvData, cbData); +} + diff --git a/src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h b/src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h new file mode 100644 index 00000000..57347a1c --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h @@ -0,0 +1,69 @@ +/* $Id: VBoxPortForwardString.h $ */ +/** @file + * VBoxPortForwardString + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VBOX_INCLUDED_SRC_NetLib_VBoxPortForwardString_h
#define VBOX_INCLUDED_SRC_NetLib_VBoxPortForwardString_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include <iprt/net.h>
#include <VBox/intnet.h>

RT_C_DECLS_BEGIN

/** Size of the rule name buffer, including the terminator. */
#define PF_NAMELEN 64
/*
 * TBD: This is the shared implementation for parsing a port-forward string
 * of the format:
 *      name:proto:[ipv4 or ipv6 host addr]:host-port:[ipv4 or ipv6 guest addr]:guest-port
 *
 * The name may be omitted (the string then starts with the separator) and
 * proto is either "tcp" or "udp".
 *
 * This code is supposed to be used in NetService and Frontend and perhaps in
 * the corresponding services.
 *
 * Note: ports are in host format.
 */

/**
 * A parsed port-forwarding rule, as filled in by netPfStrToPf.
 */
typedef struct PORTFORWARDRULE
{
    /* rule name, empty if the string did not specify one */
    char       szPfrName[PF_NAMELEN];
    /* true if ipv6 and false otherwise */
    int        fPfrIPv6;
    /* IPPROTO_{UDP,TCP} */
    int        iPfrProto;
    /* host address as text, may be empty */
    char       szPfrHostAddr[INET6_ADDRSTRLEN];
    /* host port, host byte order */
    uint16_t   u16PfrHostPort;
    /* guest address as text */
    char       szPfrGuestAddr[INET6_ADDRSTRLEN];
    /* guest port, host byte order */
    uint16_t   u16PfrGuestPort;
} PORTFORWARDRULE, *PPORTFORWARDRULE;

/* Parses pszStrPortForward into *pPfr; returns a VBox status code. */
int netPfStrToPf(const char *pszStrPortForward, bool fIPv6, PPORTFORWARDRULE pPfr);

RT_C_DECLS_END

#endif /* !VBOX_INCLUDED_SRC_NetLib_VBoxPortForwardString_h */

diff --git a/src/VBox/NetworkServices/NetLib/cpp/utils.h b/src/VBox/NetworkServices/NetLib/cpp/utils.h
new file mode 100644
index 00000000..04e222ac
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/cpp/utils.h
@@ -0,0 +1,57 @@
/* $Id: utils.h $ */
/** @file
 * NetLib/cpp/utils.h
 */

/*
 * Copyright (C) 2013-2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_NetLib_cpp_utils_h +#define VBOX_INCLUDED_SRC_NetLib_cpp_utils_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <iprt/types.h> + +/** less operator for IPv4 addresess */ +DECLINLINE(bool) operator <(const RTNETADDRIPV4 &lhs, const RTNETADDRIPV4 &rhs) +{ + return RT_N2H_U32(lhs.u) < RT_N2H_U32(rhs.u); +} + +/** greater operator for IPv4 addresess */ +DECLINLINE(bool) operator >(const RTNETADDRIPV4 &lhs, const RTNETADDRIPV4 &rhs) +{ + return RT_N2H_U32(lhs.u) > RT_N2H_U32(rhs.u); +} + +/** Compares MAC addresses */ +DECLINLINE(bool) operator== (const RTMAC &lhs, const RTMAC &rhs) +{ + return lhs.au16[0] == rhs.au16[0] + && lhs.au16[1] == rhs.au16[1] + && lhs.au16[2] == rhs.au16[2]; +} + +#endif /* !VBOX_INCLUDED_SRC_NetLib_cpp_utils_h */ + diff --git a/src/VBox/NetworkServices/NetLib/shared_ptr.h b/src/VBox/NetworkServices/NetLib/shared_ptr.h new file mode 100644 index 00000000..a7a488a7 --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/shared_ptr.h @@ -0,0 +1,112 @@ +/* $Id: shared_ptr.h $ */ +/** @file + * Simplified shared pointer. + */ + +/* + * Copyright (C) 2013-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
/*
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VBOX_INCLUDED_SRC_NetLib_shared_ptr_h
#define VBOX_INCLUDED_SRC_NetLib_shared_ptr_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#ifdef __cplusplus
/**
 * Simplified reference counted pointer.
 *
 * Every SharedPtr always owns a control block (even when the managed pointer
 * is NULL); copies share the control block and the managed object is deleted
 * together with the control block when the last copy goes away.
 *
 * The reference count is a plain int, so instances sharing a control block
 * must not be used from several threads without external locking.
 */
template<typename T>
class SharedPtr
{
    /** Control block: the managed object and the number of SharedPtr
     *  instances referring to it. */
    struct imp
    {
        imp(T *pTrg = NULL, int cnt = 1): ptr(pTrg), refcnt(cnt) {}
        ~imp() { delete ptr; } /* deleting NULL is a no-op */

        T  *ptr;
        int refcnt;
    };


    public:
    /** Takes ownership of @a t (may be NULL). */
    SharedPtr(T *t = NULL): p(new imp(t))
    {
    }

    ~SharedPtr()
    {
        release(p);
    }


    /** Shares the control block of @a rhs. */
    SharedPtr(const SharedPtr& rhs): p(rhs.p)
    {
        p->refcnt++;
    }

    /**
     * Drops the current reference and shares the control block of @a rhs.
     *
     * The new reference is taken before the old one is released: releasing
     * the old object first could destroy @a rhs itself when it is a member of
     * that object (e.g. "head = head->next").
     */
    const SharedPtr& operator= (const SharedPtr& rhs)
    {
        if (p == rhs.p) return *this;

        imp *pOld = p;
        p = rhs.p;
        p->refcnt++;
        release(pOld);

        return *this;
    }


    /** Returns the managed pointer (may be NULL). */
    T *get() const
    {
        return p->ptr;
    }


    T *operator->()
    {
        return p->ptr;
    }


    const T*operator->() const
    {
        return p->ptr;
    }


    /** Returns the number of SharedPtr instances sharing the managed object. */
    int use_count() const
    {
        return p->refcnt;
    }

    private:
    /** Drops one reference and frees the control block on the last one. */
    static void release(imp *pImp)
    {
        if (--pImp->refcnt == 0)
            delete pImp;
    }

    imp *p;
};
#endif

#endif /* !VBOX_INCLUDED_SRC_NetLib_shared_ptr_h */