summaryrefslogtreecommitdiffstats
path: root/src/VBox/NetworkServices
diff options
context:
space:
mode:
Diffstat (limited to 'src/VBox/NetworkServices')
-rw-r--r--src/VBox/NetworkServices/.scm-settings19
-rw-r--r--src/VBox/NetworkServices/DHCP/ClientDataInt.h70
-rw-r--r--src/VBox/NetworkServices/DHCP/Config.cpp1493
-rw-r--r--src/VBox/NetworkServices/DHCP/Config.h845
-rw-r--r--src/VBox/NetworkServices/DHCP/Makefile.kmk72
-rw-r--r--src/VBox/NetworkServices/DHCP/NetworkManagerDhcp.cpp189
-rw-r--r--src/VBox/NetworkServices/DHCP/README.customoptions23
-rw-r--r--src/VBox/NetworkServices/DHCP/VBoxNetDHCP.cpp885
-rw-r--r--src/VBox/NetworkServices/DHCP/VBoxNetDHCP.rc55
-rw-r--r--src/VBox/NetworkServices/DHCP/VBoxNetDHCPHardened.cpp25
-rw-r--r--src/VBox/NetworkServices/Dhcpd/ClientId.cpp122
-rw-r--r--src/VBox/NetworkServices/Dhcpd/ClientId.h70
-rw-r--r--src/VBox/NetworkServices/Dhcpd/Config.cpp949
-rw-r--r--src/VBox/NetworkServices/Dhcpd/Config.h110
-rw-r--r--src/VBox/NetworkServices/Dhcpd/DHCPD.cpp286
-rw-r--r--src/VBox/NetworkServices/Dhcpd/DHCPD.h65
-rw-r--r--src/VBox/NetworkServices/Dhcpd/Db.cpp747
-rw-r--r--src/VBox/NetworkServices/Dhcpd/Db.h162
-rw-r--r--src/VBox/NetworkServices/Dhcpd/Defs.h62
-rw-r--r--src/VBox/NetworkServices/Dhcpd/DhcpMessage.cpp412
-rw-r--r--src/VBox/NetworkServices/Dhcpd/DhcpMessage.h138
-rw-r--r--src/VBox/NetworkServices/Dhcpd/DhcpOptions.cpp242
-rw-r--r--src/VBox/NetworkServices/Dhcpd/DhcpOptions.h642
-rw-r--r--src/VBox/NetworkServices/Dhcpd/IPv4Pool.cpp138
-rw-r--r--src/VBox/NetworkServices/Dhcpd/IPv4Pool.h126
-rw-r--r--src/VBox/NetworkServices/Dhcpd/Makefile.kmk93
-rw-r--r--src/VBox/NetworkServices/Dhcpd/TimeStamp.cpp36
-rw-r--r--src/VBox/NetworkServices/Dhcpd/TimeStamp.h101
-rw-r--r--src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.cpp805
-rw-r--r--src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.rc55
-rw-r--r--src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpdHardened.cpp25
-rw-r--r--src/VBox/NetworkServices/Dhcpd/lwipopts.h181
-rw-r--r--src/VBox/NetworkServices/Makefile.kmk35
-rw-r--r--src/VBox/NetworkServices/NAT/Makefile.kmk151
-rw-r--r--src/VBox/NetworkServices/NAT/RTWinPoll.cpp154
-rw-r--r--src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp225
-rw-r--r--src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp1504
-rw-r--r--src/VBox/NetworkServices/NAT/VBoxNetNAT.rc55
-rw-r--r--src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp27
-rw-r--r--src/VBox/NetworkServices/NAT/dhcp6.h51
-rw-r--r--src/VBox/NetworkServices/NAT/fwtcp.c316
-rw-r--r--src/VBox/NetworkServices/NAT/fwudp.c545
-rw-r--r--src/VBox/NetworkServices/NAT/getrawsock.c155
-rw-r--r--src/VBox/NetworkServices/NAT/lwipopts.h205
-rw-r--r--src/VBox/NetworkServices/NAT/mkrawsock.c339
-rw-r--r--src/VBox/NetworkServices/NAT/portfwd.c263
-rw-r--r--src/VBox/NetworkServices/NAT/portfwd.h74
-rw-r--r--src/VBox/NetworkServices/NAT/proxy.c705
-rw-r--r--src/VBox/NetworkServices/NAT/proxy.h121
-rw-r--r--src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c317
-rw-r--r--src/VBox/NetworkServices/NAT/proxy_pollmgr.c828
-rw-r--r--src/VBox/NetworkServices/NAT/proxy_pollmgr.h85
-rw-r--r--src/VBox/NetworkServices/NAT/proxy_rtadvd.c417
-rw-r--r--src/VBox/NetworkServices/NAT/proxy_tftpd.c981
-rw-r--r--src/VBox/NetworkServices/NAT/pxdns.c932
-rw-r--r--src/VBox/NetworkServices/NAT/pxping.c2001
-rw-r--r--src/VBox/NetworkServices/NAT/pxping_win.c662
-rw-r--r--src/VBox/NetworkServices/NAT/pxremap.c323
-rw-r--r--src/VBox/NetworkServices/NAT/pxremap.h52
-rw-r--r--src/VBox/NetworkServices/NAT/pxtcp.c2506
-rw-r--r--src/VBox/NetworkServices/NAT/pxtcp.h42
-rw-r--r--src/VBox/NetworkServices/NAT/pxudp.c848
-rw-r--r--src/VBox/NetworkServices/NAT/rtmon_bsd.c116
-rw-r--r--src/VBox/NetworkServices/NAT/rtmon_linux.c249
-rw-r--r--src/VBox/NetworkServices/NAT/rtmon_win.c21
-rw-r--r--src/VBox/NetworkServices/NAT/tftp.h49
-rw-r--r--src/VBox/NetworkServices/NAT/winpoll.h53
-rw-r--r--src/VBox/NetworkServices/NAT/winutils.h210
-rw-r--r--src/VBox/NetworkServices/NetLib/ComHostUtils.cpp230
-rw-r--r--src/VBox/NetworkServices/NetLib/Makefile.kup0
-rw-r--r--src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp155
-rw-r--r--src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp849
-rw-r--r--src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h148
-rw-r--r--src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp140
-rw-r--r--src/VBox/NetworkServices/NetLib/VBoxNetLib.h72
-rw-r--r--src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp372
-rw-r--r--src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp304
-rw-r--r--src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h59
-rw-r--r--src/VBox/NetworkServices/NetLib/cpp/utils.h47
-rw-r--r--src/VBox/NetworkServices/NetLib/shared_ptr.h102
-rw-r--r--src/VBox/NetworkServices/NetLib/utils.h142
81 files changed, 27480 insertions, 0 deletions
diff --git a/src/VBox/NetworkServices/.scm-settings b/src/VBox/NetworkServices/.scm-settings
new file mode 100644
index 00000000..41767eea
--- /dev/null
+++ b/src/VBox/NetworkServices/.scm-settings
@@ -0,0 +1,19 @@
+# $Id: .scm-settings $
+## @file
+# Source code massager settings for the network services.
+#
+
+#
+# Copyright (C) 2019 Oracle Corporation
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+
+/*.h: --guard-relative-to-dir .
+
diff --git a/src/VBox/NetworkServices/DHCP/ClientDataInt.h b/src/VBox/NetworkServices/DHCP/ClientDataInt.h
new file mode 100644
index 00000000..74879523
--- /dev/null
+++ b/src/VBox/NetworkServices/DHCP/ClientDataInt.h
@@ -0,0 +1,70 @@
+/* $Id: ClientDataInt.h $ */
+/** @file
+ * ClientData - per-client state kept by the DHCP server (addresses, MAC, lease timing, options).
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_DHCP_ClientDataInt_h
+#define VBOX_INCLUDED_SRC_DHCP_ClientDataInt_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+/** Per-client DHCP state: addresses, MAC, lease/binding flags and timing, raw options. */
+class ClientData
+{
+public:
+    ClientData()
+    {
+        m_address.u = 0;
+        m_network.u = 0;
+        RT_ZERO(m_mac); /* was left indeterminate by the original constructor */
+        fHasLease = false;
+        fHasClient = false;
+        fBinding = true;
+        u64TimestampBindingStarted = 0;
+        u64TimestampLeasingStarted = 0;
+        u32LeaseExpirationPeriod = 0;
+        u32BindExpirationPeriod = 0;
+        pCfg = NULL;
+    }
+    ~ClientData(){}
+
+    /* client information */
+    RTNETADDRIPV4 m_address;
+    RTNETADDRIPV4 m_network;
+    RTMAC m_mac;
+
+    bool fHasClient;
+
+    /* Lease part */
+    bool fHasLease;
+    /** Lease isn't committed yet (starts out true). */
+    bool fBinding;
+
+    /** Timestamp when the lease was committed. */
+    uint64_t u64TimestampLeasingStarted;
+    /** Period after which the lease expires, in seconds. */
+    uint32_t u32LeaseExpirationPeriod;
+
+    /** Timestamp when the lease was bound. */
+    uint64_t u64TimestampBindingStarted;
+    /** Period after which the binding expires, in seconds. */
+    uint32_t u32BindExpirationPeriod;
+
+    MapOptionId2RawOption options;
+
+    NetworkConfigEntity *pCfg;
+};
+
+#endif /* !VBOX_INCLUDED_SRC_DHCP_ClientDataInt_h */
diff --git a/src/VBox/NetworkServices/DHCP/Config.cpp b/src/VBox/NetworkServices/DHCP/Config.cpp
new file mode 100644
index 00000000..b1d09c78
--- /dev/null
+++ b/src/VBox/NetworkServices/DHCP/Config.cpp
@@ -0,0 +1,1493 @@
+/* $Id: Config.cpp $ */
+/** @file
+ * Configuration for DHCP.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/**
+ * XXX: license.
+ */
+
+#include <iprt/asm.h>
+#include <iprt/getopt.h>
+#include <iprt/net.h>
+#include <iprt/time.h>
+
+#include <VBox/sup.h>
+#include <VBox/intnet.h>
+#include <VBox/intnetinline.h>
+#include <VBox/vmm/vmm.h>
+#include <VBox/version.h>
+
+#include <VBox/com/array.h>
+#include <VBox/com/string.h>
+
+#include <iprt/cpp/xml.h>
+
+#define BASE_SERVICES_ONLY
+#include "../NetLib/VBoxNetBaseService.h"
+#include "../NetLib/VBoxNetLib.h"
+#include "../NetLib/shared_ptr.h"
+
+#include <list>
+#include <vector>
+#include <map>
+#include <string>
+
+#include "Config.h"
+#include "ClientDataInt.h"
+/** Lease equality: true when both handles hold the same underlying pointer (m.get()); this is identity, not a field-wise comparison. */
+bool operator== (const Lease& lhs, const Lease& rhs)
+{
+ return (lhs.m.get() == rhs.m.get());
+}
+
+/** Lease inequality, defined as the negation of operator==. */
+bool operator!= (const Lease& lhs, const Lease& rhs)
+{
+ return !(lhs == rhs);
+}
+/** Strict weak ordering for Lease (used as std::map key): by address first, issue time only as tie-break. */
+bool operator< (const Lease& lhs, const Lease& rhs)
+{
+    if (lhs.getAddress() < rhs.getAddress()) return true;
+    if (rhs.getAddress() < lhs.getAddress()) return false;
+    return lhs.issued() < rhs.issued(); /* addresses are equivalent here; the old "a || b" form could order two leases both ways */
+}
+/* File-level singletons; the ones with initialisers are dynamically initialised in declaration order. */
+/* NOTE(review): g_RootConfig's constructor passes g_AnyClient to its base class, but g_AnyClient is initialised one line later (still NULL at that point) - confirm this is intended. */
+const NullConfigEntity *g_NullConfig = new NullConfigEntity();
+RootConfigEntity *g_RootConfig = new RootConfigEntity(std::string("ROOT"), 1200 /* 20 min. */);
+const ClientMatchCriteria *g_AnyClient = new AnyClientMatchCriteria();
+/* getConfigurationManager() tests this pointer while it is still zero-initialised, so the instance is created by this initialiser. */
+static ConfigurationManager *g_ConfigurationManager = ConfigurationManager::getConfigurationManager();
+/* Singleton slot, filled lazily by NetworkManager::getNetworkManager(). */
+NetworkManager *NetworkManager::g_NetworkManager;
+/** Returns true when client compares equal to the MAC address stored in this criteria (m_mac). */
+bool MACClientMatchCriteria::check(const Client& client) const
+{
+ return (client == m_mac);
+}
+
+/** Finds the best matching config entity for client in this subtree, stores it in *cfg and returns its match level; a non-matching leaf returns 0 and leaves *cfg untouched. */
+int BaseConfigEntity::match(Client& client, BaseConfigEntity **cfg)
+{
+ int iMatch = (m_criteria && m_criteria->check(client) ? m_MatchLevel : 0); /* 0 when there is no criteria or it rejects the client */
+ if (m_children.empty())
+ {
+ if (iMatch > 0)
+ {
+ *cfg = this;
+ return iMatch;
+ }
+ }
+ else
+ {
+ *cfg = this;
+ /* XXX: hack - an entity with children starts out as the match at its own level, whatever its criteria said above. */
+ BaseConfigEntity *matching = this;
+ int matchingLevel = m_MatchLevel;
+
+ for (std::vector<BaseConfigEntity *>::iterator it = m_children.begin();
+ it != m_children.end();
+ ++it)
+ {
+ iMatch = (*it)->match(client, &matching);
+ if (iMatch > matchingLevel) /* strictly better only: the first child wins among equal levels */
+ {
+ *cfg = matching;
+ matchingLevel = iMatch;
+ }
+ }
+ return matchingLevel;
+ }
+ return iMatch;
+}
+
+/* Root configuration entity.
+ * Base-class constructor used below (signature as previously noted here):
+ *   NetworkConfigEntity(std::string name, ConfigEntity* pCfg,
+ *                       ClientMatchCriteria* criteria,
+ *                       RTNETADDRIPV4& networkID,
+ *                       RTNETADDRIPV4& networkMask)
+ * so the root is given network id 0 (g_AnyIpv4) and an all-ones mask (g_AllIpv4).
+ */
+static const RTNETADDRIPV4 g_AnyIpv4 = {0};
+static const RTNETADDRIPV4 g_AllIpv4 = {0xffffffff};
+RootConfigEntity::RootConfigEntity(std::string name, uint32_t expPeriod):
+    NetworkConfigEntity(name, g_NullConfig, g_AnyClient, g_AnyIpv4, g_AllIpv4)
+{
+    m_u32ExpirationPeriod = expPeriod; /* caller-supplied lease expiration period */
+    m_MatchLevel = 2;
+}
+
+/* Configuration Manager: private state reached through ConfigurationManager::m. */
+struct ConfigurationManager::Data
+{
+ Data():fFileExists(false){}
+
+ MapLease2Ip4Address m_allocations; /* lease -> IPv4 address; filled by loadFromFile() and allocateLease4Client() */
+ Ipv4AddressContainer m_nameservers; /* list for RTNET_DHCP_OPT_DNS */
+ Ipv4AddressContainer m_routers; /* list for RTNET_DHCP_OPT_ROUTERS */
+
+ std::string m_domainName; /* value for RTNET_DHCP_OPT_DOMAIN_NAME */
+ VecClient m_clients; /* known clients, searched by MAC in getClientByDhcpPacket() */
+ com::Utf8Str m_leaseStorageFilename; /* set by loadFromFile(); while empty, saveToFile() does nothing */
+ bool fFileExists; /* NOTE(review): only ever assigned false in the code visible here */
+};
+/** Returns the ConfigurationManager singleton, creating and init()-ing it on first use. */
+ConfigurationManager *ConfigurationManager::getConfigurationManager()
+{
+    /* Fast path: the instance already exists. */
+    if (g_ConfigurationManager)
+        return g_ConfigurationManager;
+
+    /* First call: allocate the manager and let init() set up its private data. */
+    g_ConfigurationManager = new ConfigurationManager();
+    g_ConfigurationManager->init();
+
+    return g_ConfigurationManager;
+}
+
+/* Element and attribute names of the lease storage XML document (see loadFromFile() and saveToFile()). */
+const std::string tagXMLLeases = "Leases"; /* root element */
+const std::string tagXMLLeasesAttributeVersion = "version";
+const std::string tagXMLLeasesVersion_1_0 = "1.0"; /* value written by saveToFile() */
+const std::string tagXMLLease = "Lease"; /* one child of the root per lease */
+const std::string tagXMLLeaseAttributeMac = "mac";
+const std::string tagXMLLeaseAttributeNetwork = "network";
+const std::string tagXMLLeaseAddress = "Address";
+const std::string tagXMLAddressAttributeValue = "value";
+const std::string tagXMLLeaseTime = "Time";
+const std::string tagXMLTimeAttributeIssued = "issued";
+const std::string tagXMLTimeAttributeExpiration = "expiration";
+const std::string tagXMLLeaseOptions = "Options";
+/**
+ * Remembers the lease file name, then loads the leases stored in it (sample below; the layout inside Options is not verified here).
+ * @returns VINF_SUCCESS (also when the file cannot be read); VERR_NOT_FOUND when the root element is missing or not "Leases".
+ * @verbatim
+ <Leases version="1.0">
+ <Lease mac="" network="">
+ <Address value=""/>
+ <Time issued="" expiration=""/>
+ <Options>
+ <option name="" type=""/>
+ </Options>
+ </Lease>
+ </Leases>
+ @endverbatim
+ */
+int ConfigurationManager::loadFromFile(const com::Utf8Str& leaseStorageFileName)
+{
+ m->m_leaseStorageFilename = leaseStorageFileName;
+
+ xml::XmlFileParser parser;
+ xml::Document doc;
+
+ try {
+ parser.read(m->m_leaseStorageFilename.c_str(), doc);
+ }
+ catch (...) /* any parser exception (presumably a missing or unreadable file) just means "no stored leases" */
+ {
+ return VINF_SUCCESS;
+ }
+
+ /* XML parsing */
+ xml::ElementNode *root = doc.getRootElement();
+
+ if (!root || !root->nameEquals(tagXMLLeases.c_str()))
+ {
+ m->fFileExists = false;
+ return VERR_NOT_FOUND;
+ }
+
+ com::Utf8Str version;
+ if (root) /* always true here: a NULL root returned above */
+ root->getAttributeValue(tagXMLLeasesAttributeVersion.c_str(), version);
+
+ /* XXX: version check */
+ xml::NodesLoop leases(*root);
+
+ const xml::ElementNode *lease;
+ while ((lease = leases.forAllNodes()))
+ {
+ if (!lease->nameEquals(tagXMLLease.c_str()))
+ continue;
+ /* NOTE(review): data is handed to both Lease(data) and Client(data) below - confirm they share ownership. */
+ ClientData *data = new ClientData();
+ Lease l(data);
+ if (l.fromXML(lease))
+ {
+ /* Register the address first, then resolve the lease's configuration. */
+ m->m_allocations.insert(MapLease2Ip4AddressPair(l, l.getAddress()));
+
+
+ NetworkConfigEntity *pNetCfg = NULL;
+ Client c(data);
+ int rc = g_RootConfig->match(c, (BaseConfigEntity **)&pNetCfg);
+ Assert(rc >= 0 && pNetCfg); RT_NOREF(rc);
+
+ l.setConfig(pNetCfg);
+
+ m->m_clients.push_back(c);
+ }
+ }
+
+ return VINF_SUCCESS;
+}
+
+/** Best-effort dump of all allocations to the lease file. @returns VERR_INTERNAL_ERROR if the root element cannot be created, otherwise VINF_SUCCESS. */
+int ConfigurationManager::saveToFile()
+{
+    if (m->m_leaseStorageFilename.isEmpty())
+        return VINF_SUCCESS; /* no lease file configured, nothing to do */
+    xml::Document doc;
+    xml::ElementNode *root = doc.createRootElement(tagXMLLeases.c_str());
+    if (!root)
+        return VERR_INTERNAL_ERROR;
+
+    root->setAttribute(tagXMLLeasesAttributeVersion.c_str(), tagXMLLeasesVersion_1_0.c_str());
+
+    for(MapLease2Ip4AddressConstIterator it = m->m_allocations.begin();
+        it != m->m_allocations.end(); ++it)
+    {
+        xml::ElementNode *lease = root->createChild(tagXMLLease.c_str());
+        if (!it->first.toXML(lease)) /* report instead of silently ignoring; the remaining leases are still written */
+            Log(("dhcp: failed to serialize a lease to XML\n"));
+    }
+
+    try
+    {
+        xml::XmlFileWriter writer(doc);
+        writer.write(m->m_leaseStorageFilename.c_str(), true);
+    }
+    catch (...) /* persistence stays best effort, but the failure is no longer invisible */
+    {
+        Log(("dhcp: failed to write lease file '%s'\n", m->m_leaseStorageFilename.c_str()));
+    }
+    return VINF_SUCCESS;
+}
+/** Fetches the parameter request list option (RTNET_DHCP_OPT_PARAM_REQ_LIST) of pDhcpMsg into rawOpt and returns findOption()'s status. */
+int ConfigurationManager::extractRequestList(PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg, RawOption& rawOpt)
+{
+    int const rcFind = ConfigurationManager::findOption(RTNET_DHCP_OPT_PARAM_REQ_LIST, pDhcpMsg, cbDhcpMsg, rawOpt);
+    return rcFind;
+}
+
+/**
+ * Looks up the client a DHCP packet comes from, by hardware (MAC) address.
+ *
+ * @returns The matching known client; a newly registered client when the MAC
+ *          was not seen before; Client::NullClient when the packet fails
+ *          RTNetIPv4IsDHCPValid().
+ */
+Client ConfigurationManager::getClientByDhcpPacket(const RTNETBOOTP *pDhcpMsg, size_t cbDhcpMsg)
+{
+    uint8_t uMsgType = 0;
+    bool const fDhcpValid = RTNetIPv4IsDHCPValid(NULL, pDhcpMsg, cbDhcpMsg, &uMsgType);
+    AssertReturn(fDhcpValid, Client::NullClient);
+
+    LogFlowFunc(("dhcp:mac:%RTmac\n", &pDhcpMsg->bp_chaddr.Mac));
+    /* 1st. client IDs */
+    for (VecClientIterator it = m->m_clients.begin();
+         it != m->m_clients.end();
+         ++it)
+    {
+        if ((*it) == pDhcpMsg->bp_chaddr.Mac)
+        {
+            /* %RTmac consumes a pointer to the MAC, so pass its address (the object itself was passed before). */
+            LogFlowFunc(("client:mac:%RTmac\n", &it->getMacAddress()));
+            /** @todo check timestamp that request wasn't expired. */
+            return (*it);
+        }
+    }
+
+    /* No session for this client yet: register it under its MAC.
+     * The loop only exits without returning once the end is reached, so the
+     * old trailing "return Client::NullClient" could never be executed. */
+    Client c;
+    c.initWithMac(pDhcpMsg->bp_chaddr.Mac);
+    m->m_clients.push_back(c);
+    return m->m_clients.back();
+}
+
+/**
+ * Finds an option in the options area of a DHCP message.
+ *
+ * @returns VINF_SUCCESS when the option was found and copied into @a opt,
+ * VERR_INVALID_PARAMETER when the message is too short or lacks the
+ * DHCP cookie, VERR_NOT_FOUND when the option is not present.
+ *
+ * @param uOption The option to search for (must not be PAD or END).
+ * @param pDhcpMsg The DHCP message.
+ * @param cbDhcpMsg Size of the DHCP message.
+ * @param opt Receives id, length and raw bytes of the first matching
+ * option; the length is clipped to what is left in the
+ * options area. Left untouched when nothing is found.
+ *
+ */
+int
+ConfigurationManager::findOption(uint8_t uOption, PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg, RawOption& opt)
+{
+ Assert(uOption != RTNET_DHCP_OPT_PAD);
+ Assert(uOption != RTNET_DHCP_OPT_END);
+
+ /*
+ * Validate the DHCP bits and figure the max size of the options in the vendor field.
+ */
+ if (cbDhcpMsg <= RT_UOFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts))
+ return VERR_INVALID_PARAMETER;
+
+ if (pDhcpMsg->bp_vend.Dhcp.dhcp_cookie != RT_H2N_U32_C(RTNET_DHCP_COOKIE))
+ return VERR_INVALID_PARAMETER;
+
+ size_t cbLeft = cbDhcpMsg - RT_UOFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts);
+ if (cbLeft > RTNET_DHCP_OPT_SIZE)
+ cbLeft = RTNET_DHCP_OPT_SIZE;
+
+ /*
+ * Search the vendor field.
+ */
+ uint8_t const *pb = &pDhcpMsg->bp_vend.Dhcp.dhcp_opts[0];
+ while (pb && cbLeft > 0)
+ {
+ uint8_t uCur = *pb;
+ if (uCur == RTNET_DHCP_OPT_PAD) /* PAD is a single byte without a length field */
+ {
+ cbLeft--;
+ pb++;
+ }
+ else if (uCur == RTNET_DHCP_OPT_END)
+ break;
+ else if (cbLeft <= 1) /* no room left for the length byte */
+ break;
+ else
+ {
+ uint8_t cbCur = pb[1];
+ if (cbCur > cbLeft - 2) /* clip a length that claims more bytes than remain */
+ cbCur = (uint8_t)(cbLeft - 2);
+ if (uCur == uOption)
+ {
+ opt.u8OptId = uCur;
+ memcpy(opt.au8RawOpt, pb+2, cbCur); /* NOTE(review): assumes au8RawOpt holds at least 255 bytes - confirm against RawOption */
+ opt.cbRawOpt = cbCur;
+ return VINF_SUCCESS;
+ }
+ pb += cbCur + 2; /* skip id byte, length byte and payload */
+ cbLeft -= cbCur + 2;
+ }
+ }
+
+ /** @todo search extended dhcp option field(s) when present */
+
+ return VERR_NOT_FOUND;
+}
+
+
+/**
+ * Binds (or re-uses) a lease for the client until it follows up with DHCPREQUEST; returns Lease::NullLease when no config matches or no address is free.
+ */
+Lease ConfigurationManager::allocateLease4Client(const Client& client, PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg)
+{
+    {
+        /**
+         * This means that the client already has a bound or committed lease.
+         * If that happens it means that we received DHCPDISCOVER twice.
+         */
+        const Lease l = client.lease();
+        if (l != Lease::NullLease)
+        {
+            /* Here we should take lease from the m_allocation which was feed with leases
+             * on start
+             */
+            if (l.isExpired())
+            {
+                expireLease4Client(const_cast<Client&>(client));
+                if (!l.isExpired())
+                    return l;
+            }
+            else
+            {
+                AssertReturn(l.getAddress().u != 0, Lease::NullLease);
+                return l;
+            }
+        }
+    }
+
+    RTNETADDRIPV4 hintAddress;
+    RawOption opt;
+    NetworkConfigEntity *pNetCfg = NULL;
+
+    Client cl(client);
+    AssertReturn(g_RootConfig->match(cl, (BaseConfigEntity **)&pNetCfg) > 0, Lease::NullLease);
+
+    /* DHCPDISCOVER MAY contain request address */
+    hintAddress.u = 0;
+    int rc = findOption(RTNET_DHCP_OPT_REQ_ADDR, pDhcpMsg, cbDhcpMsg, opt);
+    if (RT_SUCCESS(rc) && opt.cbRawOpt == sizeof(hintAddress.u)) /* ignore wrong-sized options instead of reading stale bytes */
+    {
+        memcpy(&hintAddress.u, opt.au8RawOpt, sizeof(hintAddress.u)); /* avoids the unaligned, type-punned read */
+        if (   RT_H2N_U32(hintAddress.u) < RT_H2N_U32(pNetCfg->lowerIp().u)
+            || RT_H2N_U32(hintAddress.u) > RT_H2N_U32(pNetCfg->upperIp().u))
+            hintAddress.u = 0; /* clear hint */
+    }
+
+    if (   hintAddress.u
+        && !isAddressTaken(hintAddress))
+    {
+        Lease l(cl);
+        l.setConfig(pNetCfg);
+        l.setAddress(hintAddress);
+        m->m_allocations.insert(MapLease2Ip4AddressPair(l, hintAddress));
+        return l;
+    }
+
+    uint32_t u32 = 0;
+    for(u32 = RT_H2N_U32(pNetCfg->lowerIp().u);
+        u32 <= RT_H2N_U32(pNetCfg->upperIp().u);
+        ++u32)
+    {
+        RTNETADDRIPV4 address;
+        address.u = RT_H2N_U32(u32);
+        if (!isAddressTaken(address))
+        {
+            Lease l(cl);
+            l.setConfig(pNetCfg);
+            l.setAddress(address);
+            m->m_allocations.insert(MapLease2Ip4AddressPair(l, address));
+            return l;
+        }
+    }
+
+    return Lease::NullLease;
+}
+
+/** Ends the binding phase of the client's lease: sets the expiration period from its config, restarts the phase clock and saves all leases. */
+int ConfigurationManager::commitLease4Client(Client& client)
+{
+    Lease lease = client.lease();
+    AssertReturn(lease != Lease::NullLease, VERR_INTERNAL_ERROR);
+
+    /* Leave the binding phase before touching the timing data. */
+    lease.bindingPhase(false);
+
+    const NetworkConfigEntity *pNetCfg = lease.getConfig();
+    AssertPtr(pNetCfg);
+    lease.setExpiration(pNetCfg->expirationPeriod());
+    lease.phaseStart(RTTimeMilliTS());
+
+    saveToFile();
+    return VINF_SUCCESS;
+}
+
+/** Expires the client's lease. @returns VINF_SUCCESS, VERR_INTERNAL_ERROR when the client has no lease, VERR_NOT_FOUND when a binding-phase lease is not in m_allocations. */
+int ConfigurationManager::expireLease4Client(Client& client)
+{
+ Lease l = client.lease();
+ AssertReturn(l != Lease::NullLease, VERR_INTERNAL_ERROR);
+
+ if (l.isInBindingPhase())
+ {
+ /* Never committed: drop the allocation and mark the lease expired. */
+ MapLease2Ip4AddressIterator it = m->m_allocations.find(l);
+ AssertReturn(it != m->m_allocations.end(), VERR_NOT_FOUND);
+
+ /*
+ * XXX: perhaps it better to keep this allocation ????
+ */
+ m->m_allocations.erase(it);
+
+ l.expire();
+ return VINF_SUCCESS;
+ }
+
+ l = Lease(client); /* re-new; NOTE(review): only the local handle l is reassigned - confirm Lease(client) has the intended effect */
+ return VINF_SUCCESS;
+}
+
+/** Linear search of m_allocations for addr. On a hit, lease receives the owning lease only if it was not NullLease on entry; on a miss it is reset to NullLease. */
+bool ConfigurationManager::isAddressTaken(const RTNETADDRIPV4& addr, Lease& lease)
+{
+    for (MapLease2Ip4AddressIterator itAlloc = m->m_allocations.begin();
+         itAlloc != m->m_allocations.end();
+         ++itAlloc)
+    {
+        if (itAlloc->second.u != addr.u)
+            continue;
+
+        /* Found it; only hand the owner back when the caller's lease is non-null. */
+        if (lease != Lease::NullLease)
+            lease = itAlloc->first;
+        return true;
+    }
+
+    /* Nobody holds this address. */
+    lease = Lease::NullLease;
+    return false;
+}
+
+/** Convenience overload: reports whether addr is present in m_allocations, discarding the owning lease. */
+bool ConfigurationManager::isAddressTaken(const RTNETADDRIPV4& addr)
+{
+ Lease ignore;
+ return isAddressTaken(addr, ignore);
+}
+
+/**
+ * Creates a "network-<n>" NetworkConfigEntity under g_RootConfig for any client (the literal 5 is presumably its match level).
+ * A zero LowerAddress / UpperAddress is replaced in place by networkId / networkId with all host bits set. The first parameter is unused.
+ */
+NetworkConfigEntity *ConfigurationManager::addNetwork(NetworkConfigEntity *,
+                                                      const RTNETADDRIPV4& networkId,
+                                                      const RTNETADDRIPV4& netmask,
+                                                      RTNETADDRIPV4& LowerAddress,
+                                                      RTNETADDRIPV4& UpperAddress)
+{
+    static int s_iNextNetwork; /* numbering shared by all calls */
+    char szName[64];
+    RTStrPrintf(szName, sizeof(szName), "network-%d", s_iNextNetwork++);
+    std::string strName(szName);
+
+    /* Fill in defaults for bounds the caller left at zero. */
+    if (LowerAddress.u == 0)
+        LowerAddress = networkId;
+    if (UpperAddress.u == 0)
+        UpperAddress.u = networkId.u | (~netmask.u);
+
+    return new NetworkConfigEntity(strName,
+                                   g_RootConfig,
+                                   g_AnyClient,
+                                   5,
+                                   networkId,
+                                   netmask,
+                                   LowerAddress,
+                                   UpperAddress);
+}
+/**
+ * Creates a "host-<n>" HostConfigEntity for address below pCfg, matched by criteria; <n> counts the calls made so far.
+ */
+HostConfigEntity *ConfigurationManager::addHost(NetworkConfigEntity* pCfg,
+                                                const RTNETADDRIPV4& address,
+                                                ClientMatchCriteria *criteria)
+{
+    static int s_iNextHost; /* numbering shared by all calls */
+    char szName[64];
+    RTStrPrintf(szName, sizeof(szName), "host-%d", s_iNextHost++);
+    std::string strName(szName);
+
+    return new HostConfigEntity(address, strName, pCfg, criteria);
+}
+/**
+ * Appends address to the list kept for a DHCP option.
+ * Only RTNET_DHCP_OPT_DNS and RTNET_DHCP_OPT_ROUTERS have lists; any other
+ * id is logged and ignored.
+ * @returns VINF_SUCCESS in all cases.
+ */
+int ConfigurationManager::addToAddressList(uint8_t u8OptId, RTNETADDRIPV4& address)
+{
+    if (u8OptId == RTNET_DHCP_OPT_DNS)
+        m->m_nameservers.push_back(address);
+    else if (u8OptId == RTNET_DHCP_OPT_ROUTERS)
+        m->m_routers.push_back(address);
+    else
+        Log(("dhcp-opt: list (%d) unsupported\n", u8OptId));
+    return VINF_SUCCESS;
+}
+
+/**
+ * Empties the address list kept for a DHCP option.
+ * Only RTNET_DHCP_OPT_DNS and RTNET_DHCP_OPT_ROUTERS have lists; any other
+ * id is logged and ignored.
+ * @returns VINF_SUCCESS in all cases.
+ */
+int ConfigurationManager::flushAddressList(uint8_t u8OptId)
+{
+    if (u8OptId == RTNET_DHCP_OPT_DNS)
+        m->m_nameservers.clear();
+    else if (u8OptId == RTNET_DHCP_OPT_ROUTERS)
+        m->m_routers.clear();
+    else
+        Log(("dhcp-opt: list (%d) unsupported\n", u8OptId));
+    return VINF_SUCCESS;
+}
+
+/**
+ * Returns the address list kept for a DHCP option id.
+ * @returns the name server list for RTNET_DHCP_OPT_DNS, the router list for
+ *          RTNET_DHCP_OPT_ROUTERS, and the m_empty member for any other id.
+ */
+const Ipv4AddressContainer& ConfigurationManager::getAddressList(uint8_t u8OptId)
+{
+    if (u8OptId == RTNET_DHCP_OPT_DNS)
+        return m->m_nameservers;
+
+    if (u8OptId == RTNET_DHCP_OPT_ROUTERS)
+        return m->m_routers;
+
+    return m_empty; /* XXX: Grrr !!! */
+}
+
+/**
+ * Stores str as the value of a string-valued DHCP option.
+ * Only RTNET_DHCP_OPT_DOMAIN_NAME is kept; any other id is silently ignored.
+ * @returns VINF_SUCCESS in all cases.
+ */
+int ConfigurationManager::setString(uint8_t u8OptId, const std::string& str)
+{
+    /* The domain name is the only string option with storage. */
+    if (u8OptId == RTNET_DHCP_OPT_DOMAIN_NAME)
+        m->m_domainName = str;
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Returns the stored value of a string-valued DHCP option.
+ * @returns the domain name for RTNET_DHCP_OPT_DOMAIN_NAME when one has been
+ *          set (non-empty); the m_noString member in every other case.
+ */
+const std::string &ConfigurationManager::getString(uint8_t u8OptId)
+{
+    const bool fHaveDomainName =    u8OptId == RTNET_DHCP_OPT_DOMAIN_NAME
+                                 && m->m_domainName.length() != 0;
+    if (fHaveDomainName)
+        return m->m_domainName;
+
+    /* Unknown option id, or no domain name configured. */
+    return m_noString;
+}
+
+/** Allocates the private Data block (m); getConfigurationManager() calls this right after constructing the instance. */
+void ConfigurationManager::init()
+{
+ m = new ConfigurationManager::Data();
+}
+
+/** Releases the private Data block; deleting a null pointer is a no-op, so no check is needed. */
+ConfigurationManager::~ConfigurationManager() { delete m; }
+
+/**
+ * Network manager private state: reply buffer, our own address/netmask/MAC, the DHCP server object and the UDP service.
+ */
+struct NetworkManager::Data
+{
+    Data()
+    {
+        RT_ZERO(BootPReplyMsg);
+        cbBooPReplyMsg = 0;
+
+        m_OurAddress.u = 0;
+        m_OurNetmask.u = 0;
+        RT_ZERO(m_OurMac);
+        m_service = NULL; /* was left uninitialised until setService(); doReply() dereferences it */
+    }
+    union {
+        RTNETBOOTP BootPHeader;
+        uint8_t au8Storage[1024];
+    } BootPReplyMsg;
+    int cbBooPReplyMsg;
+
+    RTNETADDRIPV4 m_OurAddress;
+    RTNETADDRIPV4 m_OurNetmask;
+    RTMAC m_OurMac;
+
+    ComPtr<IDHCPServer> m_DhcpServer;
+    const VBoxNetHlpUDPService *m_service;
+};
+
+/** Allocates the private Data block. */
+NetworkManager::NetworkManager()
+    : m(new NetworkManager::Data())
+{
+}
+
+/** Releases the private Data block and clears the pointer. */
+NetworkManager::~NetworkManager()
+{
+ delete m;
+ m = NULL;
+}
+
+/** Returns the NetworkManager singleton; the first call creates it and stores aDhcpServer, later calls ignore the argument. */
+NetworkManager *NetworkManager::getNetworkManager(ComPtr<IDHCPServer> aDhcpServer)
+{
+    if (g_NetworkManager)
+        return g_NetworkManager;
+
+    /* First call: create the instance and remember the DHCP server object. */
+    g_NetworkManager = new NetworkManager();
+    g_NetworkManager->m->m_DhcpServer = aDhcpServer;
+    return g_NetworkManager;
+}
+
+/** Returns the stored own IPv4 address (0 until setOurAddress() is called). */
+const RTNETADDRIPV4& NetworkManager::getOurAddress() const
+{
+ return m->m_OurAddress;
+}
+
+/** Returns the stored own netmask (0 until setOurNetmask() is called). */
+const RTNETADDRIPV4& NetworkManager::getOurNetmask() const
+{
+ return m->m_OurNetmask;
+}
+
+/** Returns the stored own MAC address (all zero until setOurMac() is called). */
+const RTMAC& NetworkManager::getOurMac() const
+{
+ return m->m_OurMac;
+}
+
+/** Stores our own IPv4 address; doReply() sends it as the server identifier option. */
+void NetworkManager::setOurAddress(const RTNETADDRIPV4& aAddress)
+{
+ m->m_OurAddress = aAddress;
+}
+
+/** Stores our own netmask. */
+void NetworkManager::setOurNetmask(const RTNETADDRIPV4& aNetmask)
+{
+ m->m_OurNetmask = aNetmask;
+}
+
+/** Stores our own MAC address. */
+void NetworkManager::setOurMac(const RTMAC& aMac)
+{
+ m->m_OurMac = aMac;
+}
+
+/** Stores the UDP service that doReply() uses to broadcast replies; the pointer is kept, not copied. */
+void NetworkManager::setService(const VBoxNetHlpUDPService *srv)
+{
+ m->m_service = srv;
+}
+
+/**
+ * Builds and sends a DHCPOFFER datagram for client: message type, lease time from the lease's config, then the options from processParameterReqList(). Returns doReply()'s status.
+ */
+int NetworkManager::offer4Client(const Client& client, uint32_t u32Xid,
+ uint8_t *pu8ReqList, int cReqList)
+{
+ Lease l(client); /* XXX: oh, it looks badly, but now we have lease */
+ prepareReplyPacket4Client(client, u32Xid);
+
+ RTNETADDRIPV4 address = l.getAddress();
+ m->BootPReplyMsg.BootPHeader.bp_yiaddr = address;
+
+ /* Ubuntu ???*/
+ m->BootPReplyMsg.BootPHeader.bp_ciaddr = address;
+
+ /* options:
+ * - IP lease time
+ * - message type
+ * - server identifier
+ */
+ RawOption opt;
+ RT_ZERO(opt);
+
+ std::vector<RawOption> extra;
+ opt.u8OptId = RTNET_DHCP_OPT_MSG_TYPE;
+ opt.au8RawOpt[0] = RTNET_DHCP_MT_OFFER;
+ opt.cbRawOpt = 1;
+ extra.push_back(opt); /* opt is copied, so it can be reused for the next option */
+
+ opt.u8OptId = RTNET_DHCP_OPT_LEASE_TIME;
+
+ const NetworkConfigEntity *pCfg = l.getConfig();
+ AssertPtr(pCfg);
+
+ *(uint32_t *)opt.au8RawOpt = RT_H2N_U32(pCfg->expirationPeriod()); /* lease time in network byte order; overwrites the message-type byte left in opt */
+ opt.cbRawOpt = sizeof(RTNETADDRIPV4);
+
+ extra.push_back(opt);
+
+ processParameterReqList(client, pu8ReqList, cReqList, extra);
+
+ return doReply(client, extra);
+}
+
+/**
+ * Builds and sends a DHCPACK for client: message type, the lease's expiration time, then the options from processParameterReqList(). Returns doReply()'s status.
+ */
+int NetworkManager::ack(const Client& client, uint32_t u32Xid,
+ uint8_t *pu8ReqList, int cReqList)
+{
+ RTNETADDRIPV4 address;
+
+ prepareReplyPacket4Client(client, u32Xid);
+
+ Lease l = client.lease();
+ address = l.getAddress();
+ m->BootPReplyMsg.BootPHeader.bp_ciaddr = address;
+
+
+ /* rfc2131 4.3.1 is about DHCPDISCOVER and this value is equal to ciaddr from
+ * DHCPREQUEST or 0 ...
+ * XXX: Using addressHint is not correct way to initialize [cy]iaddress...
+ */
+ m->BootPReplyMsg.BootPHeader.bp_ciaddr = address; /* repeats the assignment made above */
+ m->BootPReplyMsg.BootPHeader.bp_yiaddr = address;
+
+ Assert(m->BootPReplyMsg.BootPHeader.bp_yiaddr.u);
+
+ /* options:
+ * - IP address lease time (if DHCPREQUEST)
+ * - message type
+ * - server identifier
+ */
+ RawOption opt;
+ RT_ZERO(opt);
+
+ std::vector<RawOption> extra;
+ opt.u8OptId = RTNET_DHCP_OPT_MSG_TYPE;
+ opt.au8RawOpt[0] = RTNET_DHCP_MT_ACK;
+ opt.cbRawOpt = 1;
+ extra.push_back(opt);
+
+ /*
+ * XXX: lease time should be conditional. If on dhcprequest then time should be provided,
+ * else on dhcpinform it mustn't.
+ */
+ opt.u8OptId = RTNET_DHCP_OPT_LEASE_TIME;
+ *(uint32_t *)opt.au8RawOpt = RT_H2N_U32(l.getExpiration()); /* network byte order */
+ opt.cbRawOpt = sizeof(RTNETADDRIPV4);
+ extra.push_back(opt);
+
+ processParameterReqList(client, pu8ReqList, cReqList, extra);
+
+ return doReply(client, extra);
+}
+
+/**
+ * Network manager creates DHCPNAK.
+ * @returns VERR_INTERNAL_ERROR when the client has no lease, otherwise the status of doReply().
+ */
+int NetworkManager::nak(const Client& client, uint32_t u32Xid)
+{
+    Lease l = client.lease();
+    if (l == Lease::NullLease)
+        return VERR_INTERNAL_ERROR;
+
+    prepareReplyPacket4Client(client, u32Xid);
+
+    /* yiaddr is filled in by prepareReplyPacket4Client(), and
+     * RFC 2131 requires it to be zero for NAK.
+     */
+    m->BootPReplyMsg.BootPHeader.bp_yiaddr.u = 0;
+
+    /* options:
+     * - message type (if DHCPREQUEST)
+     * - server identifier
+     */
+    RawOption opt;
+    RT_ZERO(opt); /* as in offer4Client()/ack(): do not copy indeterminate bytes into the vector */
+    std::vector<RawOption> extra;
+
+    opt.u8OptId = RTNET_DHCP_OPT_MSG_TYPE;
+    opt.au8RawOpt[0] = RTNET_DHCP_MT_NAC;
+    opt.cbRawOpt = 1;
+    extra.push_back(opt);
+    return doReply(client, extra);
+}
+
+/**
+ * Resets the reply buffer and fills in the fixed BOOTP header fields for a
+ * reply to client: op/htype/hlen, transaction id u32Xid, the client's MAC,
+ * yiaddr from the client's lease and the DHCP cookie. Options are left empty.
+ * @returns VINF_SUCCESS in all cases.
+ */
+int NetworkManager::prepareReplyPacket4Client(const Client& client, uint32_t u32Xid)
+{
+    RT_ZERO(m->BootPReplyMsg);
+    RTNETBOOTP &rReply = m->BootPReplyMsg.BootPHeader;
+
+    rReply.bp_op = RTNETBOOTP_OP_REPLY;
+    rReply.bp_htype = RTNET_ARP_ETHER;
+    rReply.bp_hlen = sizeof(RTMAC);
+    rReply.bp_hops = 0;
+    rReply.bp_xid = u32Xid;
+    rReply.bp_secs = 0;
+    /* XXX: bp_flags should be processed specially */
+    rReply.bp_flags = 0;
+    rReply.bp_ciaddr.u = 0;
+    rReply.bp_giaddr.u = 0;
+    rReply.bp_chaddr.Mac = client.getMacAddress();
+
+    /* "Your" address comes from the client's current lease. */
+    rReply.bp_yiaddr = client.lease().getAddress();
+    rReply.bp_siaddr.u = 0;
+
+    rReply.bp_vend.Dhcp.dhcp_cookie = RT_H2N_U32_C(RTNET_DHCP_COOKIE);
+
+    /* Start from an empty options area. */
+    memset(&rReply.bp_vend.Dhcp.dhcp_opts[0], '\0', RTNET_DHCP_OPT_SIZE);
+    return VINF_SUCCESS;
+}
+
+/** Appends the server id, the extra options and the lease's own options to the prepared reply, then broadcasts it. @returns VINF_SUCCESS or the broadcast failure status. */
+int NetworkManager::doReply(const Client& client, const std::vector<RawOption>& extra)
+{
+ int rc;
+
+ /*
+ * Write the options into the prepared reply buffer.
+ */
+ VBoxNetDhcpWriteCursor Cursor(&m->BootPReplyMsg.BootPHeader, RTNET_DHCP_NORMAL_SIZE);
+
+ /* The basics */
+
+ Cursor.optIPv4Addr(RTNET_DHCP_OPT_SERVER_ID, m->m_OurAddress);
+
+ const Lease l = client.lease();
+ const std::map<uint8_t, RawOption>& options = l.options();
+
+ for(std::vector<RawOption>::const_iterator it = extra.begin();
+ it != extra.end(); ++it)
+ {
+ if (!Cursor.begin(it->u8OptId, it->cbRawOpt)) /* stop at the first option the cursor rejects (presumably out of space) */
+ break;
+ Cursor.put(it->au8RawOpt, it->cbRawOpt);
+
+ }
+
+ for(std::map<uint8_t, RawOption>::const_iterator it = options.begin();
+ it != options.end(); ++it)
+ {
+ if (!Cursor.begin(it->second.u8OptId, it->second.cbRawOpt))
+ break;
+ Cursor.put(it->second.au8RawOpt, it->second.cbRawOpt);
+
+ }
+
+ Cursor.optEnd();
+
+ /* The unicast path below is compiled out; every reply is broadcast.
+ */
+#if 0
+ /** @todo need to see someone set this flag to check that it's correct. */
+ if (!(pDhcpMsg->bp_flags & RTNET_DHCP_FLAGS_NO_BROADCAST))
+ {
+ rc = VBoxNetUDPUnicast(m_pSession,
+ m_hIf,
+ m_pIfBuf,
+ m_OurAddress,
+ &m_OurMac,
+ RTNETIPV4_PORT_BOOTPS, /* sender */
+ IPv4AddrBrdCast,
+ &BootPReplyMsg.BootPHeader->bp_chaddr.Mac,
+ RTNETIPV4_PORT_BOOTPC, /* receiver */
+ &BootPReplyMsg, cbBooPReplyMsg);
+ }
+ else
+#endif
+ rc = m->m_service->hlpUDPBroadcast(RTNETIPV4_PORT_BOOTPS, /* sender */
+ RTNETIPV4_PORT_BOOTPC,
+ &m->BootPReplyMsg,
+ RTNET_DHCP_NORMAL_SIZE); /* NOTE(review): m_service is dereferenced unchecked - setService() must have been called */
+
+ AssertRCReturn(rc,rc);
+
+ return VINF_SUCCESS;
+}
+
+
+/*
+ * Splits "<code><sep><text>" into option code, text and encoding.
+ *   "<code>:<text>"        -> encoding 0 (legacy format)
+ *   "<code>=<text>"        -> encoding 1
+ *   "<code>@<enc>=<text>"  -> encoding <enc> (decimal)
+ * Outputs are written only on success; *ppszOptText points into pszText.
+ * Returns VINF_SUCCESS or VERR_PARSE_ERROR.
+ *
+ * XXX: TODO: Share decoding code with DHCPServer::addOption.
+ */
+static int parseDhcpOptionText(const char *pszText,
+                               int *pOptCode, char **ppszOptText, int *pOptEncoding)
+{
+    /* Leading decimal option code. */
+    uint8_t uCode;
+    char *pszCursor;
+
+    int rc = RTStrToUInt8Ex(pszText, &pszCursor, 10, &uCode);
+    if (!RT_SUCCESS(rc))
+        return VERR_PARSE_ERROR;
+
+    /* The separator after the code decides the encoding. */
+    uint32_t uEncoding;
+    const char chSep = *pszCursor;
+
+    if (chSep == ':') /* support legacy format too */
+        uEncoding = 0;
+    else if (chSep == '=')
+        uEncoding = 1;
+    else if (chSep == '@')
+    {
+        /* Explicit encoding number, which must be followed by '='. */
+        rc = RTStrToUInt32Ex(pszCursor + 1, &pszCursor, 10, &uEncoding);
+        if (!RT_SUCCESS(rc))
+            return VERR_PARSE_ERROR;
+        if (*pszCursor != '=')
+            return VERR_PARSE_ERROR;
+    }
+    else
+        return VERR_PARSE_ERROR;
+
+    /* pszCursor sits on the final separator (':' or '='); the text starts right after it. */
+    *pOptCode = uCode;
+    *ppszOptText = pszCursor + 1;
+    *pOptEncoding = (int)uEncoding;
+
+    return VINF_SUCCESS;
+}
+
+
+/** Fills opt's raw bytes from OptText. Hex encoding parses "xx:xx:..." byte lists; legacy encoding is rejected; any other encoding returns VINF_SUCCESS without touching opt. */
+static int fillDhcpOption(RawOption &opt, const std::string &OptText, int OptEncoding)
+{
+ int rc;
+
+ if (OptEncoding == DhcpOptEncoding_Hex)
+ {
+ if (OptText.empty())
+ return VERR_INVALID_PARAMETER;
+
+ size_t cbRawOpt = 0;
+ char *pszNext = const_cast<char *>(OptText.c_str()); /* cast only because RTStrToUInt8Ex wants a char ** cursor */
+ while (*pszNext != '\0')
+ {
+ if (cbRawOpt >= RT_ELEMENTS(opt.au8RawOpt)) /* more bytes than the option buffer can hold */
+ return VERR_INVALID_PARAMETER;
+
+ uint8_t u8Byte;
+ rc = RTStrToUInt8Ex(pszNext, &pszNext, 16, &u8Byte);
+ if (!RT_SUCCESS(rc))
+ return rc;
+
+ if (*pszNext == ':') /* byte separator; a trailing ':' simply ends the loop */
+ ++pszNext;
+ else if (*pszNext != '\0')
+ return VERR_PARSE_ERROR;
+
+ opt.au8RawOpt[cbRawOpt] = u8Byte;
+ ++cbRawOpt;
+ }
+ opt.cbRawOpt = (uint8_t)cbRawOpt;
+ }
+ else if (OptEncoding == DhcpOptEncoding_Legacy)
+ {
+ /*
+ * XXX: TODO: encode "known" option opt.u8OptId
+ */
+ return VERR_INVALID_PARAMETER;
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Builds the reply options for a client's parameter request list.
+ *
+ * Subnet mask, routers, DNS servers and domain name are taken from the lease
+ * configuration and the configuration manager.  Every other requested option
+ * is looked up among the options configured on the DHCP server object (global
+ * options first, then the per-MAC options, which override them).  Requested
+ * options that have no value are skipped.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   client      The client the reply is for.
+ * @param   pu8ReqList  The requested option codes.
+ * @param   cReqList    Number of entries in pu8ReqList.
+ * @param   extra       Vector the resulting options are appended to.
+ */
+int NetworkManager::processParameterReqList(const Client& client, const uint8_t *pu8ReqList,
+                                            int cReqList, std::vector<RawOption>& extra)
+{
+    int rc;
+
+    const Lease l = client.lease();
+
+    const NetworkConfigEntity *pNetCfg = l.getConfig();
+
+    /*
+     * XXX: Brute-force.  Unfortunately, there's no notification event
+     * for changes.  Should at least cache the options for a short
+     * time, enough to last discover/offer/request/ack cycle.
+     */
+    typedef std::map< int, std::pair<std::string, int> > DhcpOptionMap;
+    DhcpOptionMap OptMap;
+
+    if (!m->m_DhcpServer.isNull())
+    {
+        com::SafeArray<BSTR> strings;
+        com::Bstr str;
+        HRESULT hrc;
+        int OptCode, OptEncoding;
+        char *pszOptText;
+
+        /* Global options; entries that fail to parse are skipped. */
+        strings.setNull();
+        hrc = m->m_DhcpServer->COMGETTER(GlobalOptions)(ComSafeArrayAsOutParam(strings));
+        AssertComRC(hrc);
+        for (size_t i = 0; i < strings.size(); ++i)
+        {
+            com::Utf8Str encoded(strings[i]);
+            rc = parseDhcpOptionText(encoded.c_str(),
+                                     &OptCode, &pszOptText, &OptEncoding);
+            if (!RT_SUCCESS(rc))
+                continue;
+
+            /* pszOptText points into 'encoded'; it is copied into a std::string here. */
+            OptMap[OptCode] = std::make_pair(pszOptText, OptEncoding);
+        }
+
+        /* Per-MAC options, keyed by the MAC as 12 lower-case hex digits. */
+        const RTMAC &mac = client.getMacAddress();
+        char strMac[6*2+1] = "";
+        RTStrPrintf(strMac, sizeof(strMac), "%02x%02x%02x%02x%02x%02x",
+                    mac.au8[0], mac.au8[1], mac.au8[2],
+                    mac.au8[3], mac.au8[4], mac.au8[5]);
+
+        strings.setNull();
+        hrc = m->m_DhcpServer->GetMacOptions(com::Bstr(strMac).raw(),
+                                             ComSafeArrayAsOutParam(strings));
+        AssertComRC(hrc);
+        for (size_t i = 0; i < strings.size(); ++i)
+        {
+            com::Utf8Str text(strings[i]);
+            rc = parseDhcpOptionText(text.c_str(),
+                                     &OptCode, &pszOptText, &OptEncoding);
+            if (!RT_SUCCESS(rc))
+                continue;
+
+            OptMap[OptCode] = std::make_pair(pszOptText, OptEncoding);
+        }
+    }
+
+    /* request parameter list */
+    RawOption opt;
+    bool fIgnore;
+    uint8_t u8Req;
+    for (int idxParam = 0; idxParam < cReqList; ++idxParam)
+    {
+        fIgnore = false;
+        RT_ZERO(opt);
+        u8Req = opt.u8OptId = pu8ReqList[idxParam];
+
+        switch(u8Req)
+        {
+            case RTNET_DHCP_OPT_SUBNET_MASK:
+                /* Without a configuration there is no netmask to report. */
+                if (!pNetCfg)
+                {
+                    fIgnore = true;
+                    break;
+                }
+                ((PRTNETADDRIPV4)opt.au8RawOpt)->u = pNetCfg->netmask().u;
+                opt.cbRawOpt = sizeof(RTNETADDRIPV4);
+
+                break;
+
+            case RTNET_DHCP_OPT_ROUTERS:
+            case RTNET_DHCP_OPT_DNS:
+                {
+                    const Ipv4AddressContainer &lst =
+                        g_ConfigurationManager->getAddressList(u8Req);
+                    PRTNETADDRIPV4 pAddresses = (PRTNETADDRIPV4)&opt.au8RawOpt[0];
+
+                    for (Ipv4AddressConstIterator it = lst.begin();
+                         it != lst.end();
+                         ++it)
+                    {
+                        /* The payload buffer and the 8-bit length field limit
+                           how many addresses fit; drop the excess instead of
+                           writing past the buffer. */
+                        if ((size_t)opt.cbRawOpt + sizeof(RTNETADDRIPV4) > sizeof(opt.au8RawOpt))
+                            break;
+
+                        *pAddresses = (*it);
+                        pAddresses++;
+                        opt.cbRawOpt += sizeof(RTNETADDRIPV4);
+                    }
+
+                    if (lst.empty())
+                        fIgnore = true;
+                }
+                break;
+            case RTNET_DHCP_OPT_DOMAIN_NAME:
+                {
+                    std::string domainName = g_ConfigurationManager->getString(u8Req);
+                    if (domainName == g_ConfigurationManager->m_noString)
+                    {
+                        fIgnore = true;
+                        break;
+                    }
+
+                    /* Truncate to leave room for the terminator; the
+                       terminator is not counted in cbRawOpt. */
+                    size_t cchLength = domainName.length();
+                    if (cchLength >= sizeof(opt.au8RawOpt))
+                        cchLength = sizeof(opt.au8RawOpt) - 1;
+                    memcpy(&opt.au8RawOpt[0], domainName.c_str(), cchLength);
+                    opt.au8RawOpt[cchLength] = '\0';
+                    opt.cbRawOpt = (uint8_t)cchLength;
+                }
+                break;
+            default:
+                {
+                    DhcpOptionMap::const_iterator it = OptMap.find((int)u8Req);
+                    if (it == OptMap.end())
+                    {
+                        Log(("opt: %d is ignored\n", u8Req));
+                        fIgnore = true;
+                    }
+                    else
+                    {
+                        std::string OptText((*it).second.first);
+                        int OptEncoding((*it).second.second);
+
+                        rc = fillDhcpOption(opt, OptText, OptEncoding);
+                        if (!RT_SUCCESS(rc))
+                        {
+                            fIgnore = true;
+                            break;
+                        }
+                    }
+                }
+                break;
+        }
+
+        if (!fIgnore)
+            extra.push_back(opt);
+
+    }
+
+    return VINF_SUCCESS;
+}
+
+/* Client */
+/**
+ * Default constructor; assigns a default-constructed SharedPtr to the data
+ * member (presumably an empty pointer, cf. the m.get() checks in
+ * Client::operator== and Client::lease - TODO confirm against shared_ptr.h).
+ */
+Client::Client()
+{
+ m = SharedPtr<ClientData>();
+}
+
+
+/**
+ * Replaces the client data with a newly allocated ClientData carrying the
+ * given MAC address.
+ *
+ * @param   mac     The MAC address to store.
+ */
+void Client::initWithMac(const RTMAC& mac)
+{
+ m = SharedPtr<ClientData>(new ClientData());
+ m->m_mac = mac;
+}
+
+
+/**
+ * Compares the client against a MAC address.
+ *
+ * @returns true if the client has data and its stored MAC equals @a mac,
+ *          false otherwise (including when the client has no data).
+ */
+bool Client::operator== (const RTMAC& mac) const
+{
+ return (m.get() && m->m_mac == mac);
+}
+
+
+/**
+ * Returns a reference to the stored MAC address.
+ *
+ * NOTE(review): dereferences the data pointer without the m.get() check used
+ * by operator== and lease(); presumably callers must not use this on a client
+ * without data - confirm.
+ */
+const RTMAC& Client::getMacAddress() const
+{
+ return m->m_mac;
+}
+
+
+/**
+ * Gets the lease of this client.
+ *
+ * @returns A Lease constructed from this client when the client has data and
+ *          that data is flagged as having a lease, Lease::NullLease otherwise.
+ */
+Lease Client::lease()
+{
+    const bool fLeased = m.get() && m->fHasLease;
+    if (!fLeased)
+        return Lease::NullLease;
+
+    return Lease(*this);
+}
+
+
+/**
+ * Const variant of lease(); casts away constness and forwards to the
+ * non-const overload.
+ */
+const Lease Client::lease() const
+{
+ return const_cast<Client *>(this)->lease();
+}
+
+
+/** Private constructor wrapping the given ClientData pointer in the SharedPtr member. */
+Client::Client(ClientData *data):m(SharedPtr<ClientData>(data)){}
+
+/* Lease */
+/**
+ * Default constructor; assigns a default-constructed SharedPtr to the data
+ * member (presumably an empty pointer - TODO confirm against shared_ptr.h).
+ */
+Lease::Lease()
+{
+ m = SharedPtr<ClientData>();
+}
+
+
+/**
+ * Creates a lease object sharing the data of the given client.
+ *
+ * If the data is not flagged as having a lease, or the lease is expired while
+ * not in the binding phase, the data is marked as leased and a new binding
+ * phase is started at the current time.
+ *
+ * @param   c   The client whose data to share; its data pointer is
+ *              dereferenced unconditionally.
+ */
+Lease::Lease (const Client& c)
+{
+    m = SharedPtr<ClientData>(c.m);
+
+    /* Same short-circuit order as a single '||' / '&&' expression. */
+    bool fStartBinding = !m->fHasLease;
+    if (!fStartBinding)
+        fStartBinding = isExpired() && !isInBindingPhase();
+    if (!fStartBinding)
+        return;
+
+    m->fHasLease = true;
+    m->fBinding = true;
+    phaseStart(RTTimeMilliTS());
+}
+
+
+/**
+ * Checks whether the current phase (binding or leasing) has expired.
+ *
+ * The milliseconds elapsed since the start of the current phase are converted
+ * to seconds and compared against that phase's expiration period.
+ *
+ * @returns true if more seconds have elapsed than the period allows, false
+ *          otherwise (also when the lease has no data).
+ */
+bool Lease::isExpired() const
+{
+    AssertPtrReturn(m.get(), false);
+
+    if (m->fBinding)
+        return (ASMDivU64ByU32RetU32(RTTimeMilliTS() - m->u64TimestampBindingStarted, 1000)
+                > m->u32BindExpirationPeriod);
+
+    return (ASMDivU64ByU32RetU32(RTTimeMilliTS() - m->u64TimestampLeasingStarted, 1000)
+            > m->u32LeaseExpirationPeriod);
+}
+
+
+/**
+ * Expires the lease.
+ *
+ * Not implemented yet; the body is currently empty.
+ */
+void Lease::expire()
+{
+ /* XXX: TODO */
+}
+
+
+/**
+ * Records the start timestamp of the current phase.
+ *
+ * @param   u64Start    The timestamp; stored as the binding start when in the
+ *                      binding phase, as the leasing start otherwise.
+ */
+void Lease::phaseStart(uint64_t u64Start)
+{
+    if (!m->fBinding)
+    {
+        m->u64TimestampLeasingStarted = u64Start;
+        return;
+    }
+    m->u64TimestampBindingStarted = u64Start;
+}
+
+
+/**
+ * Switches the binding phase flag on or off.
+ *
+ * @param   fOnOff  The new value of the binding flag.
+ */
+void Lease::bindingPhase(bool fOnOff)
+{
+ m->fBinding = fOnOff;
+}
+
+
+/** Returns the binding phase flag. */
+bool Lease::isInBindingPhase() const
+{
+ return m->fBinding;
+}
+
+
+/** Returns the timestamp at which the leasing phase was started. */
+uint64_t Lease::issued() const
+{
+ return m->u64TimestampLeasingStarted;
+}
+
+
+/**
+ * Sets the expiration period of the current phase.
+ *
+ * @param   exp     The period; stored as the bind expiration period when in
+ *                  the binding phase, as the lease expiration period otherwise.
+ */
+void Lease::setExpiration(uint32_t exp)
+{
+    if (!m->fBinding)
+    {
+        m->u32LeaseExpirationPeriod = exp;
+        return;
+    }
+    m->u32BindExpirationPeriod = exp;
+}
+
+
+/**
+ * Gets the expiration period of the current phase.
+ *
+ * @returns The bind expiration period when in the binding phase, the lease
+ *          expiration period otherwise.
+ */
+uint32_t Lease::getExpiration() const
+{
+    return m->fBinding
+         ? m->u32BindExpirationPeriod
+         : m->u32LeaseExpirationPeriod;
+}
+
+
+/** Returns the IPv4 address stored in the lease data. */
+RTNETADDRIPV4 Lease::getAddress() const
+{
+ return m->m_address;
+}
+
+
+/**
+ * Stores an IPv4 address in the lease data.
+ *
+ * @param   address     The address to store.
+ */
+void Lease::setAddress(RTNETADDRIPV4 address)
+{
+ m->m_address = address;
+}
+
+
+/** Returns the network configuration pointer stored in the lease data. */
+const NetworkConfigEntity *Lease::getConfig() const
+{
+ return m->pCfg;
+}
+
+
+/**
+ * Stores a network configuration pointer in the lease data.
+ *
+ * @param   pCfg    The configuration; only the pointer is stored.
+ */
+void Lease::setConfig(NetworkConfigEntity *pCfg)
+{
+ m->pCfg = pCfg;
+}
+
+
+/** Returns a reference to the option map stored in the lease data. */
+const MapOptionId2RawOption& Lease::options() const
+{
+ return m->options;
+}
+
+
+/** Private constructor wrapping the given ClientData pointer in the SharedPtr member. */
+Lease::Lease(ClientData *pd):m(SharedPtr<ClientData>(pd)){}
+
+
+/**
+ * Writes the lease into an XML element.
+ *
+ * Sets the MAC and network attributes on @a node, then adds an address child
+ * carrying the leased address and a time child carrying the leasing start
+ * timestamp and the lease expiration period.
+ *
+ * @returns true on success, false as soon as an attribute or child could not
+ *          be created (earlier additions stay in place).
+ * @param   node    The element to fill.
+ */
+bool Lease::toXML(xml::ElementNode *node) const
+{
+    /* Attributes on the lease element itself. */
+    if (!node->setAttribute(tagXMLLeaseAttributeMac.c_str(),
+                            com::Utf8StrFmt("%RTmac", &m->m_mac)))
+        return false;
+
+    if (!node->setAttribute(tagXMLLeaseAttributeNetwork.c_str(),
+                            com::Utf8StrFmt("%RTnaipv4", m->m_network)))
+        return false;
+
+    /* Address child. */
+    xml::ElementNode *pElmAddress = node->createChild(tagXMLLeaseAddress.c_str());
+    if (!pElmAddress)
+        return false;
+
+    if (!pElmAddress->setAttribute(tagXMLAddressAttributeValue.c_str(),
+                                   com::Utf8StrFmt("%RTnaipv4", m->m_address)))
+        return false;
+
+    /* Time child. */
+    xml::ElementNode *pElmTime = node->createChild(tagXMLLeaseTime.c_str());
+    if (!pElmTime)
+        return false;
+
+    if (!pElmTime->setAttribute(tagXMLTimeAttributeIssued.c_str(),
+                                m->u64TimestampLeasingStarted))
+        return false;
+
+    if (!pElmTime->setAttribute(tagXMLTimeAttributeExpiration.c_str(),
+                                m->u32LeaseExpirationPeriod))
+        return false;
+
+    return true;
+}
+
+
+/**
+ * Loads the lease from an XML element.
+ *
+ * Reads the MAC and network attributes of @a node, the address from the
+ * address child and the issue timestamp and expiration period from the time
+ * child.  On success the lease is put into the leasing (non-binding) phase and
+ * flagged as having a lease.
+ *
+ * @returns true on success, false if an attribute or child is missing or a
+ *          MAC/IPv4 value cannot be parsed.  Fields read before the failure
+ *          remain modified.
+ * @param   node    The element to read from.
+ */
+bool Lease::fromXML(const xml::ElementNode *node)
+{
+    com::Utf8Str mac;
+    bool valueExists = node->getAttributeValue(tagXMLLeaseAttributeMac.c_str(), mac);
+    if (!valueExists) return false;
+    int rc = RTNetStrToMacAddr(mac.c_str(), &m->m_mac);
+    if (RT_FAILURE(rc)) return false;
+
+    com::Utf8Str network;
+    valueExists = node->getAttributeValue(tagXMLLeaseAttributeNetwork.c_str(), network);
+    if (!valueExists) return false;
+    rc = RTNetStrToIPv4Addr(network.c_str(), &m->m_network);
+    if (RT_FAILURE(rc)) return false;
+
+    /* Address */
+    const xml::ElementNode *address = node->findChildElement(tagXMLLeaseAddress.c_str());
+    if (!address) return false;
+    com::Utf8Str addressValue;
+    valueExists = address->getAttributeValue(tagXMLAddressAttributeValue.c_str(), addressValue);
+    if (!valueExists) return false;
+    rc = RTNetStrToIPv4Addr(addressValue.c_str(), &m->m_address);
+    /* A malformed address must not produce a lease marked as valid. */
+    if (RT_FAILURE(rc)) return false;
+
+    /* Time */
+    const xml::ElementNode *time = node->findChildElement(tagXMLLeaseTime.c_str());
+    if (!time) return false;
+
+    valueExists = time->getAttributeValue(tagXMLTimeAttributeIssued.c_str(),
+                                          &m->u64TimestampLeasingStarted);
+    if (!valueExists) return false;
+    m->fBinding = false;
+
+    valueExists = time->getAttributeValue(tagXMLTimeAttributeExpiration.c_str(),
+                                          &m->u32LeaseExpirationPeriod);
+    if (!valueExists) return false;
+
+    m->fHasLease = true;
+    return true;
+}
+
+
+/** Default-constructed lease; returned by Client::lease() when there is no lease. */
+const Lease Lease::NullLease;
+
+/** Default-constructed client. */
+const Client Client::NullClient;
diff --git a/src/VBox/NetworkServices/DHCP/Config.h b/src/VBox/NetworkServices/DHCP/Config.h
new file mode 100644
index 00000000..a8431686
--- /dev/null
+++ b/src/VBox/NetworkServices/DHCP/Config.h
@@ -0,0 +1,845 @@
+/* $Id: Config.h $ */
+/** @file
+ * Config.h
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_DHCP_Config_h
+#define VBOX_INCLUDED_SRC_DHCP_Config_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/asm-math.h>
+#include <iprt/cpp/utils.h>
+
+#include <VBox/com/ptr.h>
+#include <VBox/com/string.h>
+#include <VBox/com/VirtualBox.h>
+
+#include "../NetLib/cpp/utils.h"
+
+
+/**
+ * Fixed-size holder for one option: an option id, the number of payload
+ * bytes in use and the payload buffer itself.
+ */
+class RawOption
+{
+public:
+    /** Creates an option with every field zeroed. */
+    RawOption()
+        : u8OptId(0)
+        , cbRawOpt(0)
+    {
+        RT_ZERO(au8RawOpt);
+    }
+
+    uint8_t u8OptId;        /**< The option id. */
+    uint8_t cbRawOpt;       /**< Number of bytes used in au8RawOpt. */
+    uint8_t au8RawOpt[255]; /**< The option payload. */
+};
+
+class ClientData;
+class Client;
+class Lease;
+class BaseConfigEntity;
+
+class NetworkConfigEntity;
+class HostConfigEntity;
+class ClientMatchCriteria;
+class ConfigurationManager;
+
+/*
+ * This is a basic representation of
+ * our understanding of what a client is.
+ * XXX: Client might send Option 61 (RFC2132 9.14 "Client-identifier") signalling
+ * that we may identify it in a special way.
+ *
+ * XXX: Client might send Option 60 (RFC2132 9.13 "Vendor class identifier");
+ * in response the server is expected to send Option 43 (RFC2132 8.4. "Vendor Specific Information").
+ */
+class Client
+{
+ /* Lease and ConfigurationManager access the private data pointer directly. */
+ friend class Lease;
+ friend class ConfigurationManager;
+
+ public:
+ /** Creates a client without allocating any client data. */
+ Client();
+ /** Allocates new client data and stores the given MAC address in it. */
+ void initWithMac(const RTMAC& mac);
+ /** Compares the stored MAC address against @a mac; false if there is no data. */
+ bool operator== (const RTMAC& mac) const;
+ /** Returns the stored MAC address. */
+ const RTMAC& getMacAddress() const;
+
+ /** Dumps client query */
+ void dump();
+
+ /** Returns the client's lease, or Lease::NullLease if it has none. */
+ Lease lease();
+ const Lease lease() const;
+
+ public:
+ /** Default-constructed client instance. */
+ static const Client NullClient;
+
+ private:
+ /** Wraps an existing ClientData pointer. */
+ Client(ClientData *);
+ /** The client data, shared with Lease objects created from this client. */
+ SharedPtr<ClientData> m;
+};
+
+
+bool operator== (const Lease&, const Lease&);
+bool operator!= (const Lease&, const Lease&);
+bool operator< (const Lease&, const Lease&);
+
+
+typedef std::map<uint8_t, RawOption> MapOptionId2RawOption;
+typedef MapOptionId2RawOption::iterator MapOptionId2RawOptionIterator;
+typedef MapOptionId2RawOption::const_iterator MapOptionId2RawOptionConstIterator;
+typedef MapOptionId2RawOption::value_type MapOptionId2RawOptionValue;
+
+namespace xml {
+ class ElementNode;
+}
+
+/**
+ * View onto the lease-related part of a client's data; shares the ClientData
+ * object with the Client it was created from.
+ */
+class Lease
+{
+ friend class Client;
+ friend bool operator== (const Lease&, const Lease&);
+ //friend int ConfigurationManager::loadFromFile(const std::string&);
+ friend class ConfigurationManager;
+
+ public:
+ /** Creates a lease without data. */
+ Lease();
+ /** Creates a lease sharing the data of the given client. */
+ Lease(const Client&);
+
+ /** Checks whether the current phase has outlived its expiration period. */
+ bool isExpired() const;
+ /** Not implemented yet. */
+ void expire();
+
+ /* Depending on phase *Expiration and phaseStart initialize different values. */
+ void bindingPhase(bool);
+ void phaseStart(uint64_t u64Start);
+ bool isInBindingPhase() const;
+ /* returns 0 if in binding state */
+ uint64_t issued() const;
+
+ /** Sets/gets the expiration period of the current phase (binding or leasing). */
+ void setExpiration(uint32_t);
+ uint32_t getExpiration() const;
+
+ /** Gets/sets the leased IPv4 address. */
+ RTNETADDRIPV4 getAddress() const;
+ void setAddress(RTNETADDRIPV4);
+
+ /** Gets/sets the network configuration pointer associated with the lease. */
+ const NetworkConfigEntity *getConfig() const;
+ void setConfig(NetworkConfigEntity *);
+
+ /** Returns the option map stored with the lease. */
+ const MapOptionId2RawOption& options() const;
+
+ /** Serializes the lease to / loads it from an XML element; false on failure. */
+ bool toXML(xml::ElementNode *) const;
+ bool fromXML(const xml::ElementNode *);
+
+ public:
+ /** Default-constructed lease instance. */
+ static const Lease NullLease;
+
+ private:
+ /** Wraps an existing ClientData pointer. */
+ Lease(ClientData *);
+ /** The shared client data. */
+ SharedPtr<ClientData> m;
+};
+
+
+typedef std::vector<Client> VecClient;
+typedef VecClient::iterator VecClientIterator;
+typedef VecClient::const_iterator VecClientConstIterator;
+
+typedef std::vector<RTMAC> MacAddressContainer;
+typedef MacAddressContainer::iterator MacAddressIterator;
+
+typedef std::vector<RTNETADDRIPV4> Ipv4AddressContainer;
+typedef Ipv4AddressContainer::iterator Ipv4AddressIterator;
+typedef Ipv4AddressContainer::const_iterator Ipv4AddressConstIterator;
+
+typedef std::map<Lease, RTNETADDRIPV4> MapLease2Ip4Address;
+typedef MapLease2Ip4Address::iterator MapLease2Ip4AddressIterator;
+typedef MapLease2Ip4Address::const_iterator MapLease2Ip4AddressConstIterator;
+typedef MapLease2Ip4Address::value_type MapLease2Ip4AddressPair;
+
+/**
+ * Base class for criteria deciding whether a configuration applies to a
+ * client.  The base implementation matches no client.
+ */
+class ClientMatchCriteria
+{
+    public:
+    /** Virtual destructor so derived criteria can be destroyed through a base pointer. */
+    virtual ~ClientMatchCriteria() {}
+
+    /**
+     * Checks whether the client matches.
+     * @returns false always in the base class.
+     */
+    virtual bool check(const Client&) const {return false;}
+};
+
+
+/**
+ * Criteria matching when at least one of two sub-criteria matches.
+ *
+ * Only the pointers to the sub-criteria are stored; both are dereferenced by
+ * check().
+ */
+class ORClientMatchCriteria: public ClientMatchCriteria
+{
+public:
+    /**
+     * @param   left    First criteria to evaluate.
+     * @param   right   Second criteria, evaluated only if @a left does not match.
+     */
+    ORClientMatchCriteria(ClientMatchCriteria *left, ClientMatchCriteria *right)
+    {
+        m_left = left;
+        m_right = right;
+    }
+
+    /** @returns true if either sub-criteria matches the client. */
+    virtual bool check(const Client& client) const
+    {
+        return (m_left->check(client) || m_right->check(client));
+    }
+
+private:
+    ClientMatchCriteria* m_left;
+    ClientMatchCriteria* m_right;
+};
+
+
+/**
+ * Criteria matching only when both sub-criteria match.
+ *
+ * Only the pointers to the sub-criteria are stored; both may be dereferenced
+ * by check().
+ */
+class ANDClientMatchCriteria: public ClientMatchCriteria
+{
+public:
+    /**
+     * @param   left    First criteria to evaluate.
+     * @param   right   Second criteria, evaluated only if @a left matches.
+     */
+    ANDClientMatchCriteria(ClientMatchCriteria *left, ClientMatchCriteria *right)
+    {
+        m_left = left;
+        m_right = right;
+    }
+
+    /** @returns true if both sub-criteria match the client. */
+    virtual bool check(const Client& client) const
+    {
+        return (m_left->check(client) && m_right->check(client));
+    }
+
+private:
+    ClientMatchCriteria* m_left;
+    ClientMatchCriteria* m_right;
+
+};
+
+
+/** Criteria that matches every client. */
+class AnyClientMatchCriteria: public ClientMatchCriteria
+{
+public:
+ /** @returns true always. */
+ virtual bool check(const Client&) const
+ {
+ return true;
+ }
+};
+
+
+/** Criteria holding a MAC address; check() is implemented outside this header. */
+class MACClientMatchCriteria: public ClientMatchCriteria
+{
+public:
+ /** Stores a copy of the given MAC address. */
+ MACClientMatchCriteria(const RTMAC& mac):m_mac(mac){}
+
+ virtual bool check(const Client& client) const;
+
+private:
+ /** The MAC address given at construction. */
+ RTMAC m_mac;
+};
+
+
+#if 0
+/* XXX: Later */
+class VmSlotClientMatchCriteria: public ClientMatchCriteria
+{
+ str::string VmName;
+ uint8_t u8Slot;
+ virtual bool check(const Client& client)
+ {
+ return ( client.VmName == VmName
+ && ( u8Slot == (uint8_t)~0 /* any */
+ || client.u8Slot == u8Slot));
+ }
+};
+#endif
+
+
+/* Option 60 */
+/* Placeholder: adds nothing to ClientMatchCriteria yet (note the private inheritance). */
+class ClassClientMatchCriteria: ClientMatchCriteria{};
+/* Option 61 */
+/* Placeholder: adds nothing to ClientMatchCriteria yet (note the private inheritance). */
+class ClientIdentifierMatchCriteria: ClientMatchCriteria{};
+
+
+/**
+ * Base class of the configuration entities: holds a match criteria, a match
+ * level and a list of child entities.
+ */
+class BaseConfigEntity
+{
+ public:
+ /**
+ * @param criteria The match criteria; only the pointer is stored.
+ * @param matchingLevel The match level of this entity.
+ */
+ BaseConfigEntity(const ClientMatchCriteria *criteria = NULL,
+ int matchingLevel = 0)
+ : m_criteria(criteria),
+ m_MatchLevel(matchingLevel){};
+ virtual ~BaseConfigEntity(){};
+ /* XXX */
+ /** Appends a child entity (pointer only); always returns 0. */
+ int add(BaseConfigEntity *cfg)
+ {
+ m_children.push_back(cfg);
+ return 0;
+ }
+
+ /* Should return how strong matching */
+ virtual int match(Client& client, BaseConfigEntity **cfg);
+ /** Expiration period of this entity; implemented by the subclasses. */
+ virtual uint32_t expirationPeriod() const = 0;
+
+ protected:
+ const ClientMatchCriteria *m_criteria;
+ int m_MatchLevel;
+ std::vector<BaseConfigEntity *> m_children;
+};
+
+
+/**
+ * Configuration entity that ignores added children and reports an expiration
+ * period of 0.
+ */
+class NullConfigEntity: public BaseConfigEntity
+{
+ public:
+ NullConfigEntity(){}
+ virtual ~NullConfigEntity(){}
+ /* Hides (does not override) BaseConfigEntity::add(); the child is dropped. */
+ int add(BaseConfigEntity *) const { return 0;}
+ virtual uint32_t expirationPeriod() const {return 0;}
+};
+
+
+/**
+ * Named configuration entity that registers itself with a parent entity and
+ * inherits the parent's expiration period unless it has its own.
+ */
+class ConfigEntity: public BaseConfigEntity
+{
+    public:
+    /* range */
+    /* match conditions */
+    /**
+     * Creates the entity and adds it to the children of @a cfg.
+     *
+     * @param   name            The entity name (copied).
+     * @param   cfg             The parent entity; dereferenced unconditionally.
+     * @param   criteria        The match criteria passed on to the base class.
+     * @param   matchingLevel   The match level passed on to the base class.
+     */
+    ConfigEntity(std::string& name,
+                 const BaseConfigEntity *cfg,
+                 const ClientMatchCriteria *criteria,
+                 int matchingLevel = 0)
+        : BaseConfigEntity(criteria, matchingLevel)
+        , m_name(name)
+        , m_parentCfg(cfg)
+        , m_u32ExpirationPeriod(0)
+    {
+        unconst(m_parentCfg)->add(this);
+    }
+
+    /** @returns The own expiration period if non-zero, the parent's otherwise. */
+    virtual uint32_t expirationPeriod() const
+    {
+        return m_u32ExpirationPeriod
+             ? m_u32ExpirationPeriod
+             : m_parentCfg->expirationPeriod();
+    }
+
+    /* XXX: private:*/
+    std::string m_name;
+    const BaseConfigEntity *m_parentCfg;
+    uint32_t m_u32ExpirationPeriod;
+};
+
+
+/**
+ * Network specific entries
+ *
+ * Holds a network id, a netmask and the lower/upper bounds of an address
+ * range.
+ */
+class NetworkConfigEntity:public ConfigEntity
+{
+public:
+ /* Address Pool matching with network declaration */
+ /* Constructor taking the address range bounds explicitly. */
+ NetworkConfigEntity(std::string name,
+ const BaseConfigEntity *cfg,
+ const ClientMatchCriteria *criteria,
+ int matchlvl,
+ const RTNETADDRIPV4& networkID,
+ const RTNETADDRIPV4& networkMask,
+ const RTNETADDRIPV4& lowerIP,
+ const RTNETADDRIPV4& upperIP):
+ ConfigEntity(name, cfg, criteria, matchlvl),
+ m_NetworkID(networkID),
+ m_NetworkMask(networkMask),
+ m_UpperIP(upperIP),
+ m_LowerIP(lowerIP)
+ {
+ };
+
+ /* Constructor deriving the range from id and mask; uses match level 5. */
+ NetworkConfigEntity(std::string name,
+ const BaseConfigEntity *cfg,
+ const ClientMatchCriteria *criteria,
+ const RTNETADDRIPV4& networkID,
+ const RTNETADDRIPV4& networkMask):
+ ConfigEntity(name, cfg, criteria, 5),
+ m_NetworkID(networkID),
+ m_NetworkMask(networkMask)
+ {
+ /* Upper bound: network id with all bits outside the mask set; lower bound: the network id. */
+ m_UpperIP.u = m_NetworkID.u | (~m_NetworkMask.u);
+ m_LowerIP.u = m_NetworkID.u;
+ };
+
+ const RTNETADDRIPV4& upperIp() const {return m_UpperIP;}
+ const RTNETADDRIPV4& lowerIp() const {return m_LowerIP;}
+ const RTNETADDRIPV4& networkId() const {return m_NetworkID;}
+ const RTNETADDRIPV4& netmask() const {return m_NetworkMask;}
+
+ private:
+ RTNETADDRIPV4 m_NetworkID;
+ RTNETADDRIPV4 m_NetworkMask;
+ RTNETADDRIPV4 m_UpperIP;
+ RTNETADDRIPV4 m_LowerIP;
+};
+
+
+/**
+ * Host specific entry.
+ * The address pool contains exactly one element: the lower and upper bounds
+ * are both set to the host address.  Network id and netmask are taken from
+ * the parent network entity, and match level 10 is used.
+ */
+class HostConfigEntity: public NetworkConfigEntity
+{
+public:
+ HostConfigEntity(const RTNETADDRIPV4& addr,
+ std::string name,
+ const NetworkConfigEntity *cfg,
+ const ClientMatchCriteria *criteria):
+ NetworkConfigEntity(name,
+ static_cast<const ConfigEntity*>(cfg), criteria, 10,
+ cfg->networkId(), cfg->netmask(), addr, addr)
+ {
+ /* upper addr == lower addr */
+ }
+};
+
+/**
+ * Network entity constructed from just a name and an expiration period; the
+ * constructor is implemented outside this header.
+ */
+class RootConfigEntity: public NetworkConfigEntity
+{
+public:
+ RootConfigEntity(std::string name, uint32_t expirationPeriod);
+ virtual ~RootConfigEntity(){};
+};
+
+
+#if 0
+/**
+ * Shared regions e.g. some of configured networks declarations
+ * are cover each other.
+ * XXX: Shared Network is join on Network config entities with possible
+ * overlaps in address pools. for a moment we won't configure and use them them
+ */
+class SharedNetworkConfigEntity: public NetworkEntity
+{
+public:
+ SharedNetworkConfigEntity(){}
+ int match(const Client& client) const { return m_criteria.match(client)? 3 : 0;}
+
+ SharedNetworkConfigEntity(NetworkEntity& network)
+ {
+ Networks.push_back(network);
+ }
+ virtual ~SharedNetworkConfigEntity(){}
+
+ std::vector<NetworkConfigEntity> Networks;
+};
+#endif
+
+/**
+ * Interface for client lookup, lease allocation/commit/expiry, lease file
+ * load/save and per-option address lists and strings.  Instances are obtained
+ * through getConfigurationManager(); constructor and destructor are private.
+ */
+class ConfigurationManager
+{
+public:
+ static ConfigurationManager* getConfigurationManager();
+ static int extractRequestList(PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg, RawOption& rawOpt);
+
+ int loadFromFile(const com::Utf8Str&);
+ int saveToFile();
+ /**
+ * Returns the client a DHCP packet belongs to.
+ */
+ Client getClientByDhcpPacket(const RTNETBOOTP *pDhcpMsg, size_t cbDhcpMsg);
+
+ /**
+ * XXX: This could be done on DHCPOFFER or on DHCPACK (RFC 2131 section 3.1.2
+ * gives freedom here; what is strict is that the address must be checked
+ * before the real allocation)...
+ */
+ Lease allocateLease4Client(const Client& client, PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg);
+
+ /**
+ * We call this before DHCPACK sent and after DHCPREQUEST received ...
+ * when requested configuration is acceptable.
+ */
+ int commitLease4Client(Client& client);
+
+ /**
+ * Expires client lease.
+ */
+ int expireLease4Client(Client& client);
+
+ static int findOption(uint8_t uOption, PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg, RawOption& opt);
+
+ NetworkConfigEntity *addNetwork(NetworkConfigEntity *pCfg,
+ const RTNETADDRIPV4& networkId,
+ const RTNETADDRIPV4& netmask,
+ RTNETADDRIPV4& UpperAddress,
+ RTNETADDRIPV4& LowerAddress);
+
+ HostConfigEntity *addHost(NetworkConfigEntity*, const RTNETADDRIPV4&, ClientMatchCriteria*);
+ /* Per-option address lists and strings, keyed by the DHCP option id. */
+ int addToAddressList(uint8_t u8OptId, RTNETADDRIPV4& address);
+ int flushAddressList(uint8_t u8OptId);
+ int setString(uint8_t u8OptId, const std::string& str);
+ const std::string& getString(uint8_t u8OptId);
+ const Ipv4AddressContainer& getAddressList(uint8_t u8OptId);
+
+private:
+ ConfigurationManager():m(NULL){}
+ void init();
+
+ ~ConfigurationManager();
+ bool isAddressTaken(const RTNETADDRIPV4& addr, Lease& lease);
+ bool isAddressTaken(const RTNETADDRIPV4& addr);
+
+public:
+ /* nulls */
+ /* NOTE(review): presumably what getAddressList()/getString() return when nothing is set - confirm. */
+ const Ipv4AddressContainer m_empty;
+ const std::string m_noString;
+
+private:
+ /* Private implementation data, defined outside this header. */
+ struct Data;
+ Data *m;
+};
+
+
+/**
+ * Interface for handling the DHCP request types and building replies.
+ * Instances are obtained through getNetworkManager(); constructor and
+ * destructor are private.
+ */
+class NetworkManager
+{
+public:
+ static NetworkManager *getNetworkManager(ComPtr<IDHCPServer> aDhcpServer = ComPtr<IDHCPServer>());
+
+ /* Accessors for the server's own address, netmask and MAC. */
+ const RTNETADDRIPV4& getOurAddress() const;
+ const RTNETADDRIPV4& getOurNetmask() const;
+ const RTMAC& getOurMac() const;
+
+ void setOurAddress(const RTNETADDRIPV4& aAddress);
+ void setOurNetmask(const RTNETADDRIPV4& aNetmask);
+ void setOurMac(const RTMAC& aMac);
+
+ /* One handler per DHCP request type; each receives the message and its size. */
+ bool handleDhcpReqDiscover(PCRTNETBOOTP pDhcpMsg, size_t cb);
+ bool handleDhcpReqRequest(PCRTNETBOOTP pDhcpMsg, size_t cb);
+ bool handleDhcpReqDecline(PCRTNETBOOTP pDhcpMsg, size_t cb);
+ bool handleDhcpReqRelease(PCRTNETBOOTP pDhcpMsg, size_t cb);
+
+ void setService(const VBoxNetHlpUDPService *);
+private:
+ NetworkManager();
+ ~NetworkManager();
+
+ /* Note: the first parameter of the next three is a Client although it is named 'lease'. */
+ int offer4Client(const Client& lease, uint32_t u32Xid, uint8_t *pu8ReqList, int cReqList);
+ int ack(const Client& lease, uint32_t u32Xid, uint8_t *pu8ReqList, int cReqList);
+ int nak(const Client& lease, uint32_t u32Xid);
+
+ int prepareReplyPacket4Client(const Client& client, uint32_t u32Xid);
+ int doReply(const Client& client, const std::vector<RawOption>& extra);
+ int processParameterReqList(const Client& client, const uint8_t *pu8ReqList, int cReqList, std::vector<RawOption>& extra);
+
+private:
+ static NetworkManager *g_NetworkManager;
+
+private:
+ /* Private implementation data, defined outside this header. */
+ struct Data;
+ Data *m;
+
+};
+
+
+extern const ClientMatchCriteria *g_AnyClient;
+extern RootConfigEntity *g_RootConfig;
+extern const NullConfigEntity *g_NullConfig;
+
+/**
+ * Helper class for stuffing DHCP options into a reply packet.
+ *
+ * Options are written to the DHCP option area first.  When that runs out of
+ * space, writing continues in the bp_file and then the bp_sname field, which
+ * is announced through an option overload option.
+ */
+class VBoxNetDhcpWriteCursor
+{
+private:
+    uint8_t        *m_pbCur;       /**< The current cursor position. */
+    uint8_t        *m_pbEnd;       /**< The end the current option space. */
+    uint8_t        *m_pfOverload;  /**< Pointer to the flags of the overload option. */
+    uint8_t         m_fUsed;       /**< Overload fields that have been used. */
+    PRTNETDHCPOPT   m_pOpt;        /**< The current option. */
+    PRTNETBOOTP     m_pDhcp;       /**< The DHCP packet. */
+    bool            m_fOverflowed; /**< Set if we've overflowed, otherwise false. */
+
+public:
+    /** Instantiate an option cursor for the specified DHCP message. */
+    VBoxNetDhcpWriteCursor(PRTNETBOOTP pDhcp, size_t cbDhcp) :
+        m_pbCur(&pDhcp->bp_vend.Dhcp.dhcp_opts[0]),
+        m_pbEnd((uint8_t *)pDhcp + cbDhcp),
+        m_pfOverload(NULL),
+        m_fUsed(0),
+        m_pOpt(NULL),
+        m_pDhcp(pDhcp),
+        m_fOverflowed(false)
+    {
+        AssertPtr(pDhcp);
+        Assert(cbDhcp > RT_UOFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts[10]));
+    }
+
+    /** Destructor. */
+    ~VBoxNetDhcpWriteCursor()
+    {
+        m_pbCur = m_pbEnd = m_pfOverload = NULL;
+        m_pOpt = NULL;
+        m_pDhcp = NULL;
+    }
+
+    /**
+     * Try use the bp_file field.
+     * @returns true if not overloaded, false otherwise.
+     */
+    bool useBpFile(void)
+    {
+        if (    m_pfOverload
+            &&  (*m_pfOverload & 1))
+            return false;
+        m_fUsed |= 1 /* bp_file flag*/;
+        return true;
+    }
+
+
+    /**
+     * Try overload more BOOTP fields
+     *
+     * @returns true if the cursor was switched to another field, false if
+     *          both bp_file and bp_sname are already in use.
+     */
+    bool overloadMore(void)
+    {
+        /* switch option area. */
+        uint8_t    *pbNew;
+        uint8_t    *pbNewEnd;
+        uint8_t     fField;
+        if (!(m_fUsed & 1))
+        {
+            fField     = 1;
+            pbNew      = &m_pDhcp->bp_file[0];
+            pbNewEnd   = &m_pDhcp->bp_file[sizeof(m_pDhcp->bp_file)];
+        }
+        else if (!(m_fUsed & 2))
+        {
+            fField     = 2;
+            pbNew      = &m_pDhcp->bp_sname[0];
+            pbNewEnd   = &m_pDhcp->bp_sname[sizeof(m_pDhcp->bp_sname)];
+        }
+        else
+            return false;
+
+        if (!m_pfOverload)
+        {
+            /* Add an overload option: code, length (always 1), field flags. */
+            *m_pbCur++ = RTNET_DHCP_OPT_OPTION_OVERLOAD;
+            *m_pbCur++ = 1;
+            m_pfOverload = m_pbCur;
+            *m_pbCur++ = fField;
+        }
+        else
+            *m_pfOverload |= fField;
+
+        /* Remember the field is taken so the next call moves on to the
+           other one instead of restarting in this one. */
+        m_fUsed |= fField;
+
+        /* pad current option field */
+        while (m_pbCur != m_pbEnd)
+            *m_pbCur++ = RTNET_DHCP_OPT_PAD; /** @todo not sure if this stuff is at all correct... */
+
+        /* switch */
+        m_pbCur = pbNew;
+        m_pbEnd = pbNewEnd;
+        return true;
+    }
+
+    /**
+     * Begin an option.
+     *
+     * @returns true on success, false if we're out of space or the data is
+     *          too large for a single option (more than 255 bytes).
+     *
+     * @param   uOption     The option number.
+     * @param   cb          The amount of data.
+     */
+    bool begin(uint8_t uOption, size_t cb)
+    {
+        /* Check that the data of the previous option has all been written. */
+        Assert(   !m_pOpt
+               || (m_pbCur - m_pOpt->dhcp_len == (uint8_t *)(m_pOpt + 1)));
+        AssertMsg(cb <= 255, ("%#x\n", cb));
+
+        /* The length field is 8 bits wide.  Refuse oversized data also in
+           builds without assertions, where put() would otherwise write more
+           bytes than the truncated length announces. */
+        if (cb > 255)
+        {
+            m_pOpt = NULL;
+            m_fOverflowed = true;
+            return false;
+        }
+
+        /* Check if we need to overload more stuff. */
+        if ((uintptr_t)(m_pbEnd - m_pbCur) < cb + 2 + (m_pfOverload ? 1 : 3))
+        {
+            m_pOpt = NULL;
+            if (!overloadMore())
+            {
+                m_fOverflowed = true;
+                AssertMsgFailedReturn(("%u %#x\n", uOption, cb), false);
+            }
+            if ((uintptr_t)(m_pbEnd - m_pbCur) < cb + 2 + 1)
+            {
+                m_fOverflowed = true;
+                AssertMsgFailedReturn(("%u %#x\n", uOption, cb), false);
+            }
+        }
+
+        /* Emit the option header. */
+        m_pOpt = (PRTNETDHCPOPT)m_pbCur;
+        m_pOpt->dhcp_opt = uOption;
+        m_pOpt->dhcp_len = (uint8_t)cb;
+        m_pbCur += 2;
+        return true;
+    }
+
+    /**
+     * Puts option data.
+     *
+     * Nothing is written when there is no current option, i.e. after a
+     * failed begin().
+     *
+     * @param   pvData      The data.
+     * @param   cb          The amount to put.
+     */
+    void put(void const *pvData, size_t cb)
+    {
+        Assert(m_pOpt || m_fOverflowed);
+        if (RT_LIKELY(m_pOpt))
+        {
+            Assert((uintptr_t)m_pbCur - (uintptr_t)(m_pOpt + 1) + cb  <= (size_t)m_pOpt->dhcp_len);
+            memcpy(m_pbCur, pvData, cb);
+            m_pbCur += cb;
+        }
+    }
+
+    /**
+     * Puts an IPv4 Address.
+     *
+     * @param   IPv4Addr    The address.
+     */
+    void putIPv4Addr(RTNETADDRIPV4 IPv4Addr)
+    {
+        put(&IPv4Addr, 4);
+    }
+
+    /**
+     * Adds an IPv4 address option.
+     *
+     * @returns true/false just like begin().
+     *
+     * @param   uOption     The option number.
+     * @param   IPv4Addr    The address.
+     */
+    bool optIPv4Addr(uint8_t uOption, RTNETADDRIPV4 IPv4Addr)
+    {
+        if (!begin(uOption, 4))
+            return false;
+        putIPv4Addr(IPv4Addr);
+        return true;
+    }
+
+    /**
+     * Adds an option taking 1 or more IPv4 address.
+     *
+     * If the vector contains no addresses, the option will not be added.
+     *
+     * @returns true/false just like begin().
+     *
+     * @param   uOption     The option number.
+     * @param   rIPv4Addrs  Reference to the address vector.
+     */
+    bool optIPv4Addrs(uint8_t uOption, std::vector<RTNETADDRIPV4> const &rIPv4Addrs)
+    {
+        size_t const c = rIPv4Addrs.size();
+        if (!c)
+            return true;
+
+        if (!begin(uOption, 4*c))
+            return false;
+        for (size_t i = 0; i < c; i++)
+            putIPv4Addr(rIPv4Addrs[i]);
+        return true;
+    }
+
+    /**
+     * Puts an 8-bit integer.
+     *
+     * @param   u8          The integer.
+     */
+    void putU8(uint8_t u8)
+    {
+        put(&u8, 1);
+    }
+
+    /**
+     * Adds an 8-bit integer option.
+     *
+     * @returns true/false just like begin().
+     *
+     * @param   uOption     The option number.
+     * @param   u8          The integer
+     */
+    bool optU8(uint8_t uOption, uint8_t u8)
+    {
+        if (!begin(uOption, 1))
+            return false;
+        putU8(u8);
+        return true;
+    }
+
+    /**
+     * Puts an 32-bit integer (network endian).
+     *
+     * @param   u32         The integer.
+     */
+    void putU32(uint32_t u32)
+    {
+        put(&u32, 4);
+    }
+
+    /**
+     * Adds an 32-bit integer (network endian) option.
+     *
+     * @returns true/false just like begin().
+     *
+     * @param   uOption     The option number.
+     * @param   u32         The integer.
+     */
+    bool optU32(uint8_t uOption, uint32_t u32)
+    {
+        if (!begin(uOption, 4))
+            return false;
+        putU32(u32);
+        return true;
+    }
+
+    /**
+     * Puts a std::string.
+     *
+     * @param   rStr        Reference to the string.
+     */
+    void putStr(std::string const &rStr)
+    {
+        put(rStr.c_str(), rStr.size());
+    }
+
+    /**
+     * Adds an std::string option if the string isn't empty.
+     *
+     * @returns true/false just like begin().
+     *
+     * @param   uOption     The option number.
+     * @param   rStr        Reference to the string.
+     */
+    bool optStr(uint8_t uOption, std::string const &rStr)
+    {
+        const size_t cch = rStr.size();
+        if (!cch)
+            return true;
+
+        if (!begin(uOption, cch))
+            return false;
+        put(rStr.c_str(), cch);
+        return true;
+    }
+
+    /**
+     * Whether we've overflowed.
+     *
+     * @returns true on overflow, false otherwise.
+     */
+    bool hasOverflowed(void) const
+    {
+        return m_fOverflowed;
+    }
+
+    /**
+     * Adds the terminating END option.
+     *
+     * The END will always be added as we're reserving room for it, however, we
+     * might have dropped previous options due to overflows and that is what the
+     * return status indicates.
+     *
+     * @returns true on success, false on a (previous) overflow.
+     */
+    bool optEnd(void)
+    {
+        Assert((uintptr_t)(m_pbEnd - m_pbCur) < 4096);
+        *m_pbCur++ = RTNET_DHCP_OPT_END;
+        return !hasOverflowed();
+    }
+};
+
+#endif /* !VBOX_INCLUDED_SRC_DHCP_Config_h */
diff --git a/src/VBox/NetworkServices/DHCP/Makefile.kmk b/src/VBox/NetworkServices/DHCP/Makefile.kmk
new file mode 100644
index 00000000..41e9bb8e
--- /dev/null
+++ b/src/VBox/NetworkServices/DHCP/Makefile.kmk
@@ -0,0 +1,72 @@
+ # $Id: Makefile.kmk $
+## @file
+# Sub-Makefile for VBoxNetDHCP.
+#
+
+#
+# Copyright (C) 2009-2019 Oracle Corporation
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+
+SUB_DEPTH = ../../../..
+include $(KBUILD_PATH)/subheader.kmk
+
+VBOX_PATH_NET_DHCP_SRC := $(PATH_SUB_CURRENT)
+
+#
+# Targets.
+#
+# With hardening, VBoxNetDHCP is built as a DLL plus a separate hardened
+# program target; without it, VBoxNetDHCP itself is the program.
+#
+ifdef VBOX_WITH_HARDENING
+ PROGRAMS += VBoxNetDHCPHardened
+ DLLS += VBoxNetDHCP
+else
+ PROGRAMS += VBoxNetDHCP
+endif
+
+
+#
+# Hardened VBoxNetDHCP.
+#
+# The hardened target is given the name VBoxNetDHCP.
+#
+VBoxNetDHCPHardened_TEMPLATE = VBOXR3HARDENEDEXE
+VBoxNetDHCPHardened_SOURCES = VBoxNetDHCPHardened.cpp
+VBoxNetDHCPHardened_SOURCES.win = $(VBoxNetDHCP_0_OUTDIR)/VBoxNetDHCP-icon.rc
+VBoxNetDHCPHardened_NAME = VBoxNetDHCP
+VBoxNetDHCPHardened_LDFLAGS.win = /SUBSYSTEM:windows
+
+
+#
+# VBoxNetDHCP
+#
+# The template depends on hardening: VBOXMAINDLL with it, VBOXMAINCLIENTEXE without.
+#
+VBoxNetDHCP_TEMPLATE := VBOX$(if-expr defined(VBOX_WITH_HARDENING),MAINDLL,MAINCLIENTEXE)
+VBoxNetDHCP_SOURCES = \
+ VBoxNetDHCP.cpp \
+ Config.cpp \
+ NetworkManagerDhcp.cpp \
+ $(VBOX_PATH_NET_DHCP_SRC)/../NetLib/VBoxNetIntIf.cpp \
+ $(VBOX_PATH_NET_DHCP_SRC)/../NetLib/VBoxNetUDP.cpp \
+ $(VBOX_PATH_NET_DHCP_SRC)/../NetLib/VBoxNetARP.cpp \
+ $(VBOX_PATH_NET_DHCP_SRC)/../NetLib/VBoxNetBaseService.cpp \
+ $(VBOX_PATH_NET_DHCP_SRC)/../NetLib/ComHostUtils.cpp
+VBoxNetDHCP_LIBS = \
+ $(LIB_RUNTIME)
+VBoxNetDHCP_LDFLAGS.win = /SUBSYSTEM:windows
+
+ifeq ($(KBUILD_TARGET),win)
+# Icon include file.
+# The rule below regenerates VBoxNetDHCP-icon.rc in the output directory as a
+# one-line resource script referencing $(VBOX_WINDOWS_ICON_FILE).
+VBoxNetDHCP_SOURCES += VBoxNetDHCP.rc
+VBoxNetDHCP.rc_INCS = $(VBoxNetDHCP_0_OUTDIR)
+VBoxNetDHCP.rc_DEPS = $(VBoxNetDHCP_0_OUTDIR)/VBoxNetDHCP-icon.rc
+VBoxNetDHCP.rc_CLEAN = $(VBoxNetDHCP_0_OUTDIR)/VBoxNetDHCP-icon.rc
+$$(VBoxNetDHCP_0_OUTDIR)/VBoxNetDHCP-icon.rc: $(VBOX_WINDOWS_ICON_FILE) $$(VBoxNetDHCP_DEFPATH)/Makefile.kmk | $$(dir $$@)
+ $(RM) -f $@
+ $(APPEND) $@ 'IDI_VIRTUALBOX ICON DISCARDABLE "$(subst /,\\,$(VBOX_WINDOWS_ICON_FILE))"'
+endif # win
+
+include $(FILE_KBUILD_SUB_FOOTER)
diff --git a/src/VBox/NetworkServices/DHCP/NetworkManagerDhcp.cpp b/src/VBox/NetworkServices/DHCP/NetworkManagerDhcp.cpp
new file mode 100644
index 00000000..591da5b8
--- /dev/null
+++ b/src/VBox/NetworkServices/DHCP/NetworkManagerDhcp.cpp
@@ -0,0 +1,189 @@
+/* $Id: NetworkManagerDhcp.cpp $ */
+/** @file
+ * NetworkManagerDhcp - Network Manager part handling Dhcp.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/asm.h>
+#include <iprt/cdefs.h>
+#include <iprt/getopt.h>
+#include <iprt/net.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/stream.h>
+#include <iprt/time.h>
+#include <iprt/string.h>
+
+#include "../NetLib/shared_ptr.h"
+
+#include <vector>
+#include <list>
+#include <string>
+#include <map>
+
+#include <VBox/sup.h>
+#include <VBox/intnet.h>
+
+#define BASE_SERVICES_ONLY
+#include "../NetLib/VBoxNetBaseService.h"
+#include "Config.h"
+#include "ClientDataInt.h"
+
+/**
+ * The client is requesting an offer (DHCPDISCOVER).
+ *
+ * @returns true once an offer has been sent; false if no lease could be
+ *          allocated for the client (asserted).
+ * @param   pDhcpMsg    The message.
+ * @param   cb          The message size.
+ */
+bool NetworkManager::handleDhcpReqDiscover(PCRTNETBOOTP pDhcpMsg, size_t cb)
+{
+    RawOption opt;
+    RT_ZERO(opt);
+
+    /* 1. Find client */
+    ConfigurationManager *confManager = ConfigurationManager::getConfigurationManager();
+    Client client = confManager->getClientByDhcpPacket(pDhcpMsg, cb);
+
+    /* 2. Find/Bind lease for client.  We return bool, so fail with false rather than an IPRT status. */
+    Lease lease = confManager->allocateLease4Client(client, pDhcpMsg, cb);
+    AssertReturn(lease != Lease::NullLease, false);
+
+    int rc = ConfigurationManager::extractRequestList(pDhcpMsg, cb, opt);
+    NOREF(rc); /** @todo check */
+
+    /* 3. Send the offer */
+
+    lease.bindingPhase(true);
+    lease.phaseStart(RTTimeMilliTS());
+    lease.setExpiration(300); /* NOTE(review): used to say "3 min."; 300 s would be 5 min. - confirm the unit. */
+    offer4Client(client, pDhcpMsg->bp_xid, opt.au8RawOpt, opt.cbRawOpt);
+
+    return true;
+}
+
+
+/**
+ * Handles a DHCPREQUEST: acknowledges the client's bound lease or NAKs it.
+ *
+ * @returns true after replying with ACK or NAK; false if committing the
+ *          lease or extracting the request list fails (asserted).
+ * @param   pDhcpMsg    The message.
+ * @param   cb          The message size.
+ */
+bool NetworkManager::handleDhcpReqRequest(PCRTNETBOOTP pDhcpMsg, size_t cb)
+{
+    ConfigurationManager *pCfgMgr = ConfigurationManager::getConfigurationManager();
+
+    /* Step 1: identify the client behind this packet. */
+    Client client = pCfgMgr->getClientByDhcpPacket(pDhcpMsg, cb);
+
+    /* Step 2: without a bound lease there is nothing to confirm. */
+    Lease boundLease = client.lease();
+    if (!(boundLease != Lease::NullLease))
+    {
+        nak(client, pDhcpMsg->bp_xid);
+        return true;
+    }
+
+    if (boundLease.isExpired())
+    {
+        /* Expired: the NAK sends the client back to the INIT state. */
+        Client expiredClient(client);
+        nak(client, pDhcpMsg->bp_xid);
+        pCfgMgr->expireLease4Client(expiredClient);
+        return true;
+    }
+
+    /* XXX: Validate request */
+    RawOption opt;
+    RT_ZERO(opt);
+
+    Client committedClient(client);
+    int rc = pCfgMgr->commitLease4Client(committedClient);
+    AssertRCReturn(rc, false);
+
+    rc = ConfigurationManager::extractRequestList(pDhcpMsg, cb, opt);
+    AssertRCReturn(rc, false);
+
+    ack(client, pDhcpMsg->bp_xid, opt.au8RawOpt, opt.cbRawOpt);
+
+    return true;
+}
+
+
+/**
+ * The client is declining an offer we've made.
+ *
+ * Not implemented yet: the message and its size are accepted as unnamed,
+ * unused parameters and nothing is recorded.
+ *
+ * @returns true.
+ */
+bool NetworkManager::handleDhcpReqDecline(PCRTNETBOOTP, size_t)
+{
+ /** @todo Probably need to match the server IP here to work correctly with
+ * other servers. */
+
+ /*
+ * The client is supposed to pass us option 50, requested address,
+ * from the offer. We also match the lease state. Apparently the
+ * MAC address is not supposed to be checked here.
+ */
+
+ /** @todo this is not required in the initial implementation, do it later. */
+ return true;
+}
+
+
+/**
+ * The client is releasing its lease - good boy.
+ *
+ * Not implemented yet: the message and its size are accepted as unnamed,
+ * unused parameters; the comments below only outline the intended steps.
+ *
+ * @returns true.
+ */
+bool NetworkManager::handleDhcpReqRelease(PCRTNETBOOTP, size_t)
+{
+ /** @todo Probably need to match the server IP here to work correctly with
+ * other servers. */
+
+ /*
+ * The client may pass us option 61, client identifier, which we should
+ * use to find the lease by.
+ *
+ * We're matching MAC address and lease state as well.
+ */
+
+ /*
+ * If no client identifier or if we couldn't find a lease by using it,
+ * we will try to look it up by the client IP address.
+ */
+
+
+ /*
+ * If found, release it.
+ */
+
+
+ /** @todo this is not required in the initial implementation, do it later. */
+ return true;
+}
+
diff --git a/src/VBox/NetworkServices/DHCP/README.customoptions b/src/VBox/NetworkServices/DHCP/README.customoptions
new file mode 100644
index 00000000..c121d318
--- /dev/null
+++ b/src/VBox/NetworkServices/DHCP/README.customoptions
@@ -0,0 +1,23 @@
+To configure custom DHCP options for a VM use the following command adapted
+to your needs:
+
+$ VBoxManage dhcpserver modify \
+ --netname test-0 --options --vm "Test Client" --slot 0 \
+ --id 0 --value "224=c0:a8:02:01:c0:a8:02:02" \
+ --id 0 --value "225=0:0"
+
+Note that custom DHCP options must be specified with ID 0 and the actual
+number in the value. This has technical reasons which may change in future
+VirtualBox releases.
+
+
+It corresponds to the following bit of ISC 'dhcpd.conf':
+
+option sample1 code 224 = array of ip-address;
+option sample2 code 225 = array of integer 8;
+
+...
+ option sample1 192.168.2.1,192.168.2.2;
+ option sample2 0,0;
+...
+
diff --git a/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.cpp b/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.cpp
new file mode 100644
index 00000000..8c543272
--- /dev/null
+++ b/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.cpp
@@ -0,0 +1,885 @@
+/* $Id: VBoxNetDHCP.cpp $ */
+/** @file
+ * VBoxNetDHCP - DHCP Service for connecting to IntNet.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+/** @page pg_net_dhcp VBoxNetDHCP
+ *
+ * The DHCP service for connecting to IntNet.  @todo Write a few more words...
+ *
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <VBox/com/com.h>
+#include <VBox/com/listeners.h>
+#include <VBox/com/string.h>
+#include <VBox/com/Guid.h>
+#include <VBox/com/array.h>
+#include <VBox/com/ErrorInfo.h>
+#include <VBox/com/errorprint.h>
+#include <VBox/com/EventQueue.h>
+#include <VBox/com/VirtualBox.h>
+
+#include <iprt/alloca.h>
+#include <iprt/buildconfig.h>
+#include <iprt/err.h>
+#include <iprt/net.h> /* must come before getopt */
+#include <iprt/getopt.h>
+#include <iprt/initterm.h>
+#include <iprt/message.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/stream.h>
+#include <iprt/time.h>
+#include <iprt/string.h>
+#ifdef RT_OS_WINDOWS
+# include <iprt/thread.h>
+#endif
+
+#include <VBox/sup.h>
+#include <VBox/intnet.h>
+#include <VBox/intnetinline.h>
+#include <VBox/vmm/vmm.h>
+#include <VBox/version.h>
+
+#include "../NetLib/VBoxNetLib.h"
+#include "../NetLib/shared_ptr.h"
+
+#include <vector>
+#include <list>
+#include <string>
+#include <map>
+
+#include "../NetLib/VBoxNetBaseService.h"
+#include "../NetLib/utils.h"
+
+#ifdef RT_OS_WINDOWS /* WinMain */
+# include <iprt/win/windows.h>
+# include <stdlib.h>
+# ifdef INET_ADDRSTRLEN
+/* On Windows INET_ADDRSTRLEN defined as 22 Ws2ipdef.h, because it include port number */
+# undef INET_ADDRSTRLEN
+# endif
+# define INET_ADDRSTRLEN 16
+#else
+# include <netinet/in.h>
+#endif
+
+
+#include "Config.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * DHCP server instance.
+ */
+class VBoxNetDhcp : public VBoxNetBaseService, public NATNetworkEventAdapter
+{
+public:
+ VBoxNetDhcp();
+ virtual ~VBoxNetDhcp();
+
+ int init();
+ void done();
+ void usage(void) { /* XXX: document options */ };
+ int parseOpt(int rc, const RTGETOPTUNION& getOptVal);
+ int processFrame(void *, size_t) {return VERR_IGNORED; }; /* raw frames are not handled here */
+ int processGSO(PCPDMNETWORKGSO, size_t) {return VERR_IGNORED; }; /* neither are GSO frames */
+ int processUDP(void *, size_t);
+
+protected:
+ bool handleDhcpMsg(uint8_t uMsgType, PCRTNETBOOTP pDhcpMsg, size_t cb);
+
+ void debugPrintV(int32_t iMinLevel, bool fMsg, const char *pszFmt, va_list va) const;
+ static const char *debugDhcpName(uint8_t uMsgType);
+
+private:
+ int initNoMain();
+ int initWithMain();
+ HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent);
+
+ static int hostDnsServers(const ComHostPtr& host,
+ const RTNETADDRIPV4& networkid,
+ const AddressToOffsetMapping& mapping,
+ AddressList& servers);
+ int fetchAndUpdateDnsInfo();
+
+protected:
+ /** @name The DHCP server specific configuration data members.
+ * @{ */
+ /*
+ * XXX: what was the plan? SQL3 or plain text file?
+ * How will it coexist with management from VBoxManage, and who should manage the db
+ * in that case (VBoxManage, VBoxSVC ???)
+ */
+ std::string m_LeaseDBName;
+
+ /** @} */
+
+ /* corresponding dhcp server description in Main */
+ ComPtr<IDHCPServer> m_DhcpServer;
+
+ ComPtr<INATNetwork> m_NATNetwork;
+
+ /** Listener for Host DNS changes */
+ ComNatListenerPtr m_VBoxListener; /* also gets the NAT network start/stop events, see initWithMain() */
+ ComNatListenerPtr m_VBoxClientListener; /* VBoxSVC availability changes, see initWithMain() */
+
+ NetworkManager *m_NetworkManager;
+
+ /*
+ * Starts out true and is cleared by parseOpt() as soon as a DHCP specific argument
+ * is seen; while it is true all parameters are expected to come from Main.
+ */
+ bool m_fIgnoreCmdLineParameters;
+
+ /*
+ * Options as recorded by parseOpt() (key + value text), walked later by initNoMain().
+ */
+ typedef struct
+ {
+ char Key;
+ std::string strValue;
+ } CMDLNPRM;
+ std::list<CMDLNPRM> CmdParameterll;
+ typedef std::list<CMDLNPRM>::iterator CmdParameterIterator;
+
+ /** @name Debug stuff
+ * @{ */
+ int32_t m_cVerbosity;
+ uint8_t m_uCurMsgType;
+ size_t m_cbCurMsg;
+ PCRTNETBOOTP m_pCurMsg;
+ VBOXNETUDPHDRS m_CurHdrs;
+ /** @} */
+};
+
+
+static inline int configGetBoundryAddress(const ComDhcpServerPtr& dhcp, bool fUpperBoundry, RTNETADDRIPV4& boundryAddress)
+{
+    /* Start from INADDR_ANY so the output is defined on the COM failure path too. */
+    boundryAddress.u = INADDR_ANY;
+
+    /* Query whichever end of the range the caller asked for. */
+    com::Bstr bstrBoundary;
+    HRESULT hrcGet = fUpperBoundry
+                   ? dhcp->COMGETTER(UpperIP)(bstrBoundary.asOutParam())
+                   : dhcp->COMGETTER(LowerIP)(bstrBoundary.asOutParam());
+    AssertComRCReturn(hrcGet, VERR_INTERNAL_ERROR);
+
+    return RTNetStrToIPv4Addr(com::Utf8Str(bstrBoundary).c_str(), &boundryAddress);
+}
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Pointer to the DHCP server. */
+static VBoxNetDhcp *g_pDhcp;
+
+/* DHCP server specific options; registered one by one in the VBoxNetDhcp constructor. */
+static RTGETOPTDEF g_aOptionDefs[] =
+{
+ { "--lease-db", 'D', RTGETOPT_REQ_STRING }, /* accepted but ignored by parseOpt() */
+ { "--begin-config", 'b', RTGETOPT_REQ_NOTHING }, /* accepted but ignored by parseOpt() */
+ { "--gateway", 'g', RTGETOPT_REQ_IPV4ADDR }, /* recorded by parseOpt() */
+ { "--lower-ip", 'l', RTGETOPT_REQ_IPV4ADDR }, /* recorded by parseOpt(), applied in initNoMain() */
+ { "--upper-ip", 'u', RTGETOPT_REQ_IPV4ADDR }, /* recorded by parseOpt(), applied in initNoMain() */
+};
+
+/**
+ * Construct a DHCP server with a default configuration (MAC 08:00:27:40:41:42, IPv4 10.0.2.5, 8K send / 50K receive buffers).
+ */
+VBoxNetDhcp::VBoxNetDhcp()
+  : VBoxNetBaseService("VBoxNetDhcp", "VBoxNetDhcp"),
+    m_NetworkManager(NULL)
+{
+    /* m_enmTrunkType = kIntNetTrunkType_WhateverNone; */
+    RTMAC mac;
+    mac.au8[0] = 0x08;
+    mac.au8[1] = 0x00;
+    mac.au8[2] = 0x27;
+    mac.au8[3] = 0x40;
+    mac.au8[4] = 0x41;
+    mac.au8[5] = 0x42;
+    setMacAddress(mac);
+
+    RTNETADDRIPV4 address;
+    address.u = RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2, 5)));
+    setIpv4Address(address);
+
+    setSendBufSize(8 * _1K);
+    setRecvBufSize(50 * _1K);
+
+    m_cVerbosity  = 0; /* debugPrintV() compares against this; it must not stay indeterminate. */
+    m_uCurMsgType = UINT8_MAX;
+    m_cbCurMsg    = 0;
+    m_pCurMsg     = NULL;
+    RT_ZERO(m_CurHdrs);
+    m_fIgnoreCmdLineParameters = true;
+
+    for(unsigned int i = 0; i < RT_ELEMENTS(g_aOptionDefs); ++i)
+        addCommandLineOption(&g_aOptionDefs[i]);
+}
+
+
+/**
+ * Destruct a DHCP server (nothing to do here; the event listeners are released by done()).
+ */
+VBoxNetDhcp::~VBoxNetDhcp()
+{
+}
+
+
+
+
+/**
+ * Parse the DHCP specific arguments.
+ *
+ * This callback is called once per parameter, so nothing can be validated
+ * against the other options here.  The address options (-l, -u, -g) are
+ * only recorded, as text, in CmdParameterll for later analysis; -b and -D
+ * are accepted but ignored.
+ *
+ * @returns VINF_SUCCESS for the options above, otherwise whatever
+ *          RTGetOptPrintError() returns for the unexpected option.
+ * @param   rc      The option key (short option character).
+ * @param   Val     The option value.
+ */
+int VBoxNetDhcp::parseOpt(int rc, const RTGETOPTUNION& Val)
+{
+    /* Ok, we've entered here, thus we can't ignore cmd line parameters anymore */
+    m_fIgnoreCmdLineParameters = false;
+
+    if (rc == 'b' || rc == 'D')
+    {
+        /* Recognised, but there is nothing to remember. */
+        return VINF_SUCCESS;
+    }
+
+    if (rc != 'l' && rc != 'u' && rc != 'g')
+    {
+        rc = RTGetOptPrintError(rc, &Val);
+        RTPrintf("Use --help for more information.\n");
+        return rc;
+    }
+
+    /* Address options: keep the dotted-quad text together with the option key. */
+    char szAddr[17];
+    RTStrPrintf(szAddr, sizeof(szAddr), "%RTnaipv4", Val.IPv4Addr.u);
+
+    CMDLNPRM entry;
+    entry.Key      = rc;
+    entry.strValue = szAddr;
+    CmdParameterll.push_back(entry);
+
+    return VINF_SUCCESS;
+}
+
+/**
+ * Initialises the base service, loads the configuration (through Main when isMainNeeded(),
+ * else from the command line) and hands our address, netmask and MAC to the network manager.
+ */
+int VBoxNetDhcp::init()
+{
+    int rc = VBoxNetBaseService::init();
+    AssertRCReturn(rc, rc);
+
+    rc = isMainNeeded() ? initWithMain() : initNoMain();
+    AssertRCReturn(rc, rc);
+
+    m_NetworkManager = NetworkManager::getNetworkManager(m_DhcpServer);
+    AssertPtrReturn(m_NetworkManager, VERR_INTERNAL_ERROR);
+
+    m_NetworkManager->setOurAddress(getIpv4Address());
+    m_NetworkManager->setOurNetmask(getIpv4Netmask());
+    m_NetworkManager->setOurMac(getMacAddress());
+    m_NetworkManager->setService(this);
+    return VINF_SUCCESS;
+}
+
+void VBoxNetDhcp::done()
+{
+ destroyNatListener(m_VBoxListener, virtualbox); /* counterpart of createNatListener() in initWithMain() */
+ destroyClientListener(m_VBoxClientListener, virtualboxClient); /* counterpart of createClientListener() in initWithMain() */
+}
+
+/** Handles one UDP payload: validates it as DHCP and dispatches it under the service lock; always returns VINF_SUCCESS. */
+int VBoxNetDhcp::processUDP(void *pv, size_t cbPv)
+{
+    PCRTNETBOOTP pDhcpMsg = (PCRTNETBOOTP)pv;
+    m_pCurMsg  = pDhcpMsg; /* remembered for debugPrintV() */
+    m_cbCurMsg = cbPv;
+
+    uint8_t uMsgType;
+    if (RTNetIPv4IsDHCPValid(NULL /* why is this here? */, pDhcpMsg, cbPv, &uMsgType))
+    {
+        m_uCurMsgType = uMsgType;
+        {
+            /* To avoid fight with event processing thread; the guard needs a name to live until the '}' (assumes VBoxNetALock is a scoped-lock class). */
+            VBoxNetALock lock(this);
+            handleDhcpMsg(uMsgType, pDhcpMsg, cbPv);
+        }
+        m_uCurMsgType = UINT8_MAX;
+    }
+    else
+        debugPrint(1, true, "VBoxNetDHCP: Skipping invalid DHCP packet.\n"); /** @todo handle pure bootp clients too? */
+
+    m_pCurMsg = NULL;
+    m_cbCurMsg = 0;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Routes a DHCP message to the matching NetworkManager handler.
+ *
+ * @returns The handler's result; false for non-request BOOTP operations and
+ *          for message types that have no handler.  (IGNORED BY CALLER)
+ * @param   uMsgType    The message type.
+ * @param   pDhcpMsg    The DHCP message.
+ * @param   cb          The size of the DHCP message.
+ */
+bool VBoxNetDhcp::handleDhcpMsg(uint8_t uMsgType, PCRTNETBOOTP pDhcpMsg, size_t cb)
+{
+    /* Anything that is not a BOOTP request is left unhandled. */
+    if (pDhcpMsg->bp_op != RTNETBOOTP_OP_REQUEST)
+        return false;
+
+    AssertPtrReturn(m_NetworkManager, false);
+
+    /* The lease negotiation messages are forwarded to the network manager. */
+    if (uMsgType == RTNET_DHCP_MT_DISCOVER)
+        return m_NetworkManager->handleDhcpReqDiscover(pDhcpMsg, cb);
+
+    if (uMsgType == RTNET_DHCP_MT_REQUEST)
+        return m_NetworkManager->handleDhcpReqRequest(pDhcpMsg, cb);
+
+    if (uMsgType == RTNET_DHCP_MT_DECLINE)
+        return m_NetworkManager->handleDhcpReqDecline(pDhcpMsg, cb);
+
+    if (uMsgType == RTNET_DHCP_MT_RELEASE)
+        return m_NetworkManager->handleDhcpReqRelease(pDhcpMsg, cb);
+
+    /* Everything else is only reported through debugPrint() and counts
+       as not handled. */
+    if (uMsgType == RTNET_DHCP_MT_INFORM)
+        debugPrint(0, true, "Should we handle this?");
+    else
+        debugPrint(0, true, "Unexpected.");
+
+    return false;
+}
+
+/**
+ * Print debug message depending on the m_cVerbosity level.
+ * Levels below 2 are tagged "info", the others "debug"; output goes to g_pStdErr.
+ * @param iMinLevel The minimum m_cVerbosity level for this message.
+ * @param fMsg Whether to dump parts of the current DHCP message (only at verbosity >= 2 and while a message is set).
+ * @param pszFmt The message format string.
+ * @param va Optional arguments.
+ */
+void VBoxNetDhcp::debugPrintV(int iMinLevel, bool fMsg, const char *pszFmt, va_list va) const
+{
+ if (iMinLevel <= m_cVerbosity)
+ {
+ va_list vaCopy; /* This dude is *very* special, thus the copy. */
+ va_copy(vaCopy, va);
+ RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: %s: %N\n", iMinLevel >= 2 ? "debug" : "info", pszFmt, &vaCopy);
+ va_end(vaCopy);
+
+ if ( fMsg
+ && m_cVerbosity >= 2
+ && m_pCurMsg)
+ {
+ /* XXX: export this to debugPrinfDhcpMsg or variant and other method export
+ * to base class
+ */
+ const char *pszMsg = m_uCurMsgType != UINT8_MAX ? debugDhcpName(m_uCurMsgType) : ""; /* UINT8_MAX means no message type is current */
+ RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: debug: %8s chaddr=%.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d siaddr=%d.%d.%d.%d xid=%#x\n",
+ pszMsg,
+ &m_pCurMsg->bp_chaddr,
+ m_pCurMsg->bp_ciaddr.au8[0], m_pCurMsg->bp_ciaddr.au8[1], m_pCurMsg->bp_ciaddr.au8[2], m_pCurMsg->bp_ciaddr.au8[3],
+ m_pCurMsg->bp_yiaddr.au8[0], m_pCurMsg->bp_yiaddr.au8[1], m_pCurMsg->bp_yiaddr.au8[2], m_pCurMsg->bp_yiaddr.au8[3],
+ m_pCurMsg->bp_siaddr.au8[0], m_pCurMsg->bp_siaddr.au8[1], m_pCurMsg->bp_siaddr.au8[2], m_pCurMsg->bp_siaddr.au8[3],
+ m_pCurMsg->bp_xid);
+ }
+ }
+}
+
+
+/**
+ * Gets the name of given DHCP message type.
+ *
+ * @returns Readonly name; "UNKNOWN" for values without an entry below.
+ * @param uMsgType The message number.
+ */
+/* static */ const char *VBoxNetDhcp::debugDhcpName(uint8_t uMsgType)
+{
+ switch (uMsgType)
+ {
+ case 0: return "MT_00";
+ case RTNET_DHCP_MT_DISCOVER: return "DISCOVER";
+ case RTNET_DHCP_MT_OFFER: return "OFFER";
+ case RTNET_DHCP_MT_REQUEST: return "REQUEST";
+ case RTNET_DHCP_MT_DECLINE: return "DECLINE";
+ case RTNET_DHCP_MT_ACK: return "ACK";
+ case RTNET_DHCP_MT_NAC: return "NAC";
+ case RTNET_DHCP_MT_RELEASE: return "RELEASE";
+ case RTNET_DHCP_MT_INFORM: return "INFORM";
+ case 9: return "MT_09";
+ case 10: return "MT_0a";
+ case 11: return "MT_0b";
+ case 12: return "MT_0c";
+ case 13: return "MT_0d";
+ case 14: return "MT_0e";
+ case 15: return "MT_0f";
+ case 16: return "MT_10";
+ case 17: return "MT_11";
+ case 18: return "MT_12";
+ case 19: return "MT_13";
+ case UINT8_MAX: return "MT_ff"; /* the "no current message" marker used by processUDP() */
+ default: return "UNKNOWN";
+ }
+}
+
+
+/**
+ * Stand-alone initialisation: builds the single network from our address,
+ * netmask and the -l / -u values recorded by parseOpt().
+ *
+ * @returns VINF_SUCCESS, VERR_INTERNAL_ERROR without a configuration manager.
+ */
+int VBoxNetDhcp::initNoMain()
+{
+    RTNETADDRIPV4 address = getIpv4Address();
+    RTNETADDRIPV4 netmask = getIpv4Netmask();
+    RTNETADDRIPV4 networkId;
+    networkId.u = address.u & netmask.u;
+
+    /*
+     * Default range: the whole network.  The upper end needs the *host* bits
+     * set, i.e. the inverted netmask; OR-ing in the netmask itself (as this
+     * used to) produced an address outside the network, e.g. 255.255.255.0
+     * for 10.0.2.0/24 instead of 10.0.2.255.
+     */
+    RTNETADDRIPV4 LowerAddress = networkId;
+    RTNETADDRIPV4 UpperAddress;
+    UpperAddress.u = RT_H2N_U32(RT_N2H_U32(networkId.u) | ~RT_N2H_U32(netmask.u));
+
+    /* Explicit bounds from the command line override the defaults. */
+    for (CmdParameterIterator it = CmdParameterll.begin(); it != CmdParameterll.end(); ++it)
+    {
+        if (it->Key == 'l')
+            RTNetStrToIPv4Addr(it->strValue.c_str(), &LowerAddress);
+        else if (it->Key == 'u')
+            RTNetStrToIPv4Addr(it->strValue.c_str(), &UpperAddress);
+    }
+
+    ConfigurationManager *confManager = ConfigurationManager::getConfigurationManager();
+    AssertPtrReturn(confManager, VERR_INTERNAL_ERROR);
+    confManager->addNetwork(unconst(g_RootConfig),
+                            networkId, netmask,
+                            LowerAddress, UpperAddress);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Initialisation through Main: looks up our DHCP server and NAT network objects, takes gateway, DNS info
+ * and the address range from them, registers the event listeners and loads the lease file.
+ */
+int VBoxNetDhcp::initWithMain()
+{
+    /* A bool is always >= 0, so AssertRCReturn() could never fire here; assert on the pointer itself. */
+    AssertReturn(!virtualbox.isNull(), VERR_INTERNAL_ERROR);
+    std::string networkName = getNetworkName();
+
+    int rc = findDhcpServer(virtualbox, networkName, m_DhcpServer);
+    AssertRCReturn(rc, rc);
+
+    rc = findNatNetwork(virtualbox, networkName, m_NATNetwork);
+    AssertRCReturn(rc, rc);
+
+    BOOL fNeedDhcpServer = isDhcpRequired(m_NATNetwork);
+    if (!fNeedDhcpServer)
+        return VERR_CANCELLED;
+
+    RTNETADDRIPV4 gateway;
+    com::Bstr strGateway;
+    HRESULT hrc = m_NATNetwork->COMGETTER(Gateway)(strGateway.asOutParam());
+    AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+    RTNetStrToIPv4Addr(com::Utf8Str(strGateway).c_str(), &gateway); /* NOTE(review): parse status is not checked */
+
+    ConfigurationManager *confManager = ConfigurationManager::getConfigurationManager();
+    AssertPtrReturn(confManager, VERR_INTERNAL_ERROR);
+    confManager->addToAddressList(RTNET_DHCP_OPT_ROUTERS, gateway);
+
+    rc = fetchAndUpdateDnsInfo();
+    AssertMsgRCReturn(rc, ("Wasn't able to fetch Dns info"), rc);
+
+    {
+        ComEventTypeArray eventTypes;
+        eventTypes.push_back(VBoxEventType_OnHostNameResolutionConfigurationChange);
+        eventTypes.push_back(VBoxEventType_OnNATNetworkStartStop);
+        rc = createNatListener(m_VBoxListener, virtualbox, this, eventTypes);
+        AssertRCReturn(rc, rc);
+    }
+
+    {
+        ComEventTypeArray eventTypes;
+        eventTypes.push_back(VBoxEventType_OnVBoxSVCAvailabilityChanged);
+        rc = createClientListener(m_VBoxClientListener, virtualboxClient, this, eventTypes);
+        AssertRCReturn(rc, rc);
+    }
+
+    RTNETADDRIPV4 LowerAddress;
+    rc = configGetBoundryAddress(m_DhcpServer, false, LowerAddress);
+    AssertMsgRCReturn(rc, ("can't get lower boundrary adderss'"),rc);
+
+    RTNETADDRIPV4 UpperAddress;
+    rc = configGetBoundryAddress(m_DhcpServer, true, UpperAddress);
+    AssertMsgRCReturn(rc, ("can't get upper boundrary adderss'"),rc);
+
+    RTNETADDRIPV4 address = getIpv4Address();
+    RTNETADDRIPV4 netmask = getIpv4Netmask();
+    RTNETADDRIPV4 networkId = networkid(address, netmask);
+
+    confManager->addNetwork(unconst(g_RootConfig),
+                            networkId,
+                            netmask,
+                            LowerAddress,
+                            UpperAddress);
+
+    com::Bstr bstr;
+    hrc = virtualbox->COMGETTER(HomeFolder)(bstr.asOutParam()); /* NOTE(review): hrc is not checked */
+    com::Utf8StrFmt strXmlLeaseFile("%ls%c%s.leases",
+                                    bstr.raw(), RTPATH_DELIMITER, networkName.c_str());
+    confManager->loadFromFile(strXmlLeaseFile);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Refreshes the DNS server list and the domain name option from the host object; VINF_SUCCESS if the host is unavailable.
+ */
+int VBoxNetDhcp::fetchAndUpdateDnsInfo()
+{
+    ComHostPtr host;
+    if (SUCCEEDED(virtualbox->COMGETTER(Host)(host.asOutParam())))
+    {
+        AddressToOffsetMapping mapIp4Addr2Off;
+        int rc = localMappings(m_NATNetwork, mapIp4Addr2Off);
+        /* XXX: here could be several cases: 1. COM error, 2. not found (empty) 3. ? */
+        AssertMsgRCReturn(rc, ("Can't fetch local mappings"), rc);
+
+        RTNETADDRIPV4 address = getIpv4Address();
+        RTNETADDRIPV4 netmask = getIpv4Netmask();
+        AddressList nameservers;
+        rc = hostDnsServers(host, networkid(address, netmask), mapIp4Addr2Off, nameservers);
+        AssertMsgRCReturn(rc, ("Debug me!!!"), rc);
+        /* XXX: Search strings */
+
+        std::string domain;
+        rc = hostDnsDomain(host, domain);
+        AssertMsgRCReturn(rc, ("Debug me!!"), rc);
+
+        {
+            /* Named guard: an unnamed temporary would release the lock again at the ';' (assumes VBoxNetALock is a scoped-lock class). */
+            VBoxNetALock lock(this);
+            ConfigurationManager *confManager = ConfigurationManager::getConfigurationManager();
+            confManager->flushAddressList(RTNET_DHCP_OPT_DNS);
+            for (AddressList::iterator it = nameservers.begin(); it != nameservers.end(); ++it)
+                confManager->addToAddressList(RTNET_DHCP_OPT_DNS, *it);
+            confManager->setString(RTNET_DHCP_OPT_DOMAIN_NAME, domain);
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+int VBoxNetDhcp::hostDnsServers(const ComHostPtr& host,
+ const RTNETADDRIPV4& networkid,
+ const AddressToOffsetMapping& mapping,
+ AddressList& servers)
+{
+ ComBstrArray strs;
+
+ HRESULT hrc = host->COMGETTER(NameServers)(ComSafeArrayAsOutParam(strs));
+ if (FAILED(hrc))
+ return VERR_NOT_FOUND;
+
+ /*
+ * Recent fashion is to run dnsmasq on 127.0.1.1 which we
+ * currently can't map. If that's the only nameserver we've got,
+ * we need to use DNS proxy for VMs to reach it.
+ */
+ bool fUnmappedLoopback = false;
+
+ for (size_t i = 0; i < strs.size(); ++i)
+ {
+ RTNETADDRIPV4 addr;
+ int rc;
+
+ rc = RTNetStrToIPv4Addr(com::Utf8Str(strs[i]).c_str(), &addr);
+ if (RT_FAILURE(rc))
+ continue; /* entries that do not parse as IPv4 are skipped silently */
+
+ if (addr.u == INADDR_ANY)
+ {
+ /*
+ * This doesn't seem to be very well documented except for
+ * RTFS of res_init.c, but INADDR_ANY is a valid value
+ * for "nameserver"; it is treated as the loopback address here.
+ */
+ addr.u = RT_H2N_U32_C(INADDR_LOOPBACK);
+ }
+
+ if (addr.au8[0] == 127)
+ {
+ AddressToOffsetMapping::const_iterator remap(mapping.find(addr));
+
+ if (remap != mapping.end())
+ {
+ int offset = remap->second;
+ addr.u = RT_H2N_U32(RT_N2H_U32(networkid.u) + offset); /* mapped loopback -> network id + offset */
+ }
+ else
+ {
+ fUnmappedLoopback = true;
+ continue;
+ }
+ }
+
+ servers.push_back(addr);
+ }
+
+ if (servers.empty() && fUnmappedLoopback)
+ {
+ RTNETADDRIPV4 proxy;
+
+ proxy.u = networkid.u | RT_H2N_U32_C(1U); /* presumably the DNS proxy address mentioned above - confirm */
+ servers.push_back(proxy);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+HRESULT VBoxNetDhcp::HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent)
+{
+ switch (aEventType)
+ {
+ case VBoxEventType_OnHostNameResolutionConfigurationChange:
+ fetchAndUpdateDnsInfo(); /* status is deliberately or accidentally dropped here - NOTE(review) */
+ break;
+
+ case VBoxEventType_OnNATNetworkStartStop:
+ {
+ ComPtr <INATNetworkStartStopEvent> pStartStopEvent = pEvent; /* NOTE(review): not checked for null before the calls below */
+
+ com::Bstr networkName;
+ HRESULT hrc = pStartStopEvent->COMGETTER(NetworkName)(networkName.asOutParam());
+ AssertComRCReturn(hrc, hrc);
+ if (networkName.compare(getNetworkName().c_str()))
+ break; /* change not for our network */
+
+ BOOL fStart = TRUE;
+ hrc = pStartStopEvent->COMGETTER(StartEvent)(&fStart);
+ AssertComRCReturn(hrc, hrc);
+ if (!fStart)
+ shutdown(); /* our network was stopped */
+ break;
+ }
+
+ case VBoxEventType_OnVBoxSVCAvailabilityChanged:
+ {
+ shutdown(); /* any availability change shuts us down; the event payload is not inspected */
+ break;
+ }
+
+ default: break; /* Shut up MSC. */
+ }
+
+ return S_OK;
+}
+
+#ifdef RT_OS_WINDOWS
+
+/** The class name for the DIFx-killable window. */
+static WCHAR g_wszWndClassName[] = L"VBoxNetDHCPClass";
+/** Whether to exit the process on quit. */
+static bool g_fExitProcessOnQuit = true;
+
+/**
+ * Window procedure for making us DIFx-killable: WM_DESTROY posts the quit message, the rest gets default handling.
+ */
+static LRESULT CALLBACK DIFxKillableWindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
+{
+ if (uMsg == WM_DESTROY)
+ {
+ PostQuitMessage(0);
+ return 0;
+ }
+ return DefWindowProc(hwnd, uMsg, wParam, lParam);
+}
+
+/** @callback_method_impl{FNRTTHREAD,
+ * Thread that creates a window which DIFx can destroy, thereby
+ * triggering process termination. }
+ */
+static DECLCALLBACK(int) DIFxKillableProcessThreadProc(RTTHREAD hThreadSelf, void *pvUser)
+{
+ RT_NOREF(hThreadSelf, pvUser);
+ HINSTANCE hInstance = (HINSTANCE)GetModuleHandle(NULL);
+
+ /* Register the Window Class. */
+ WNDCLASSW WndCls;
+ WndCls.style = 0;
+ WndCls.lpfnWndProc = DIFxKillableWindowProc;
+ WndCls.cbClsExtra = 0;
+ WndCls.cbWndExtra = sizeof(void *);
+ WndCls.hInstance = hInstance;
+ WndCls.hIcon = NULL;
+ WndCls.hCursor = NULL;
+ WndCls.hbrBackground = (HBRUSH)(COLOR_BACKGROUND + 1);
+ WndCls.lpszMenuName = NULL;
+ WndCls.lpszClassName = g_wszWndClassName;
+
+ ATOM atomWindowClass = RegisterClassW(&WndCls);
+ if (atomWindowClass != 0)
+ {
+ /* Create the window (off-screen position, then hidden right away below). */
+ HWND hwnd = CreateWindowExW(WS_EX_TOOLWINDOW | WS_EX_TRANSPARENT | WS_EX_TOPMOST,
+ g_wszWndClassName, g_wszWndClassName,
+ WS_POPUPWINDOW,
+ -200, -200, 100, 100, NULL, NULL, hInstance, NULL);
+ if (hwnd)
+ {
+ SetWindowPos(hwnd, HWND_TOPMOST, -200, -200, 0, 0,
+ SWP_NOACTIVATE | SWP_HIDEWINDOW | SWP_NOCOPYBITS | SWP_NOREDRAW | SWP_NOSIZE);
+
+ MSG msg;
+ while (GetMessage(&msg, NULL, 0, 0)) /* runs until a quit message arrives */
+ {
+ TranslateMessage(&msg);
+ DispatchMessage(&msg);
+ }
+
+ DestroyWindow(hwnd);
+ }
+
+ UnregisterClassW(g_wszWndClassName, hInstance);
+
+ if (hwnd && g_fExitProcessOnQuit) /* TrustedMain clears the flag before it posts WM_QUIT itself */
+ exit(0);
+ }
+ return 0;
+}
+
+#endif /* RT_OS_WINDOWS */
+
+/**
+ * Entry point: creates the server, parses the arguments, goes online and runs until done.
+ */
+extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv)
+{
+ /*
+ * Instantiate the DHCP server and hand it the options.
+ */
+ VBoxNetDhcp *pDhcp = new VBoxNetDhcp();
+ if (!pDhcp)
+ {
+ RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: new VBoxNetDhcp failed!\n");
+ return 1;
+ }
+
+ RTEXITCODE rcExit = (RTEXITCODE)pDhcp->parseArgs(argc - 1, argv + 1);
+ if (rcExit != RTEXITCODE_SUCCESS)
+ return rcExit; /* NOTE(review): pDhcp is not deleted on this path */
+
+#ifdef RT_OS_WINDOWS
+ /* DIFx hack. */
+ RTTHREAD hMakeUseKillableThread = NIL_RTTHREAD;
+ int rc2 = RTThreadCreate(&hMakeUseKillableThread, DIFxKillableProcessThreadProc, NULL, 0,
+ RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "DIFxKill");
+ if (RT_FAILURE(rc2))
+ hMakeUseKillableThread = NIL_RTTHREAD;
+#endif
+
+ pDhcp->init(); /* NOTE(review): the status (e.g. VERR_CANCELLED from initWithMain) is ignored - confirm this is intended */
+
+ /*
+ * Try connect the server to the network.
+ */
+ int rc = pDhcp->tryGoOnline();
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Process requests.
+ */
+ g_pDhcp = pDhcp;
+ rc = pDhcp->run();
+ pDhcp->done();
+
+ g_pDhcp = NULL;
+ }
+ delete pDhcp;
+
+#ifdef RT_OS_WINDOWS
+ /* Kill DIFx hack. */
+ if (hMakeUseKillableThread != NIL_RTTHREAD)
+ {
+ g_fExitProcessOnQuit = false; /* keeps the helper thread from calling exit(0) when it sees our WM_QUIT */
+ PostThreadMessage((DWORD)RTThreadGetNative(hMakeUseKillableThread), WM_QUIT, 0, 0);
+ RTThreadWait(hMakeUseKillableThread, RT_MS_1SEC * 5U, NULL);
+ }
+#endif
+
+ return RT_SUCCESS(rc) ? RTEXITCODE_SUCCESS : RTEXITCODE_FAILURE;
+}
+
+
+#ifndef VBOX_WITH_HARDENING
+
+int main(int argc, char **argv) /* non-hardened builds only: init the runtime, then run TrustedMain() directly */
+{
+ int rc = RTR3InitExe(argc, &argv, RTR3INIT_FLAGS_SUPLIB);
+ if (RT_FAILURE(rc))
+ return RTMsgInitFailure(rc);
+
+ return TrustedMain(argc, argv);
+}
+
+# ifdef RT_OS_WINDOWS
+
+
+
+/** Windows entry point (we don't want a console usually); forwards __argc/__argv to main(). */
+int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow)
+{
+ NOREF(hInstance); NOREF(hPrevInstance); NOREF(lpCmdLine); NOREF(nCmdShow);
+ return main(__argc, __argv);
+}
+# endif /* RT_OS_WINDOWS */
+
+#endif /* !VBOX_WITH_HARDENING */
+
diff --git a/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.rc b/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.rc
new file mode 100644
index 00000000..5a8b64aa
--- /dev/null
+++ b/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.rc
@@ -0,0 +1,55 @@
+/* $Id: VBoxNetDHCP.rc $ */
+/** @file
+ * VBoxNetDHCP - Resource file containing version info.
+ */
+
+/*
+ * Copyright (C) 2015-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <windows.h>
+#include <VBox/version.h>
+
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION VBOX_RC_FILE_VERSION
+ PRODUCTVERSION VBOX_RC_FILE_VERSION
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+ FILEFLAGS VBOX_RC_FILE_FLAGS
+ FILEOS VBOX_RC_FILE_OS
+ FILETYPE VBOX_RC_TYPE_DLL
+ FILESUBTYPE VFT2_UNKNOWN
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0" // Lang=US English, CharSet=Unicode
+ BEGIN
+ VALUE "CompanyName", VBOX_RC_COMPANY_NAME
+ VALUE "FileDescription", "VirtualBox DHCP Server\0"
+ VALUE "FileVersion", VBOX_RC_FILE_VERSION_STR
+ VALUE "InternalName", "VBoxNetDHCP\0"
+ VALUE "LegalCopyright", VBOX_RC_LEGAL_COPYRIGHT
+ VALUE "OriginalFilename", "VBoxNetDHCP.dll\0"
+ VALUE "ProductName", VBOX_RC_PRODUCT_NAME_STR
+ VALUE "ProductVersion", VBOX_RC_PRODUCT_VERSION_STR
+ VBOX_RC_MORE_STRINGS
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
+
+/* Creates the application icon. */
+#include "VBoxNetDHCP-icon.rc"
+
diff --git a/src/VBox/NetworkServices/DHCP/VBoxNetDHCPHardened.cpp b/src/VBox/NetworkServices/DHCP/VBoxNetDHCPHardened.cpp
new file mode 100644
index 00000000..8ed60a98
--- /dev/null
+++ b/src/VBox/NetworkServices/DHCP/VBoxNetDHCPHardened.cpp
@@ -0,0 +1,25 @@
+/* $Id: VBoxNetDHCPHardened.cpp $ */
+/** @file
+ * VBoxNetDHCP - Hardened main().
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <VBox/sup.h>
+
+
+/**
+ * Entry point of the hardened stub executable.
+ *
+ * Passes the command line and environment unchanged to SUPR3HardenedMain()
+ * under the program name "VBoxNetDHCP" with no flags, and returns whatever
+ * that call returns.
+ */
+int main(int argc, char **argv, char **envp)
+{
+    int const rcExit = SUPR3HardenedMain("VBoxNetDHCP", 0 /* fFlags */, argc, argv, envp);
+    return rcExit;
+}
+
diff --git a/src/VBox/NetworkServices/Dhcpd/ClientId.cpp b/src/VBox/NetworkServices/Dhcpd/ClientId.cpp
new file mode 100644
index 00000000..3441a4a4
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/ClientId.cpp
@@ -0,0 +1,122 @@
+/* $Id: ClientId.cpp $ */
+/** @file
+ * DHCP server - client identifier
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <algorithm>
+
+#include "ClientId.h"
+
+
+/* Set once the "id" format type has been registered successfully. */
+bool ClientId::g_fFormatRegistered = false;
+
+
+/**
+ * Registers the custom string format type "id" (used as %R[id]) with
+ * rtStrFormat() as its formatting callback.
+ *
+ * Does nothing when an earlier call already registered the type.  The flag
+ * is only set when RTStrFormatTypeRegister() succeeds, so a failed attempt
+ * is not mistaken for a completed registration and a later call tries again.
+ */
+void ClientId::registerFormat()
+{
+    if (g_fFormatRegistered)
+        return;
+
+    int rc = RTStrFormatTypeRegister("id", rtStrFormat, NULL);
+    AssertRC(rc);
+
+    /* AssertRC may compile to nothing, so record the real outcome. */
+    g_fFormatRegistered = RT_SUCCESS(rc);
+}
+
+
+/**
+ * Formatting callback for the "id" format type.
+ *
+ * Output is "<NULL>" for a null pointer, "[xx:xx:...] (<mac>)" when the
+ * client sent a Client ID option, and just "<mac>" otherwise.
+ *
+ * @returns Number of characters written, i.e. the sum of what the
+ *          individual RTStrFormat() calls returned.
+ * @param   pfnOutput       Output callback, passed on to RTStrFormat().
+ * @param   pvArgOutput     Argument for the output callback.
+ * @param   pszType         Format type name; must be "id".
+ * @param   pvValue         Pointer to the ClientId to format, may be NULL.
+ * @param   cchWidth        Unused.
+ * @param   cchPrecision    Unused.
+ * @param   fFlags          Unused.
+ * @param   pvUser          Unused.
+ */
+DECLCALLBACK(size_t)
+ClientId::rtStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
+                      const char *pszType, void const *pvValue,
+                      int cchWidth, int cchPrecision, unsigned fFlags,
+                      void *pvUser)
+{
+    AssertReturn(strcmp(pszType, "id") == 0, 0);
+    RT_NOREF(pszType);
+
+    RT_NOREF(cchWidth, cchPrecision, fFlags);
+    RT_NOREF(pvUser);
+
+    const ClientId *id = static_cast<const ClientId *>(pvValue);
+    if (id == NULL)
+    {
+        return RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                           "<NULL>");
+    }
+
+    size_t cb = 0;
+    const bool fHasId = id->m_id.present();
+
+    if (fHasId)
+    {
+        const OptClientId::value_t &idopt = id->m_id.value();
+
+        cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                          "[");
+
+        /* colon-separated hex octets of the client id */
+        for (size_t i = 0; i < idopt.size(); ++i)
+        {
+            cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                              "%s%02x", (i == 0 ? "" : ":"), idopt[i]);
+        }
+
+        cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                          "] (");
+    }
+
+    cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                      "%RTmac", &id->m_mac);
+
+    if (fHasId)
+    {
+        cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                          ")");
+    }
+
+    /* Report what was actually written; this used to return 0. */
+    return cb;
+}
+
+
+/*
+ * Two client identifiers are equal when both carry a Client ID option with
+ * the same value, or when neither carries one and the MAC addresses match.
+ * An identifier with a Client ID never equals one without.
+ */
+bool operator==(const ClientId &l, const ClientId &r)
+{
+    const bool fLeftHasId = l.m_id.present();
+    if (fLeftHasId != r.m_id.present())
+        return false;
+
+    if (fLeftHasId)
+        return l.m_id.value() == r.m_id.value();
+    return l.m_mac == r.m_mac;
+}
+
+
+/*
+ * Ordering: identifiers without a Client ID option come first and are
+ * ordered by MAC address; identifiers with a Client ID come last and are
+ * ordered by the option value.
+ */
+bool operator<(const ClientId &l, const ClientId &r)
+{
+    const bool fLeftHasId  = l.m_id.present();
+    const bool fRightHasId = r.m_id.present();
+
+    if (fLeftHasId && fRightHasId)
+        return l.m_id.value() < r.m_id.value();
+
+    if (!fLeftHasId && !fRightHasId)
+        return l.m_mac < r.m_mac;
+
+    /* exactly one side has an id: the one with id comes last */
+    return fRightHasId;
+}
diff --git a/src/VBox/NetworkServices/Dhcpd/ClientId.h b/src/VBox/NetworkServices/Dhcpd/ClientId.h
new file mode 100644
index 00000000..430b6e3f
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/ClientId.h
@@ -0,0 +1,70 @@
+/* $Id: ClientId.h $ */
+/** @file
+ * DHCP server - client identifier
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_ClientId_h
+#define VBOX_INCLUDED_SRC_Dhcpd_ClientId_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include "Defs.h"
+#include <iprt/net.h>
+#include "DhcpOptions.h"
+
+/*
+ * Client is identified by either the Client ID option it sends or its
+ * chaddr, i.e. MAC address.
+ */
+/**
+ * Value type pairing a client's MAC address with its (optional) Client ID
+ * option.  Instances can be compared with ==, != and <, and formatted with
+ * the custom "%R[id]" format type once registerFormat() has been called.
+ */
+class ClientId
+{
+ RTMAC m_mac; /* client MAC address (chaddr) */
+ OptClientId m_id; /* Client ID option; may be absent, see present() */
+
+public:
+ /* Default: value-initialized MAC and a default-constructed (id) option. */
+ ClientId()
+ : m_mac(), m_id() {}
+ /* Copies the given MAC address and Client ID option. */
+ ClientId(const RTMAC &macParam, const OptClientId &idParam)
+ : m_mac(macParam), m_id(idParam) {}
+
+ const RTMAC &mac() const { return m_mac; }
+ const OptClientId &id() const { return m_id; }
+
+public:
+ /* Registers the "id" string format type; later calls are no-ops. */
+ static void registerFormat(); /* %R[id] */
+
+private:
+ static bool g_fFormatRegistered; /* guards registerFormat() against repeated registration */
+ /* Formatting callback handed to RTStrFormatTypeRegister() by registerFormat(). */
+ static DECLCALLBACK(size_t) rtStrFormat(
+ PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
+ const char *pszType, void const *pvValue,
+ int cchWidth, int cchPrecision, unsigned fFlags,
+ void *pvUser);
+
+private:
+ /* The comparison operators read m_mac and m_id directly. */
+ friend bool operator==(const ClientId &l, const ClientId &r);
+ friend bool operator<(const ClientId &l, const ClientId &r);
+};
+
+bool operator==(const ClientId &l, const ClientId &r);
+bool operator<(const ClientId &l, const ClientId &r);
+
+/* Inequality is the plain negation of operator==. */
+inline bool operator!=(const ClientId &l, const ClientId &r)
+{
+    const bool fSame = (l == r);
+    return !fSame;
+}
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_ClientId_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/Config.cpp b/src/VBox/NetworkServices/Dhcpd/Config.cpp
new file mode 100644
index 00000000..b11e29fd
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/Config.cpp
@@ -0,0 +1,949 @@
+/* $Id: Config.cpp $ */
+/** @file
+ * DHCP server - server configuration
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include "Config.h"
+
+#include <iprt/types.h>
+#include <iprt/net.h> /* NB: must come before getopt.h */
+#include <iprt/getopt.h>
+#include <iprt/path.h>
+#include <iprt/message.h>
+#include <iprt/string.h>
+#include <iprt/uuid.h>
+
+#include <VBox/com/com.h>
+
+#include <iostream>
+
+/**
+ * Exception thrown by the config file parsing code in this file.  It adds
+ * nothing to RTCError beyond a distinct type; both constructors just forward
+ * the message to the base class.
+ */
+class ConfigFileError : public RTCError
+{
+public:
+    /* message given as a C string */
+    ConfigFileError(const char *pszMessage) : RTCError(pszMessage)
+    {
+    }
+
+    /* message given as an RTCString (e.g. an RTCStringFmt temporary) */
+    ConfigFileError(const RTCString &a_rstrMessage) : RTCError(a_rstrMessage)
+    {
+    }
+};
+
+
+/*
+ * Construct an empty configuration: all strings and maps are empty, all
+ * addresses are value-initialized and the trunk type is marked invalid.
+ */
+Config::Config()
+    : m_strHome()
+    , m_strNetwork()
+    , m_strBaseName()
+    , m_strTrunk()
+    , m_enmTrunkType(kIntNetTrunkType_Invalid)
+    , m_MacAddress()
+    , m_IPv4Address()
+    , m_IPv4Netmask()
+    , m_IPv4PoolFirst()
+    , m_IPv4PoolLast()
+    , m_GlobalOptions()
+    , m_VMMap()
+{
+}
+
+
+/*
+ * Basic initialization that does not depend on the network name; currently
+ * this only looks up the home directory.  Returns the failure status of
+ * homeInit(), or VINF_SUCCESS.
+ */
+int Config::init()
+{
+    const int rc = homeInit();
+    return RT_FAILURE(rc) ? rc : VINF_SUCCESS;
+}
+
+
+/*
+ * Look up the VirtualBox home directory and remember it in m_strHome.
+ *
+ * Returns VINF_SUCCESS, or the failure status of
+ * com::GetVBoxUserHomeDirectory() (m_strHome is left untouched then).
+ */
+int Config::homeInit()
+{
+    int rc;
+
+    /* pathname of ~/.VirtualBox or equivalent */
+    char szHome[RTPATH_MAX];
+    rc = com::GetVBoxUserHomeDirectory(szHome, sizeof(szHome), false);
+    if (RT_FAILURE(rc))
+    {
+        /* newline-terminated like every other log message in this file */
+        LogDHCP(("unable to find VirtualBox home directory: %Rrs\n", rc));
+        return rc;
+    }
+
+    m_strHome.assign(szHome);
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Set the network name and derive the sanitized base name from it.  The
+ * name may only be set while it is still empty (asserted).
+ */
+void Config::setNetwork(const std::string &aStrNetwork)
+{
+    AssertReturnVoid(m_strNetwork.empty());
+
+    m_strNetwork.assign(aStrNetwork);
+    sanitizeBaseName();
+}
+
+
+/*
+ * Create the release log for this server instance.
+ *
+ * Both the log file name and the name of the environment variable that
+ * controls logging are derived from the base name, so the network name
+ * (and the home directory) must be known first; VERR_GENERAL_FAILURE is
+ * returned otherwise.  Alternatively, consider passing the log file name
+ * via the command line?
+ */
+int Config::logInit()
+{
+    if (m_strHome.empty() || m_strBaseName.empty())
+        return VERR_GENERAL_FAILURE;
+
+    /* default log file name: <home><delimiter><basename>-Dhcpd.log */
+    char szLogFile[RTPATH_MAX];
+    size_t cchUsed = RTStrPrintf(szLogFile, sizeof(szLogFile),
+                                 "%s%c%s-Dhcpd.log",
+                                 m_strHome.c_str(), RTPATH_DELIMITER, m_strBaseName.c_str());
+    if (cchUsed >= sizeof(szLogFile))
+        return VERR_BUFFER_OVERFLOW;
+
+    /* writable copy of the base name for the environment variable name */
+    char szVarPart[RTPATH_MAX];
+    int rc = RTStrCopy(szVarPart, sizeof(szVarPart), m_strBaseName.c_str());
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* anything but [_0-9a-zA-Z] becomes an underscore */
+    for (size_t i = 0; szVarPart[i] != '\0'; ++i)
+    {
+        const char ch = szVarPart[i];
+        const bool fAllowed = ch == '_'
+                           || (ch >= '0' && ch <= '9')
+                           || (ch >= 'a' && ch <= 'z')
+                           || (ch >= 'A' && ch <= 'Z');
+        if (!fAllowed)
+            szVarPart[i] = '_';
+    }
+
+    /* name of the environment variable to control logging */
+    char szEnvVarBase[128];
+    cchUsed = RTStrPrintf(szEnvVarBase, sizeof(szEnvVarBase),
+                          "VBOXDHCP_%s_RELEASE_LOG", szVarPart);
+    if (cchUsed >= sizeof(szEnvVarBase))
+        return VERR_BUFFER_OVERFLOW;
+
+    /* always log to the file; debug builds additionally log to stderr */
+    uint32_t fDestFlags = RTLOGDEST_FILE;
+#ifdef DEBUG
+    fDestFlags |= RTLOGDEST_STDERR;
+#endif
+
+    return com::VBoxLogRelCreate("DHCP Server",
+                                 szLogFile,
+                                 RTLOGFLAGS_PREFIX_TIME_PROG,
+                                 "all all.restrict -default.restrict",
+                                 szEnvVarBase,
+                                 fDestFlags,
+                                 32768 /* cMaxEntriesPerGroup */,
+                                 0 /* cHistory */,
+                                 0 /* uHistoryFileTime */,
+                                 0 /* uHistoryFileSize */,
+                                 NULL /* pErrInfo */);
+}
+
+
+/*
+ * Finish and validate the configuration after all settings were supplied.
+ *
+ * Starts logging, generates a MAC address (08:00:27 prefix plus three
+ * octets taken from a fresh UUID) if none was given, and then checks that:
+ *   - the network name is set,
+ *   - the MAC address is unicast,
+ *   - the first octet of the server IP address does not have its three top
+ *     bits set,
+ *   - the netmask converts to a non-zero prefix length,
+ *   - both pool boundaries are in the server's network,
+ *   - the pool's last address is not below its first address,
+ *   - the server address is not inside the pool.
+ *
+ * Returns VINF_SUCCESS if everything checks out, VERR_GENERAL_FAILURE
+ * otherwise (the reason is logged).
+ */
+int Config::complete()
+{
+    int rc;
+
+    if (m_strNetwork.empty())
+    {
+        LogDHCP(("network name is not specified\n"));
+        /* This used to be "return false", which equals VINF_SUCCESS. */
+        return VERR_GENERAL_FAILURE;
+    }
+
+    /* the status of logInit() is not checked */
+    logInit();
+
+    bool fMACGenerated = false;
+    if (   m_MacAddress.au16[0] == 0
+        && m_MacAddress.au16[1] == 0
+        && m_MacAddress.au16[2] == 0)
+    {
+        RTUUID Uuid;
+        RTUuidCreate(&Uuid);
+
+        m_MacAddress.au8[0] = 0x08;
+        m_MacAddress.au8[1] = 0x00;
+        m_MacAddress.au8[2] = 0x27;
+        m_MacAddress.au8[3] = Uuid.Gen.au8Node[3];
+        m_MacAddress.au8[4] = Uuid.Gen.au8Node[4];
+        m_MacAddress.au8[5] = Uuid.Gen.au8Node[5];
+
+        LogDHCP(("MAC address is not specified: will use generated MAC %RTmac\n", &m_MacAddress));
+        fMACGenerated = true;
+    }
+
+    /* unicast MAC address */
+    if (m_MacAddress.au8[0] & 0x01)
+    {
+        LogDHCP(("MAC address is not unicast: %RTmac\n", &m_MacAddress));
+        return VERR_GENERAL_FAILURE;
+    }
+
+    /* unicast IP address */
+    if ((m_IPv4Address.au8[0] & 0xe0) == 0xe0)
+    {
+        LogDHCP(("IP address is not unicast: %RTnaipv4\n", m_IPv4Address.u));
+        return VERR_GENERAL_FAILURE;
+    }
+
+    /* valid netmask */
+    int iPrefixLength;
+    rc = RTNetMaskToPrefixIPv4(&m_IPv4Netmask, &iPrefixLength);
+    if (RT_FAILURE(rc) || iPrefixLength == 0)
+    {
+        LogDHCP(("IP mask is not valid: %RTnaipv4\n", m_IPv4Netmask.u));
+        return VERR_GENERAL_FAILURE;
+    }
+
+    /* first IP is from the same network */
+    if ((m_IPv4PoolFirst.u & m_IPv4Netmask.u) != (m_IPv4Address.u & m_IPv4Netmask.u))
+    {
+        LogDHCP(("first pool address is outside the network %RTnaipv4/%d: %RTnaipv4\n",
+                 (m_IPv4Address.u & m_IPv4Netmask.u), iPrefixLength,
+                 m_IPv4PoolFirst.u));
+        return VERR_GENERAL_FAILURE;
+    }
+
+    /* last IP is from the same network */
+    if ((m_IPv4PoolLast.u & m_IPv4Netmask.u) != (m_IPv4Address.u & m_IPv4Netmask.u))
+    {
+        LogDHCP(("last pool address is outside the network %RTnaipv4/%d: %RTnaipv4\n",
+                 (m_IPv4Address.u & m_IPv4Netmask.u), iPrefixLength,
+                 m_IPv4PoolLast.u));
+        return VERR_GENERAL_FAILURE;
+    }
+
+    /* the pool is valid (addresses are compared in host byte order) */
+    if (RT_N2H_U32(m_IPv4PoolLast.u) < RT_N2H_U32(m_IPv4PoolFirst.u))
+    {
+        LogDHCP(("pool range is invalid: %RTnaipv4 - %RTnaipv4\n",
+                 m_IPv4PoolFirst.u, m_IPv4PoolLast.u));
+        return VERR_GENERAL_FAILURE;
+    }
+
+    /* our own address is not inside the pool */
+    if (   RT_N2H_U32(m_IPv4PoolFirst.u) <= RT_N2H_U32(m_IPv4Address.u)
+        && RT_N2H_U32(m_IPv4Address.u)   <= RT_N2H_U32(m_IPv4PoolLast.u))
+    {
+        LogDHCP(("server address inside the pool range %RTnaipv4 - %RTnaipv4: %RTnaipv4\n",
+                 m_IPv4PoolFirst.u, m_IPv4PoolLast.u, m_IPv4Address.u));
+        return VERR_GENERAL_FAILURE;
+    }
+
+    /* a generated MAC address has already been logged above */
+    if (!fMACGenerated)
+        LogDHCP(("MAC address %RTmac\n", &m_MacAddress));
+    LogDHCP(("IP address %RTnaipv4/%d\n", m_IPv4Address.u, iPrefixLength));
+    LogDHCP(("address pool %RTnaipv4 - %RTnaipv4\n", m_IPv4PoolFirst.u, m_IPv4PoolLast.u));
+
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Factory for a fixed test configuration: network
+ * "HostInterfaceNetworking-vboxnet0" on the netflt trunk "vboxnet0",
+ * server 192.168.56.254/24 with a small address pool.
+ *
+ * Returns a new Config owned by the caller, or NULL on failure.
+ */
+Config *Config::hardcoded()
+{
+    std::unique_ptr<Config> ptrConfig(new Config());
+    int rc = ptrConfig->init();
+    if (RT_FAILURE(rc))
+        return NULL;
+
+    ptrConfig->setNetwork("HostInterfaceNetworking-vboxnet0");
+    ptrConfig->m_strTrunk.assign("vboxnet0");
+    ptrConfig->m_enmTrunkType = kIntNetTrunkType_NetFlt;
+
+    /* 08:00:27:a9:cf:ef */
+    static const uint8_t s_abMac[6] = { 0x08, 0x00, 0x27, 0xa9, 0xcf, 0xef };
+    for (unsigned i = 0; i < RT_ELEMENTS(s_abMac); ++i)
+        ptrConfig->m_MacAddress.au8[i] = s_abMac[i];
+
+    ptrConfig->m_IPv4Address.u = RT_H2N_U32_C(0xc0a838fe); /* 192.168.56.254 */
+    ptrConfig->m_IPv4Netmask.u = RT_H2N_U32_C(0xffffff00); /* 255.255.255.0 */
+
+    /* flip to test naks */
+#if 1
+    ptrConfig->m_IPv4PoolFirst.u = RT_H2N_U32_C(0xc0a8385a); /* 192.168.56.90 */
+    ptrConfig->m_IPv4PoolLast.u  = RT_H2N_U32_C(0xc0a83863); /* 192.168.56.99 */
+#else
+    ptrConfig->m_IPv4PoolFirst.u = RT_H2N_U32_C(0xc0a838c9); /* 192.168.56.201 */
+    ptrConfig->m_IPv4PoolLast.u  = RT_H2N_U32_C(0xc0a838dc); /* 192.168.56.220 */
+#endif
+
+    rc = ptrConfig->complete();
+    AssertRCReturn(rc, NULL);
+
+    return ptrConfig.release();
+}
+
+
+/*
+ * compatibility with old VBoxNetDHCP
+ *
+ * Command line options accepted by Config::compat().  Each entry gives the
+ * long option, the short option character that Config::compat() switches
+ * on, and the kind of argument the option requires.
+ */
+static const RTGETOPTDEF g_aCompatOptions[] =
+{
+ { "--ip-address", 'i', RTGETOPT_REQ_IPV4ADDR },
+ { "--lower-ip", 'l', RTGETOPT_REQ_IPV4ADDR },
+ { "--mac-address", 'a', RTGETOPT_REQ_MACADDR },
+ { "--need-main", 'M', RTGETOPT_REQ_BOOL }, /* accepted but ignored by Config::compat() */
+ { "--netmask", 'm', RTGETOPT_REQ_IPV4ADDR },
+ { "--network", 'n', RTGETOPT_REQ_STRING },
+ { "--trunk-name", 't', RTGETOPT_REQ_STRING },
+ { "--trunk-type", 'T', RTGETOPT_REQ_STRING }, /* none, whatever, netflt or netadp */
+ { "--upper-ip", 'u', RTGETOPT_REQ_IPV4ADDR },
+};
+
+
+/*
+ * Factory that builds a configuration from the command line options in
+ * g_aCompatOptions.
+ *
+ * Every setting may be given only once; a repeat is detected by the
+ * corresponding member no longer holding its initial (zero / empty /
+ * invalid) value and is reported as an error.  After all options are
+ * parsed the configuration is validated with complete().
+ *
+ * Returns a new Config owned by the caller, or NULL on any error (the
+ * partially built object is freed by the unique_ptr).
+ */
+Config *Config::compat(int argc, char **argv)
+{
+ RTGETOPTSTATE State;
+ int rc;
+
+ rc = RTGetOptInit(&State, argc, argv,
+ g_aCompatOptions, RT_ELEMENTS(g_aCompatOptions), 1,
+ RTGETOPTINIT_FLAGS_NO_STD_OPTS);
+ AssertRCReturn(rc, NULL);
+
+ std::unique_ptr<Config> config(new Config());
+ rc = config->init();
+ if (RT_FAILURE(rc))
+ return NULL;
+
+ for (;;)
+ {
+ RTGETOPTUNION Val;
+
+ rc = RTGetOpt(&State, &Val);
+ if (rc == 0) /* done */
+ break;
+
+ switch (rc)
+ {
+ case 'a': /* --mac-address */
+ /* an all-zero MAC address means "not set yet" */
+ if ( config->m_MacAddress.au16[0] != 0
+ || config->m_MacAddress.au16[1] != 0
+ || config->m_MacAddress.au16[2] != 0)
+ {
+ RTMsgError("Duplicate --mac-address option");
+ return NULL;
+ }
+ config->m_MacAddress = Val.MacAddr;
+ break;
+
+ case 'i': /* --ip-address */
+ if (config->m_IPv4Address.u != 0)
+ {
+ RTMsgError("Duplicate --ip-address option");
+ return NULL;
+ }
+ config->m_IPv4Address = Val.IPv4Addr;
+ break;
+
+ case 'l': /* --lower-ip */
+ if (config->m_IPv4PoolFirst.u != 0)
+ {
+ RTMsgError("Duplicate --lower-ip option");
+ return NULL;
+ }
+ config->m_IPv4PoolFirst = Val.IPv4Addr;
+ break;
+
+ case 'M': /* --need-main */
+ /* for backward compatibility, ignored */
+ break;
+
+ case 'm': /* --netmask */
+ if (config->m_IPv4Netmask.u != 0)
+ {
+ RTMsgError("Duplicate --netmask option");
+ return NULL;
+ }
+ config->m_IPv4Netmask = Val.IPv4Addr;
+ break;
+
+ case 'n': /* --network */
+ if (!config->m_strNetwork.empty())
+ {
+ RTMsgError("Duplicate --network option");
+ return NULL;
+ }
+ /* also derives the base name from the network name */
+ config->setNetwork(Val.psz);
+ break;
+
+ case 't': /* --trunk-name */
+ if (!config->m_strTrunk.empty())
+ {
+ RTMsgError("Duplicate --trunk-name option");
+ return NULL;
+ }
+ config->m_strTrunk.assign(Val.psz);
+ break;
+
+ case 'T': /* --trunk-type */
+ if (config->m_enmTrunkType != kIntNetTrunkType_Invalid)
+ {
+ RTMsgError("Duplicate --trunk-type option");
+ return NULL;
+ }
+ else if (strcmp(Val.psz, "none") == 0)
+ config->m_enmTrunkType = kIntNetTrunkType_None;
+ else if (strcmp(Val.psz, "whatever") == 0)
+ config->m_enmTrunkType = kIntNetTrunkType_WhateverNone;
+ else if (strcmp(Val.psz, "netflt") == 0)
+ config->m_enmTrunkType = kIntNetTrunkType_NetFlt;
+ else if (strcmp(Val.psz, "netadp") == 0)
+ config->m_enmTrunkType = kIntNetTrunkType_NetAdp;
+ else
+ {
+ RTMsgError("Unknown trunk type '%s'", Val.psz);
+ return NULL;
+ }
+ break;
+
+ case 'u': /* --upper-ip */
+ if (config->m_IPv4PoolLast.u != 0)
+ {
+ RTMsgError("Duplicate --upper-ip option");
+ return NULL;
+ }
+ config->m_IPv4PoolLast = Val.IPv4Addr;
+ break;
+
+ case VINF_GETOPT_NOT_OPTION:
+ RTMsgError("%s: Unexpected command line argument", Val.psz);
+ return NULL;
+
+ default:
+ /* anything else returned by RTGetOpt() is reported as an error */
+ RTGetOptPrintError(rc, &Val);
+ return NULL;
+ }
+ }
+
+ /* validate the assembled configuration */
+ rc = config->complete();
+ if (RT_FAILURE(rc))
+ return NULL;
+
+ /* hand ownership to the caller */
+ return config.release();
+}
+
+
+/*
+ * Command line options accepted by Config::create().  --comment has no
+ * short form, so it is given a value that is not a printable character.
+ */
+#define DHCPD_GETOPT_COMMENT 256 /* No short option for --comment */
+static const RTGETOPTDEF g_aOptions[] =
+{
+ { "--config", 'c', RTGETOPT_REQ_STRING }, /* name of the XML config file */
+ { "--comment", DHCPD_GETOPT_COMMENT, RTGETOPT_REQ_STRING } /* argument is ignored */
+};
+
+
+/*
+ * Factory that builds a configuration from the file named by the --config
+ * command line option.  The option must be given exactly once; --comment
+ * is accepted and ignored.  The configuration read from the file is
+ * validated with complete().
+ *
+ * Returns a new Config owned by the caller, or NULL on any error.
+ */
+Config *Config::create(int argc, char **argv)
+{
+    RTGETOPTSTATE GetState;
+    int rc = RTGetOptInit(&GetState, argc, argv,
+                          g_aOptions, RT_ELEMENTS(g_aOptions), 1,
+                          RTGETOPTINIT_FLAGS_NO_STD_OPTS);
+    AssertRCReturn(rc, NULL);
+
+    std::unique_ptr<Config> ptrConfig;
+
+    RTGETOPTUNION ValueUnion;
+    while ((rc = RTGetOpt(&GetState, &ValueUnion)) != 0) /* 0 means done */
+    {
+        if (rc == 'c') /* --config */
+        {
+            if (ptrConfig.get() != NULL)
+            {
+                printf("Duplicate option: --config '%s'\n", ValueUnion.psz);
+                return NULL;
+            }
+
+            printf("reading config from %s\n", ValueUnion.psz);
+            ptrConfig.reset(Config::read(ValueUnion.psz));
+            if (ptrConfig.get() == NULL)
+                return NULL;
+        }
+        else if (rc == DHCPD_GETOPT_COMMENT) /* --comment */
+        {
+            /*
+             * The sole purpose of this option is to allow identification
+             * of DHCP server instances in the process list.  We ignore the
+             * required string argument of this option.
+             */
+        }
+        else if (rc == VINF_GETOPT_NOT_OPTION)
+        {
+            RTMsgError("Unexpected command line argument: '%s'", ValueUnion.psz);
+            return NULL;
+        }
+        else
+        {
+            RTGetOptPrintError(rc, &ValueUnion);
+            return NULL;
+        }
+    }
+
+    /* no --config option at all */
+    if (ptrConfig.get() == NULL)
+        return NULL;
+
+    rc = ptrConfig->complete();
+    if (RT_FAILURE(rc))
+        return NULL;
+
+    return ptrConfig.release();
+}
+
+
+/*
+ * Read and parse the XML config file with the given name.
+ *
+ * All exceptions raised while reading or parsing are caught here and
+ * logged.  Note that complete() is not called by this function.
+ *
+ * Returns a new Config owned by the caller, or NULL if the file name is
+ * NULL/empty or anything goes wrong.
+ */
+Config *Config::read(const char *pszFileName)
+{
+    if (!pszFileName || *pszFileName == '\0')
+        return NULL;
+
+    /* step 1: load the file into an XML document */
+    xml::Document doc;
+    try
+    {
+        xml::XmlFileParser parser;
+        parser.read(pszFileName, doc);
+    }
+    catch (const xml::EIPRTFailure &rXcpt)
+    {
+        LogDHCP(("%s\n", rXcpt.what()));
+        return NULL;
+    }
+    catch (const RTCError &rXcpt)
+    {
+        LogDHCP(("%s\n", rXcpt.what()));
+        return NULL;
+    }
+    catch (...)
+    {
+        LogDHCP(("Unknown exception while reading and parsing '%s'\n",
+                 pszFileName));
+        return NULL;
+    }
+
+    /* step 2: create the config object */
+    std::unique_ptr<Config> ptrConfig(new Config());
+    int rc = ptrConfig->init();
+    if (RT_FAILURE(rc))
+        return NULL;
+
+    /* step 3: fill it in from the document */
+    try
+    {
+        ptrConfig->parseConfig(doc.getRootElement());
+    }
+    catch (const RTCError &rXcpt)
+    {
+        LogDHCP(("%s\n", rXcpt.what()));
+        return NULL;
+    }
+    catch (...)
+    {
+        LogDHCP(("Unexpected exception\n"));
+        return NULL;
+    }
+
+    return ptrConfig.release();
+}
+
+
+/*
+ * Parse the config document starting at its root element, which must be
+ * <DHCPServer>.  Throws ConfigFileError on a missing or unexpected root.
+ * After parsing, the encoded global options are dumped to stdout, one
+ * option per line as space-separated decimal octets (debug aid).
+ */
+void Config::parseConfig(const xml::ElementNode *root)
+{
+    if (root == NULL)
+        throw ConfigFileError("Empty config file");
+
+    /*
+     * XXX: NAMESPACE API IS COMPLETELY BROKEN, SO IGNORE IT FOR NOW
+     */
+    if (!root->nameEquals("DHCPServer"))
+    {
+        const char *pszRootName = root->getName();
+        if (pszRootName == NULL)
+            pszRootName = "(null)";
+        throw ConfigFileError(RTCStringFmt("Unexpected root element \"%s\"",
+                                           pszRootName));
+    }
+
+    parseServer(root);
+
+    /* explicit iterator types: Visual C++ 2010 does not grok 'auto' here (r=bird) */
+    // XXX: debug
+    for (optmap_t::const_iterator itOpt = m_GlobalOptions.begin();
+         itOpt != m_GlobalOptions.end();
+         ++itOpt)
+    {
+        std::shared_ptr<DhcpOption> opt(itOpt->second);
+
+        octets_t encoded;
+        opt->encode(encoded);
+
+        for (octets_t::const_iterator itOctet = encoded.begin(); itOctet != encoded.end(); ++itOctet)
+        {
+            /* separator before every octet except the first */
+            if (itOctet != encoded.begin())
+                std::cout << " ";
+            const uint8_t bOctet = *itOctet;
+            std::cout << (int)bOctet;
+        }
+        std::cout << std::endl;
+    }
+}
+
+
+/*
+ * Fetch the named attribute of the node and convert it to an IPv4 address.
+ * Throws ConfigFileError if the attribute is missing or does not parse.
+ */
+static void getIPv4AddrAttribute(const xml::ElementNode *pNode, const char *pcszAttrName,
+                                 RTNETADDRIPV4 *pAddr)
+{
+    RTCString strValue;
+    if (!pNode->getAttributeValue(pcszAttrName, &strValue))
+        throw ConfigFileError(RTCStringFmt("%s attribute missing",
+                                           pcszAttrName));
+
+    const int rc = RTNetStrToIPv4Addr(strValue.c_str(), pAddr);
+    if (RT_FAILURE(rc))
+        throw ConfigFileError(RTCStringFmt("%s attribute invalid",
+                                           pcszAttrName));
+}
+
+
+/*
+ * Parse the <DHCPServer> element: its attributes (network name, trunk
+ * type and name, server address, netmask, pool boundaries) and its
+ * <Options> and <Config> children.  Other child elements are skipped.
+ * Throws ConfigFileError on missing or invalid attributes.
+ */
+void Config::parseServer(const xml::ElementNode *server)
+{
+    /*
+     * DHCPServer attributes
+     */
+    RTCString strNetworkName;
+    if (!server->getAttributeValue("networkName", &strNetworkName))
+        throw ConfigFileError("DHCPServer/@networkName missing");
+
+    setNetwork(strNetworkName.c_str());
+
+    RTCString strTrunkType;
+    if (!server->getAttributeValue("trunkType", &strTrunkType))
+        throw ConfigFileError("DHCPServer/@trunkType missing");
+
+    /* map the trunk type name to its enum value */
+    struct TrunkTypeName
+    {
+        const char      *pszName;
+        INTNETTRUNKTYPE  enmType;
+    };
+    static const TrunkTypeName s_aTrunkTypes[] =
+    {
+        { "none",     kIntNetTrunkType_None         },
+        { "whatever", kIntNetTrunkType_WhateverNone },
+        { "netflt",   kIntNetTrunkType_NetFlt       },
+        { "netadp",   kIntNetTrunkType_NetAdp       },
+    };
+
+    bool fKnownTrunkType = false;
+    for (size_t i = 0; i < RT_ELEMENTS(s_aTrunkTypes); ++i)
+    {
+        if (strTrunkType == s_aTrunkTypes[i].pszName)
+        {
+            m_enmTrunkType = s_aTrunkTypes[i].enmType;
+            fKnownTrunkType = true;
+            break;
+        }
+    }
+    if (!fKnownTrunkType)
+        throw ConfigFileError(RTCStringFmt("Invalid DHCPServer/@trunkType value: %s", strTrunkType.c_str()));
+
+    /* only netflt and netadp trunks need a trunk name */
+    const bool fNeedsTrunkName =    m_enmTrunkType == kIntNetTrunkType_NetFlt
+                                 || m_enmTrunkType == kIntNetTrunkType_NetAdp;
+    if (!fNeedsTrunkName)
+        m_strTrunk = "";
+    else
+    {
+        RTCString strTrunk;
+        if (!server->getAttributeValue("trunkName", &strTrunk))
+            throw ConfigFileError("DHCPServer/@trunkName missing");
+        m_strTrunk = strTrunk.c_str();
+    }
+
+    getIPv4AddrAttribute(server, "IPAddress",   &m_IPv4Address);
+    getIPv4AddrAttribute(server, "networkMask", &m_IPv4Netmask);
+    getIPv4AddrAttribute(server, "lowerIP",     &m_IPv4PoolFirst);
+    getIPv4AddrAttribute(server, "upperIP",     &m_IPv4PoolLast);
+
+    /*
+     * DHCPServer children
+     */
+    xml::NodesLoop children(*server);
+    for (const xml::ElementNode *child = children.forAllNodes();
+         child != NULL;
+         child = children.forAllNodes())
+    {
+        if (child->nameEquals("Options"))       /* global options */
+            parseGlobalOptions(child);
+        else if (child->nameEquals("Config"))   /* per-VM configuration */
+            parseVMConfig(child);
+    }
+}
+
+
+/*
+ * Parse the children of the global <Options> element into
+ * m_GlobalOptions.  Only <Option> children are allowed; anything else
+ * raises ConfigFileError.
+ */
+void Config::parseGlobalOptions(const xml::ElementNode *options)
+{
+    xml::NodesLoop children(*options);
+    for (const xml::ElementNode *child = children.forAllNodes();
+         child != NULL;
+         child = children.forAllNodes())
+    {
+        if (!child->nameEquals("Option"))
+            throw ConfigFileError(RTCStringFmt("Unexpected element \"%s\"",
+                                               child->getName()));
+
+        parseOption(child, m_GlobalOptions);
+    }
+}
+
+
+/*
+ * Parse a per-VM <Config> element and store its options in m_VMMap.
+ *
+ * VM Config entries are generated automatically from VirtualBox.xml
+ * with the MAC fetched from the VM config.  The client id is nowhere
+ * in the picture there, so VM config is indexed with plain RTMAC, not
+ * ClientId (also see getOptions below).
+ *
+ * Throws ConfigFileError if the MACAddress attribute is missing or
+ * malformed, if there already is an entry for that MAC address, or if a
+ * child element is not <Option>.
+ */
+void Config::parseVMConfig(const xml::ElementNode *config)
+{
+    RTCString strMac;
+    if (!config->getAttributeValue("MACAddress", &strMac))
+        throw ConfigFileError(RTCStringFmt("Config missing MACAddress attribute"));
+
+    RTMAC mac;
+    const int rc = parseMACAddress(mac, strMac);
+    if (RT_FAILURE(rc))
+        throw ConfigFileError(RTCStringFmt("Malformed MACAddress attribute \"%s\"",
+                                           strMac.c_str()));
+
+    if (m_VMMap.find(mac) != m_VMMap.end())
+        throw ConfigFileError(RTCStringFmt("Duplicate Config for MACAddress \"%s\"",
+                                           strMac.c_str()));
+
+    /* creates the (empty) entry for this MAC address */
+    optmap_t &vmOptions = m_VMMap[mac];
+
+    xml::NodesLoop children(*config);
+    for (const xml::ElementNode *child = children.forAllNodes();
+         child != NULL;
+         child = children.forAllNodes())
+    {
+        if (!child->nameEquals("Option"))
+            throw ConfigFileError(RTCStringFmt("Unexpected element \"%s\"",
+                                               child->getName()));
+
+        parseOption(child, vmOptions);
+    }
+}
+
+
+/*
+ * Convert a string to a MAC address.  aMac is only written on success.
+ *
+ * Returns VINF_SUCCESS, the failure status of RTNetStrToMacAddr(), or
+ * VERR_INVALID_PARAMETER if the address is followed by trailing
+ * characters.
+ */
+int Config::parseMACAddress(RTMAC &aMac, const RTCString &aStr)
+{
+    RTMAC macParsed;
+    const int rc = RTNetStrToMacAddr(aStr.c_str(), &macParsed);
+
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* trailing characters are only a warning for IPRT, but an error for us */
+    if (rc == VWRN_TRAILING_CHARS)
+        return VERR_INVALID_PARAMETER;
+
+    aMac = macParsed;
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Not implemented: ignores both arguments and always returns
+ * VERR_GENERAL_FAILURE.
+ */
+int Config::parseClientId(OptClientId &aId, const RTCString &aStr)
+{
+    RT_NOREF(aId);
+    RT_NOREF(aStr);
+    return VERR_GENERAL_FAILURE;
+}
+
+
+/*
+ * Parse <Option/> element and add the option to the specified optmap.
+ *
+ * Attributes:
+ *   name     - mandatory, the option code as a decimal number (0..255)
+ *   encoding - optional, decimal 0 or 1; defaults to 0
+ *   value    - optional, passed on to DhcpOption::parse()
+ *
+ * Throws ConfigFileError if an attribute is missing or invalid, or if
+ * DhcpOption::parse() returns NULL.
+ */
+void Config::parseOption(const xml::ElementNode *option, optmap_t &optmap)
+{
+    /* option code */
+    RTCString strName;
+    if (!option->getAttributeValue("name", &strName))
+        throw ConfigFileError("missing option name");
+
+    uint8_t u8Opt;
+    int rc = RTStrToUInt8Full(strName.c_str(), 10, &u8Opt);
+    if (rc != VINF_SUCCESS) /* no warnings either */
+        throw ConfigFileError(RTCStringFmt("Bad option \"%s\"", strName.c_str()));
+
+    /* encoding */
+    uint32_t u32Enc = 0; /* XXX: DhcpOptEncoding_Legacy */
+    RTCString strEncoding;
+    if (option->getAttributeValue("encoding", &strEncoding))
+    {
+        rc = RTStrToUInt32Full(strEncoding.c_str(), 10, &u32Enc);
+        if (rc != VINF_SUCCESS) /* no warnings either */
+            throw ConfigFileError(RTCStringFmt("Bad encoding \"%s\"", strEncoding.c_str()));
+
+        /* XXX: 0 is DhcpOptEncoding_Legacy, 1 is DhcpOptEncoding_Hex */
+        if (u32Enc != 0 && u32Enc != 1)
+            throw ConfigFileError(RTCStringFmt("Unknown encoding \"%s\"", strEncoding.c_str()));
+    }
+
+    /* value may be omitted for OptNoValue options like rapid commit */
+    RTCString strValue;
+    option->getAttributeValue("value", &strValue);
+
+    /* XXX: TODO: encoding, handle hex */
+    DhcpOption *pNewOption = DhcpOption::parse(u8Opt, u32Enc, strValue.c_str());
+    if (pNewOption == NULL)
+        throw ConfigFileError(RTCStringFmt("Bad option \"%s\"", strName.c_str()));
+
+    optmap << pNewOption;
+}
+
+
+/*
+ * Set m_strBaseName to sanitized version of m_strNetwork that can be
+ * used in a path component: every path separator is replaced with an
+ * underscore.  m_strBaseName is left unchanged if the network name is
+ * empty or does not fit into an RTPATH_MAX sized buffer.
+ */
+void Config::sanitizeBaseName()
+{
+    if (m_strNetwork.empty())
+        return;
+
+    char szBaseName[RTPATH_MAX];
+    const int rc = RTStrCopy(szBaseName, sizeof(szBaseName), m_strNetwork.c_str());
+    if (RT_FAILURE(rc))
+        return;
+
+    for (size_t off = 0; szBaseName[off] != '\0'; ++off)
+        if (RTPATH_IS_SEP(szBaseName[off]))
+            szBaseName[off] = '_';
+
+    m_strBaseName.assign(szBaseName);
+}
+
+
+/*
+ * Collect the options to send to a client.
+ *
+ * The subnet mask is always included.  Each option in the client's
+ * parameter request list is then looked up in the per-VM options for the
+ * client's MAC address first and in the global options second.  Finally
+ * every configured option with a code above 127 that is not in the result
+ * yet is added (VM options before global ones), whether the client asked
+ * for it or not.  The vendor class is not used yet.
+ *
+ * Progress is traced on stdout.
+ */
+optmap_t Config::getOptions(const OptParameterRequest &reqOpts,
+                            const ClientId &id,
+                            const OptVendorClassId &vendor) const
+{
+    RT_NOREF(vendor); /* not yet */
+
+    /* per-VM options are keyed by MAC address only */
+    const optmap_t *pVmOptions = NULL;
+    vmmap_t::const_iterator itVm( m_VMMap.find(id.mac()) );
+    if (itVm != m_VMMap.end())
+        pVmOptions = &itVm->second;
+
+    optmap_t result;
+    result << new OptSubnetMask(m_IPv4Netmask);
+
+    const OptParameterRequest::value_t &requested = reqOpts.value();
+    for (octets_t::const_iterator itReq = requested.begin(); itReq != requested.end(); ++itReq)
+    {
+        const uint8_t bOpt = *itReq;
+        std::cout << ">>> requested option " << (int)bOpt << std::endl;
+
+        if (bOpt == OptSubnetMask::optcode)
+        {
+            std::cout << "... always supplied" << std::endl;
+        }
+        else
+        {
+            bool fFound = false;
+
+            /* VM specific setting takes precedence ... */
+            if (pVmOptions != NULL)
+            {
+                optmap_t::const_iterator itOpt( pVmOptions->find(bOpt) );
+                if (itOpt != pVmOptions->end())
+                {
+                    result << itOpt->second;
+                    std::cout << "... found in VM options" << std::endl;
+                    fFound = true;
+                }
+            }
+
+            /* ... over the global one */
+            if (!fFound)
+            {
+                optmap_t::const_iterator itOpt( m_GlobalOptions.find(bOpt) );
+                if (itOpt != m_GlobalOptions.end())
+                {
+                    result << itOpt->second;
+                    std::cout << "... found in global options" << std::endl;
+                }
+                // else: std::cout << "... not found" << std::endl;
+            }
+        }
+    }
+
+
+    /* XXX: testing ... */
+    if (pVmOptions != NULL)
+    {
+        for (optmap_t::const_iterator itOpt = pVmOptions->begin(); itOpt != pVmOptions->end(); ++itOpt)
+        {
+            std::shared_ptr<DhcpOption> opt(itOpt->second);
+            if (opt->optcode() > 127 && result.count(opt->optcode()) == 0)
+            {
+                result << opt;
+                std::cout << "... forcing VM option " << (int)opt->optcode() << std::endl;
+            }
+        }
+    }
+
+    for (optmap_t::const_iterator itOpt = m_GlobalOptions.begin(); itOpt != m_GlobalOptions.end(); ++itOpt)
+    {
+        std::shared_ptr<DhcpOption> opt(itOpt->second);
+        if (opt->optcode() > 127 && result.count(opt->optcode()) == 0)
+        {
+            result << opt;
+            std::cout << "... forcing global option " << (int)opt->optcode() << std::endl;
+        }
+    }
+
+    return result;
+}
diff --git a/src/VBox/NetworkServices/Dhcpd/Config.h b/src/VBox/NetworkServices/Dhcpd/Config.h
new file mode 100644
index 00000000..04d35959
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/Config.h
@@ -0,0 +1,110 @@
+/* $Id: Config.h $ */
+/** @file
+ * DHCP server - server configuration
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_Config_h
+#define VBOX_INCLUDED_SRC_Dhcpd_Config_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/types.h>
+#include <iprt/net.h>
+#include <iprt/cpp/xml.h>
+
+#include <VBox/intnet.h>
+
+#include <string>
+
+#include "Defs.h"
+#include "DhcpOptions.h"
+#include "ClientId.h"
+
+
+/**
+ * DHCP server configuration: network and trunk, the server's own MAC and
+ * IPv4 address, the address pool, and the global and per-VM DHCP options.
+ *
+ * Instances are only created through the static factory methods, which
+ * return a heap-allocated object (or NULL on failure).
+ */
+class Config
+{
+ /* XXX: TODO: also store fixed address assignments, etc? */
+ typedef std::map<RTMAC, optmap_t> vmmap_t; /* per-VM options keyed by MAC address */
+
+ std::string m_strHome; /* path of ~/.VirtualBox or equivalent */
+
+ std::string m_strNetwork; /* network name; set once via setNetwork() */
+ std::string m_strBaseName; /* m_strNetwork sanitized to be usable in a path component */
+
+ std::string m_strTrunk; /* trunk name */
+ INTNETTRUNKTYPE m_enmTrunkType; /* kIntNetTrunkType_Invalid until set */
+
+ RTMAC m_MacAddress; /* server MAC address; generated by complete() if left all-zero */
+
+ RTNETADDRIPV4 m_IPv4Address; /* server address */
+ RTNETADDRIPV4 m_IPv4Netmask;
+
+ RTNETADDRIPV4 m_IPv4PoolFirst; /* first address of the pool */
+ RTNETADDRIPV4 m_IPv4PoolLast; /* last address of the pool */
+
+ optmap_t m_GlobalOptions; /* options from the <Options> element */
+ vmmap_t m_VMMap; /* options from the per-VM <Config> elements */
+
+private:
+ Config();
+
+ int init(); /* calls homeInit() */
+ int homeInit(); /* looks up m_strHome */
+ int logInit(); /* creates the release log; needs home and base name */
+ int complete(); /* starts logging and validates the settings */
+
+public: /* factory methods */
+ static Config *hardcoded(); /* for testing */
+ static Config *create(int argc, char **argv); /* --config */
+ static Config *compat(int argc, char **argv); /* old VBoxNetDHCP flags */
+
+public: /* accessors */
+ const std::string &getHome() const { return m_strHome; }
+
+ const std::string &getNetwork() const { return m_strNetwork; }
+ void setNetwork(const std::string &aStrNetwork); /* also updates the base name */
+
+ const std::string &getBaseName() const { return m_strBaseName; }
+ const std::string &getTrunk() const { return m_strTrunk; }
+ INTNETTRUNKTYPE getTrunkType() const { return m_enmTrunkType; }
+
+ const RTMAC &getMacAddress() const { return m_MacAddress; }
+
+ RTNETADDRIPV4 getIPv4Address() const { return m_IPv4Address; }
+ RTNETADDRIPV4 getIPv4Netmask() const { return m_IPv4Netmask; }
+
+ RTNETADDRIPV4 getIPv4PoolFirst() const { return m_IPv4PoolFirst; }
+ RTNETADDRIPV4 getIPv4PoolLast() const { return m_IPv4PoolLast; }
+
+public:
+ /* Options to send to the given client for the given parameter request list. */
+ optmap_t getOptions(const OptParameterRequest &reqOpts, const ClientId &id,
+ const OptVendorClassId &vendor = OptVendorClassId()) const;
+
+private:
+ /* XML config file parsing; the parse*(ElementNode) methods throw on errors */
+ static Config *read(const char *pszFileName);
+ void parseConfig(const xml::ElementNode *root);
+ void parseServer(const xml::ElementNode *server);
+ void parseGlobalOptions(const xml::ElementNode *options);
+ void parseVMConfig(const xml::ElementNode *config);
+ void parseOption(const xml::ElementNode *option, optmap_t &optmap);
+
+ int parseMACAddress(RTMAC &aMac, const RTCString &aStr);
+ int parseClientId(OptClientId &aId, const RTCString &aStr); /* not implemented, always fails */
+
+ void sanitizeBaseName(); /* derives m_strBaseName from m_strNetwork */
+};
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_Config_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/DHCPD.cpp b/src/VBox/NetworkServices/Dhcpd/DHCPD.cpp
new file mode 100644
index 00000000..5ded7b38
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/DHCPD.cpp
@@ -0,0 +1,286 @@
+/* $Id: DHCPD.cpp $ */
+/** @file
+ * DHCP server - protocol logic
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include "DHCPD.h"
+#include "DhcpOptions.h"
+
+#include <iprt/path.h>
+
+
+/**
+ * Constructs a server without a configuration; the configuration pointer
+ * stays NULL until init() installs one.
+ */
+DHCPD::DHCPD()
+    : m_pConfig(NULL)
+    , m_db()
+{
+}
+
+
+/**
+ * One-time initialization: composes the leases file name from the
+ * configuration, initializes the address database and loads saved leases.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_STATE when a configuration
+ *          is already installed, or the failure status of Db::init().
+ * @param   pConfig     Configuration to use; remembered only on success.
+ */
+int DHCPD::init(const Config *pConfig)
+{
+    /* refuse to initialize twice */
+    if (m_pConfig != NULL)
+        return VERR_INVALID_STATE;
+
+    /* leases file: <home><delimiter><base name>-Dhcpd.leases */
+    m_strLeasesFileName = pConfig->getHome();
+    m_strLeasesFileName += RTPATH_DELIMITER;
+    m_strLeasesFileName += pConfig->getBaseName();
+    m_strLeasesFileName += "-Dhcpd.leases";
+
+    const int rcDb = m_db.init(pConfig);
+    if (RT_FAILURE(rcDb))
+        return rcDb;
+
+    loadLeases();
+
+    m_pConfig = pConfig;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the leases file into the address database.  The status returned by
+ * the database is not inspected here.
+ */
+void DHCPD::loadLeases()
+{
+    const std::string &strFile = m_strLeasesFileName;
+    m_db.loadLeases(strFile);
+}
+
+
+/**
+ * Runs an expiry pass over the database and then writes all bindings to the
+ * leases file.  The write status is not inspected here.
+ */
+void DHCPD::saveLeases()
+{
+    /* expire first so the file reflects the current lease states */
+    m_db.expire();
+
+    const std::string &strFile = m_strLeasesFileName;
+    m_db.writeLeases(strFile);
+}
+
+
+/**
+ * Dispatches a client message to the handler for its DHCP message type.
+ *
+ * @returns Reply produced by the handler, or NULL when the message gets no
+ *          reply (addressed to another server, DECLINE/RELEASE, or an
+ *          unexpected message type).
+ * @param   req     The parsed client message.
+ */
+DhcpServerMessage *DHCPD::process(DhcpClientMessage &req)
+{
+    req.dump();
+
+    /*
+     * A message carrying a server id other than our own address is not for
+     * us.  For a broadcast DHCPREQUEST we additionally withdraw any offer we
+     * have outstanding for that client.
+     */
+    OptServerId sid(req);
+    if (sid.present() && sid.value().u != m_pConfig->getIPv4Address().u)
+    {
+        if (req.broadcasted() && req.messageType() == RTNET_DHCP_MT_REQUEST)
+            m_db.cancelOffer(req);
+
+        return NULL;
+    }
+
+    switch (req.messageType())
+    {
+        /*
+         * Requests that require server's reply.
+         */
+        case RTNET_DHCP_MT_DISCOVER:
+            return doDiscover(req);
+
+        case RTNET_DHCP_MT_REQUEST:
+            return doRequest(req);
+
+        case RTNET_DHCP_MT_INFORM:
+            return doInform(req);
+
+        /*
+         * Requests that don't have a reply.
+         */
+        case RTNET_DHCP_MT_DECLINE:
+            doDecline(req);
+            return NULL;
+
+        case RTNET_DHCP_MT_RELEASE:
+            doRelease(req);
+            return NULL;
+
+        /*
+         * Unexpected or unknown message types.
+         */
+        case RTNET_DHCP_MT_OFFER: /* FALLTHROUGH */
+        case RTNET_DHCP_MT_ACK:   /* FALLTHROUGH */
+        case RTNET_DHCP_MT_NAC:   /* FALLTHROUGH */
+        default:
+            return NULL;
+    }
+}
+
+
+/**
+ * Allocates a server message of the given type in response to @a req,
+ * passing the configured server IPv4 address to its constructor.
+ *
+ * @returns Heap-allocated message; the caller takes ownership.
+ * @param   type    DHCP message type of the reply.
+ * @param   req     The client message being answered.
+ */
+DhcpServerMessage *DHCPD::createMessage(int type, DhcpClientMessage &req)
+{
+    const RTNETADDRIPV4 serverAddr = m_pConfig->getIPv4Address();
+    return new DhcpServerMessage(req, type, serverAddr);
+}
+
+
+/**
+ * Handles DHCPDISCOVER: answers with DHCPOFFER, or directly with DHCPACK
+ * when the client included the Rapid Commit option.
+ *
+ * @returns Reply owned by the caller, or NULL when the message is ignored
+ *          (non-zero ciaddr) or no binding could be allocated.
+ * @param   req     The client's DHCPDISCOVER.
+ */
+DhcpServerMessage *DHCPD::doDiscover(DhcpClientMessage &req)
+{
+    /*
+     * XXX: TODO: Windows iSCSI initiator sends DHCPDISCOVER first and
+     * it has ciaddr filled.  Shouldn't let it screw up the normal
+     * lease we already have for that client, but we should probably
+     * reply with a pro-forma offer.
+     */
+    if (req.ciaddr().u != 0)
+        return NULL;
+
+    Binding *pBinding = m_db.allocateBinding(req);
+    if (pBinding == NULL)
+        return NULL;
+
+    std::unique_ptr<DhcpServerMessage> reply;
+
+    const bool fRapidCommit = OptRapidCommit(req).present();
+    if (fRapidCommit)
+    {
+        /* commit right away and persist the lease */
+        reply.reset(createMessage(RTNET_DHCP_MT_ACK, req));
+        reply->addOption(OptRapidCommit(true));
+
+        pBinding->setState(Binding::ACKED);
+        saveLeases();
+    }
+    else
+    {
+        reply.reset(createMessage(RTNET_DHCP_MT_OFFER, req));
+
+        /* only raise the state; a binding already offered/acked keeps it */
+        if (pBinding->state() < Binding::OFFERED)
+            pBinding->setState(Binding::OFFERED);
+
+        /* use small lease time internally to quickly free unclaimed offers? */
+    }
+
+    reply->setYiaddr(pBinding->addr());
+    reply->addOption(OptLeaseTime(pBinding->leaseTime()));
+
+    /* options the client asked for, as far as the configuration has them */
+    OptParameterRequest optlist(req);
+    reply->addOptions(m_pConfig->getOptions(optlist, req.clientId()));
+
+    // reply->maybeUnicast(req); /* XXX: we reject ciaddr != 0 above */
+    return reply.release();
+}
+
+
+/**
+ * Handles DHCPREQUEST: acknowledges the binding allocated for the client,
+ * or answers with a NAK when the request is inconsistent or no binding can
+ * be allocated.
+ *
+ * @returns Reply (ACK or NAK) owned by the caller.
+ * @param   req     The client's DHCPREQUEST.
+ */
+DhcpServerMessage *DHCPD::doRequest(DhcpClientMessage &req)
+{
+    /* ciaddr and the requested-address option must agree if both are given */
+    OptRequestedAddress reqAddr(req);
+    const bool fMismatch = req.ciaddr().u != 0
+                        && reqAddr.present()
+                        && reqAddr.value().u != req.ciaddr().u;
+    if (fMismatch)
+    {
+        std::unique_ptr<DhcpServerMessage> nak(createMessage(RTNET_DHCP_MT_NAC, req));
+        nak->addOption(OptMessage("Requested address does not match ciaddr"));
+        return nak.release();
+    }
+
+    Binding *pBinding = m_db.allocateBinding(req);
+    if (pBinding == NULL)
+        return createMessage(RTNET_DHCP_MT_NAC, req);
+
+    std::unique_ptr<DhcpServerMessage> ack(createMessage(RTNET_DHCP_MT_ACK, req));
+
+    /* commit the lease and persist it before replying */
+    pBinding->setState(Binding::ACKED);
+    saveLeases();
+
+    ack->setYiaddr(pBinding->addr());
+    ack->addOption(OptLeaseTime(pBinding->leaseTime()));
+
+    OptParameterRequest optlist(req);
+    ack->addOptions(m_pConfig->getOptions(optlist, req.clientId()));
+
+    ack->addOption(OptMessage("Ok, ok, here it is"));
+
+    ack->maybeUnicast(req);
+    return ack.release();
+}
+
+
+/*
+ * RFC 2131, 4.3.5 DHCPINFORM message
+ *
+ * The server responds to a DHCPINFORM message by sending a DHCPACK
+ * message directly to the address given in the 'ciaddr' field of the
+ * DHCPINFORM message.  The server MUST NOT send a lease expiration time
+ * to the client and SHOULD NOT fill in 'yiaddr'.  The server includes
+ * other parameters in the DHCPACK message as defined in section 4.3.1.
+ *
+ * Returns NULL (no reply) when ciaddr is zero, when the client sent no
+ * parameter request list, or when none of the requested options is
+ * configured.
+ */
+DhcpServerMessage *DHCPD::doInform(DhcpClientMessage &req)
+{
+    if (req.ciaddr().u == 0)
+        return NULL;
+
+    const OptParameterRequest params(req);
+    if (!params.present())
+        return NULL;
+
+    optmap_t info(m_pConfig->getOptions(params, req.clientId()));
+    if (info.empty())
+        return NULL;
+
+    /* neither yiaddr nor a lease time is set on this ACK */
+    std::unique_ptr<DhcpServerMessage> ack(createMessage(RTNET_DHCP_MT_ACK, req));
+    ack->addOptions(info);
+    ack->maybeUnicast(req);
+
+    return ack.release();
+}
+
+
+/*
+ * RFC 2131, 4.3.3 DHCPDECLINE message
+ *
+ * If the server receives a DHCPDECLINE message, the client has
+ * discovered through some other means that the suggested network
+ * address is already in use.  The server MUST mark the network address
+ * as not available and SHOULD notify the local system administrator of
+ * a possible configuration problem.
+ *
+ * Not implemented: the message is ignored and no state is changed.
+ */
+DhcpServerMessage *DHCPD::doDecline(DhcpClientMessage &req)
+{
+    RT_NOREF(req);
+
+    /* DHCPDECLINE never gets a reply */
+    return NULL;
+}
+
+
+/*
+ * RFC 2131, 4.3.4 DHCPRELEASE message
+ *
+ * Upon receipt of a DHCPRELEASE message, the server marks the network
+ * address as not allocated.  The server SHOULD retain a record of the
+ * client's initialization parameters for possible reuse in response to
+ * subsequent requests from the client.
+ *
+ * Always returns NULL: DHCPRELEASE gets no reply.
+ */
+DhcpServerMessage *DHCPD::doRelease(DhcpClientMessage &req)
+{
+    /* the address being released is identified by ciaddr */
+    if (req.ciaddr().u == 0)
+        return NULL;
+
+    /* persist only when a binding was actually released */
+    if (m_db.releaseBinding(req))
+        saveLeases();
+
+    return NULL;
+}
diff --git a/src/VBox/NetworkServices/Dhcpd/DHCPD.h b/src/VBox/NetworkServices/Dhcpd/DHCPD.h
new file mode 100644
index 00000000..5857da78
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/DHCPD.h
@@ -0,0 +1,65 @@
+/* $Id: DHCPD.h $ */
+/** @file
+ * DHCP server - protocol logic
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_DHCPD_h
+#define VBOX_INCLUDED_SRC_Dhcpd_DHCPD_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include "Defs.h"
+#include "Config.h"
+#include "DhcpMessage.h"
+#include "Db.h"
+
+
+/**
+ * DHCP protocol logic: turns parsed client messages into server replies,
+ * using the configuration and the address database.
+ */
+class DHCPD
+{
+    const Config *m_pConfig;            /**< NULL until init() succeeds */
+    std::string m_strLeasesFileName;    /**< composed by init() */
+    Db m_db;                            /**< address database */
+
+public:
+    DHCPD();
+
+    int init(const Config *);
+
+    /**
+     * Convenience overload for a message held in a unique_ptr.
+     *
+     * @returns NULL for an empty pointer, otherwise whatever
+     *          process(DhcpClientMessage &) returns.
+     */
+    DhcpServerMessage *process(const std::unique_ptr<DhcpClientMessage> &req)
+    {
+        DhcpClientMessage *pReq = req.get();
+        return pReq != NULL ? process(*pReq) : NULL;
+    }
+
+    DhcpServerMessage *process(DhcpClientMessage &req);
+
+private:
+    /* handlers for messages that may produce a reply */
+    DhcpServerMessage *doDiscover(DhcpClientMessage &req);
+    DhcpServerMessage *doRequest(DhcpClientMessage &req);
+    DhcpServerMessage *doInform(DhcpClientMessage &req);
+
+    /* handlers for messages that never produce a reply */
+    DhcpServerMessage *doDecline(DhcpClientMessage &req);
+    DhcpServerMessage *doRelease(DhcpClientMessage &req);
+
+    DhcpServerMessage *createMessage(int type, DhcpClientMessage &req);
+
+    void loadLeases();
+    void saveLeases();
+};
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_DHCPD_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/Db.cpp b/src/VBox/NetworkServices/Dhcpd/Db.cpp
new file mode 100644
index 00000000..dfafc784
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/Db.cpp
@@ -0,0 +1,747 @@
+/* $Id: Db.cpp $ */
+/** @file
+ * DHCP server - address database
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <iprt/errcore.h>
+#include <iprt/stream.h>
+
+#include "Db.h"
+
+
+/**
+ * Constructs an empty database; the configuration pointer is NULL until
+ * init() is called.
+ */
+Db::Db()
+    : m_pConfig(NULL)
+{
+}
+
+
+/**
+ * Destroys the database and frees every binding it holds.
+ *
+ * The list stores raw pointers to bindings allocated with new by
+ * createBinding() or handed over through addBinding(), so they have to be
+ * deleted here.
+ */
+Db::~Db()
+{
+    for (bindings_t::iterator it = m_bindings.begin();
+         it != m_bindings.end(); ++it)
+    {
+        delete *it;
+    }
+    m_bindings.clear();
+}
+
+
+/**
+ * Registers the %R[binding] format type, remembers the configuration and
+ * sets up the address pool from the configured first/last pool addresses.
+ *
+ * @returns VINF_SUCCESS (always).
+ * @param   pConfig     The server configuration.
+ */
+int Db::init(const Config *pConfig)
+{
+    Binding::registerFormat();
+
+    m_pConfig = pConfig;
+
+    const RTNETADDRIPV4 poolFirst = pConfig->getIPv4PoolFirst();
+    const RTNETADDRIPV4 poolLast  = pConfig->getIPv4PoolLast();
+    m_pool.init(poolFirst, poolLast);
+
+    return VINF_SUCCESS;
+}
+
+
+/* Set once the "binding" format type has been registered. */
+bool Binding::g_fFormatRegistered = false;
+
+
+/**
+ * Registers the "binding" custom format type (used as %R[binding]) on the
+ * first call; later calls do nothing.
+ */
+void Binding::registerFormat()
+{
+    if (!g_fFormatRegistered)
+    {
+        int rc = RTStrFormatTypeRegister("binding", rtStrFormat, NULL);
+        AssertRC(rc);
+
+        g_fFormatRegistered = true;
+    }
+}
+
+
+/**
+ * Format callback for %R[binding].
+ *
+ * Emits "<NULL>" for a NULL binding, "<addr> free" for a free one, and
+ * otherwise the address, client id, state name, issue time, lease duration
+ * and end of validity.
+ *
+ * @returns Number of characters produced.
+ * @param   pfnOutput       Output callback.
+ * @param   pvArgOutput     Argument for the output callback.
+ * @param   pszType         Format type name; must be "binding".
+ * @param   pvValue         The Binding to format, may be NULL.
+ * @param   cchWidth        Unused.
+ * @param   cchPrecision    Unused.
+ * @param   fFlags          Unused.
+ * @param   pvUser          Unused.
+ */
+DECLCALLBACK(size_t)
+Binding::rtStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
+                     const char *pszType, void const *pvValue,
+                     int cchWidth, int cchPrecision, unsigned fFlags,
+                     void *pvUser)
+{
+    const Binding *b = static_cast<const Binding *>(pvValue);
+    size_t cb = 0;
+
+    AssertReturn(strcmp(pszType, "binding") == 0, 0);
+    RT_NOREF(pszType);
+
+    RT_NOREF(cchWidth, cchPrecision, fFlags);
+    RT_NOREF(pvUser);
+
+    if (b == NULL)
+    {
+        return RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                           "<NULL>");
+    }
+
+    cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                      "%RTnaipv4", b->m_addr.u);
+
+    if (b->m_state == Binding::FREE)
+    {
+        cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                          " free");
+    }
+    else
+    {
+        cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                          " to %R[id], %s, valid from ",
+                          &b->m_id, b->stateName());
+
+        TimeStamp tsIssued = b->issued();
+        cb += tsIssued.absStrFormat(pfnOutput, pvArgOutput);
+
+        /* leaseTime() is uint32_t: format it as unsigned so that large
+         * values are not printed as negative numbers */
+        cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                          " for %us until ",
+                          b->leaseTime());
+
+        TimeStamp tsValid = b->issued();
+        tsValid.addSeconds(b->leaseTime());
+        cb += tsValid.absStrFormat(pfnOutput, pvArgOutput);
+    }
+
+    return cb;
+}
+
+/**
+ * Returns the textual name of the current state, as stored in the leases
+ * file.  An unrecognized state value is reported as "released".
+ */
+const char *Binding::stateName() const
+{
+    const char *pszName = "released"; /* also the answer for unknown values */
+
+    switch (m_state)
+    {
+        case FREE:      pszName = "free";     break;
+        case RELEASED:  pszName = "released"; break;
+        case EXPIRED:   pszName = "expired";  break;
+        case OFFERED:   pszName = "offered";  break;
+        case ACKED:     pszName = "acked";    break;
+        default:                              break;
+    }
+
+    return pszName;
+}
+
+
+/**
+ * Sets the state from its textual name (the inverse of stateName()).
+ * An unrecognized name selects RELEASED.
+ *
+ * @returns Reference to this binding.
+ * @param   pszStateName    State name, e.g. as read from the leases file.
+ */
+Binding &Binding::setState(const char *pszStateName)
+{
+    static const struct
+    {
+        const char *pszName;
+        State       enmState;
+    } s_aStates[] =
+    {
+        { "free",     Binding::FREE     },
+        { "released", Binding::RELEASED },
+        { "expired",  Binding::EXPIRED  },
+        { "offered",  Binding::OFFERED  },
+        { "acked",    Binding::ACKED    },
+    };
+
+    State enmNew = Binding::RELEASED; /* fallback for unknown names */
+    for (size_t i = 0; i < sizeof(s_aStates) / sizeof(s_aStates[0]); ++i)
+    {
+        if (strcmp(pszStateName, s_aStates[i].pszName) == 0)
+        {
+            enmNew = s_aStates[i].enmState;
+            break;
+        }
+    }
+
+    m_state = enmNew;
+    return *this;
+}
+
+
+/**
+ * Expires the binding if its lease ended before @a deadline.
+ *
+ * Only bindings that are in use (OFFERED or ACKED) can expire.  An
+ * unclaimed offer becomes FREE again, an acknowledged lease becomes
+ * EXPIRED.
+ *
+ * @returns true if this call changed the state because the lease ran out,
+ *          false if the binding was not in use or is still valid.
+ * @param   deadline    Point in time to check the end of the lease against.
+ */
+bool Binding::expire(TimeStamp deadline)
+{
+    if (m_state <= Binding::EXPIRED)
+        return false;
+
+    TimeStamp t = m_issued;
+    t.addSeconds(m_secLease);
+
+    /* still valid: report "not expired" instead of unconditionally true */
+    if (!(t < deadline))
+        return false;
+
+    if (m_state == Binding::OFFERED)
+        setState(Binding::FREE);
+    else
+        setState(Binding::EXPIRED);
+
+    return true;
+}
+
+
+/**
+ * Serializes the binding as a "Lease" child element of @a ndParent.
+ *
+ * Layout: Lease with attributes mac, id (optional, plain hex digits),
+ * network and state; child Address with attribute value; child Time with
+ * attributes issued and expiration.
+ *
+ * @returns VINF_SUCCESS, VERR_GENERAL_FAILURE if an element could not be
+ *          created, or the status of RTStrPrintHexBytes() if the client id
+ *          could not be encoded.  On failure a partially filled Lease
+ *          element may remain in @a ndParent.
+ * @param   ndParent    Element to append the Lease element to.
+ */
+int Binding::toXML(xml::ElementNode *ndParent) const
+{
+    /*
+     * Lease
+     */
+    xml::ElementNode *ndLease = ndParent->createChild("Lease");
+    if (ndLease == NULL)
+        return VERR_GENERAL_FAILURE;
+
+    /* XXX: arrange for lease to get deleted if anything below fails */
+
+    ndLease->setAttribute("mac", RTCStringFmt("%RTmac", &m_id.mac()));
+    if (m_id.id().present())
+    {
+        /* I'd prefer RTSTRPRINTHEXBYTES_F_SEP_COLON but there's no decoder */
+        const size_t cbId = m_id.id().value().size();
+        if (cbId > 0)
+        {
+            /* two hex digits per byte plus the terminator; the vector
+             * releases the buffer even if setAttribute() throws */
+            std::vector<char> strId(cbId * 2 + 1);
+            int rc = RTStrPrintHexBytes(&strId[0], strId.size(),
+                                        &m_id.id().value().front(), cbId,
+                                        0);
+            if (RT_FAILURE(rc))
+                return rc;
+            ndLease->setAttribute("id", &strId[0]);
+        }
+        else
+        {
+            /* present but empty id: nothing to encode, and front() must
+             * not be called on an empty container */
+            ndLease->setAttribute("id", "");
+        }
+    }
+
+    /* unused but we need it to keep the old code happy */
+    ndLease->setAttribute("network", "0.0.0.0");
+
+    ndLease->setAttribute("state", stateName());
+
+    /*
+     * Lease/Address
+     */
+    xml::ElementNode *ndAddr = ndLease->createChild("Address");
+    if (ndAddr == NULL)
+        return VERR_GENERAL_FAILURE;
+    ndAddr->setAttribute("value", RTCStringFmt("%RTnaipv4", m_addr.u));
+
+    /*
+     * Lease/Time
+     */
+    xml::ElementNode *ndTime = ndLease->createChild("Time");
+    if (ndTime == NULL)
+        return VERR_GENERAL_FAILURE;
+    ndTime->setAttribute("issued", m_issued.getAbsSeconds());
+    ndTime->setAttribute("expiration", m_secLease);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Creates a binding from a "Lease" element as written by toXML().
+ *
+ * Mandatory: Lease/@mac, Lease/Address/@value, Lease/Time/@issued and
+ * Lease/Time/@expiration.  Optional: Lease/@id and Lease/@state.
+ *
+ * @returns Newly allocated binding owned by the caller, or NULL if a
+ *          mandatory part is missing or cannot be parsed.
+ * @param   ndLease     The Lease element.
+ */
+Binding *Binding::fromXML(const xml::ElementNode *ndLease)
+{
+    /* Lease/@network seems to always have bogus value, ignore it. */
+
+    /*
+     * Lease/@mac
+     */
+    RTCString strMac;
+    if (!ndLease->getAttributeValue("mac", &strMac))
+        return NULL;
+
+    RTMAC mac;
+    int rc = RTNetStrToMacAddr(strMac.c_str(), &mac);
+    if (RT_FAILURE(rc))
+        return NULL;
+
+    /*
+     * Lease/@id - optional.  Decoded as plain hex digits, two per byte;
+     * RTStrConvertHexBytes() doesn't grok colon separators.  If decoding
+     * fails the id is simply left unset.
+     */
+    OptClientId id;
+    RTCString strId;
+    if (ndLease->getAttributeValue("id", &strId))
+    {
+        const size_t cbId = strId.length() / 2;
+        uint8_t *pabId = new uint8_t[cbId];
+        rc = RTStrConvertHexBytes(strId.c_str(), pabId, cbId, 0);
+        if (RT_SUCCESS(rc))
+        {
+            std::vector<uint8_t> rawopt(pabId, pabId + cbId);
+            id = OptClientId(rawopt);
+        }
+        delete[] pabId;
+    }
+
+    /*
+     * Lease/@state - not present in old leases file.  We will try to
+     * infer from lease time below.
+     */
+    RTCString strState;
+    const bool fHasState = ndLease->getAttributeValue("state", &strState);
+
+    /*
+     * Lease/Address/@value
+     */
+    const xml::ElementNode *ndAddress = ndLease->findChildElement("Address");
+    if (ndAddress == NULL)
+        return NULL;
+
+    RTCString strAddress;
+    if (!ndAddress->getAttributeValue("value", &strAddress))
+        return NULL;
+
+    RTNETADDRIPV4 addr;
+    rc = RTNetStrToIPv4Addr(strAddress.c_str(), &addr);
+    if (RT_FAILURE(rc))
+        return NULL;
+
+    /*
+     * Lease/Time/@issued and Lease/Time/@expiration
+     */
+    const xml::ElementNode *ndTime = ndLease->findChildElement("Time");
+    if (ndTime == NULL)
+        return NULL;
+
+    int64_t issued;
+    if (!ndTime->getAttributeValue("issued", &issued))
+        return NULL;
+
+    uint32_t duration;
+    if (!ndTime->getAttributeValue("expiration", &duration))
+        return NULL;
+
+    /*
+     * Everything mandatory is there, build the binding.
+     */
+    std::unique_ptr<Binding> b(new Binding(addr));
+    b->m_id = ClientId(mac, id);
+    b->m_secLease = duration;
+
+    if (!fHasState)
+    {   /* XXX: old code wrote timestamps instead of absolute time. */
+        /* pretend that lease has just ended */
+        TimeStamp fakeIssued = TimeStamp::now();
+        fakeIssued.subSeconds(duration);
+        b->m_issued = fakeIssued;
+        b->m_state = Binding::EXPIRED;
+    }
+    else
+    {
+        b->m_issued = TimeStamp::absSeconds(issued);
+        b->setState(strState.c_str());
+    }
+
+    return b.release();
+}
+
+
+/**
+ * Runs Binding::expire() on every binding, using one timestamp taken at
+ * the start of the pass.
+ */
+void Db::expire()
+{
+    const TimeStamp now = TimeStamp::now();
+
+    bindings_t::iterator it = m_bindings.begin();
+    while (it != m_bindings.end())
+    {
+        (*it)->expire(now);
+        ++it;
+    }
+}
+
+
+/**
+ * Takes an address from the pool and creates a binding for it, adding the
+ * binding to the front of the list.
+ *
+ * @returns The new binding, or NULL if the pool returned the zero address.
+ * @param   id      Client id to store in the binding.
+ */
+Binding *Db::createBinding(const ClientId &id)
+{
+    const RTNETADDRIPV4 addr = m_pool.allocate();
+    if (addr.u == 0)
+        return NULL;
+
+    Binding *pNew = new Binding(addr, id);
+    m_bindings.push_front(pNew);
+    return pNew;
+}
+
+
+/**
+ * Claims a specific address from the pool and creates a binding for it,
+ * adding the binding to the front of the list.
+ *
+ * @returns The new binding, or NULL if the pool could not hand out @a addr.
+ * @param   addr    The address to claim.
+ * @param   id      Client id to store in the binding.
+ */
+Binding *Db::createBinding(RTNETADDRIPV4 addr, const ClientId &id)
+{
+    if (!m_pool.allocate(addr))
+    {
+        /*
+         * XXX: this should not happen.  If the address is from the
+         * pool, which we have verified before, then either it's in
+         * the free pool or there's a binding (possibly free) for it.
+         */
+        return NULL;
+    }
+
+    Binding *pNew = new Binding(addr, id);
+    m_bindings.push_front(pNew);
+    return pNew;
+}
+
+
+/**
+ * Finds or creates a binding for the client.
+ *
+ * Order of preference:
+ *  -# the binding the client already has (the requested address is then
+ *     ignored),
+ *  -# the requested address, if it has no binding yet or its binding is
+ *     not in use,
+ *  -# a FREE binding,
+ *  -# a fresh address from the pool,
+ *  -# a RELEASED or EXPIRED binding of another client.
+ *
+ * @returns The binding, or NULL if nothing could be allocated.
+ * @param   id      The client.
+ * @param   addr    Requested address, or zero for "any".  Must belong to
+ *                  the pool when non-zero.
+ */
+Binding *Db::allocateAddress(const ClientId &id, RTNETADDRIPV4 addr)
+{
+    Assert(addr.u == 0 || addressBelongs(addr));
+
+    Binding *addrBinding = NULL;
+    Binding *freeBinding = NULL;
+    Binding *reuseBinding = NULL;
+
+    if (addr.u != 0)
+        LogDHCP(("> allocateAddress %RTnaipv4 to client %R[id]\n", addr.u, &id));
+    else
+        LogDHCP(("> allocateAddress to client %R[id]\n", &id));
+
+    /*
+     * Allocate existing address if client has one.  Ignore requested
+     * address in that case.  While here, look for free addresses and
+     * addresses that can be reused.
+     */
+    const TimeStamp now = TimeStamp::now();
+    for (bindings_t::iterator it = m_bindings.begin();
+         it != m_bindings.end(); ++it)
+    {
+        Binding *b = *it;
+        b->expire(now);
+
+        /*
+         * We've already seen this client, give it its old binding.
+         */
+        if (b->m_id == id)
+        {
+            LogDHCP(("> ... found existing binding %R[binding]\n", b));
+            return b;
+        }
+
+        if (addr.u != 0 && b->m_addr.u == addr.u)
+        {
+            Assert(addrBinding == NULL);
+            addrBinding = b;
+            LogDHCP(("> .... noted existing binding %R[binding]\n", addrBinding));
+        }
+
+        /* if we haven't found a free binding yet, keep looking */
+        if (freeBinding == NULL)
+        {
+            if (b->m_state == Binding::FREE)
+            {
+                freeBinding = b;
+                LogDHCP(("> .... noted free binding %R[binding]\n", freeBinding));
+                continue;
+            }
+
+            /* still no free binding, can this one be reused? */
+            if (b->m_state == Binding::RELEASED)
+            {
+                if (   reuseBinding == NULL
+                    /* released binding is better than an expired one */
+                    || reuseBinding->m_state == Binding::EXPIRED)
+                {
+                    reuseBinding = b;
+                    LogDHCP(("> .... noted released binding %R[binding]\n", reuseBinding));
+                }
+            }
+            else if (b->m_state == Binding::EXPIRED)
+            {
+                if (   reuseBinding == NULL
+                    /* long expired binding is better than a recent one */
+                    /* || (reuseBinding->m_state == Binding::EXPIRED && b->olderThan(reuseBinding)) */)
+                {
+                    reuseBinding = b;
+                    LogDHCP(("> .... noted expired binding %R[binding]\n", reuseBinding));
+                }
+            }
+        }
+    }
+
+    /*
+     * Allocate requested address if we can.
+     */
+    if (addr.u != 0)
+    {
+        if (addrBinding == NULL)
+        {
+            addrBinding = createBinding(addr, id);
+            Assert(addrBinding != NULL);
+            LogDHCP(("> .... creating new binding for this address %R[binding]\n",
+                     addrBinding));
+            return addrBinding;
+        }
+
+        if (addrBinding->m_state <= Binding::EXPIRED) /* not in use */
+        {
+            LogDHCP(("> .... reusing %s binding for this address\n",
+                     addrBinding->stateName()));
+            addrBinding->giveTo(id);
+            return addrBinding;
+        }
+        else
+        {
+            LogDHCP(("> .... cannot reuse %s binding for this address\n",
+                     addrBinding->stateName()));
+        }
+    }
+
+    /*
+     * Allocate new (or reuse).
+     */
+    Binding *idBinding = NULL;
+    if (freeBinding != NULL)
+    {
+        idBinding = freeBinding;
+        LogDHCP(("> .... reusing free binding\n"));
+    }
+    else
+    {
+        idBinding = createBinding();
+        if (idBinding != NULL)
+        {
+            LogDHCP(("> .... creating new binding\n"));
+        }
+        else
+        {
+            /*
+             * Pool exhausted: fall back to a released/expired binding.
+             * There may be none, so it must not be dereferenced for
+             * logging unless it exists.
+             */
+            idBinding = reuseBinding;
+            if (idBinding != NULL)
+                LogDHCP(("> .... reusing %s binding %R[binding]\n",
+                         idBinding->stateName(), idBinding));
+        }
+    }
+
+    if (idBinding == NULL)
+    {
+        LogDHCP(("> .... failed to allocate binding\n"));
+        return NULL;
+    }
+
+    idBinding->giveTo(id);
+    LogDHCP(("> .... allocated %R[binding]\n", idBinding));
+
+    return idBinding;
+}
+
+
+
+/**
+ * Allocates a binding for the sender of @a req, honouring the requested
+ * address option where possible, and gives it a lease time of 1200
+ * seconds.
+ *
+ * A requested address outside the pool is ignored for DHCPDISCOVER and
+ * rejected for every other message type.
+ *
+ * @returns The binding, or NULL if the request is rejected or nothing
+ *          could be allocated.
+ * @param   req     The client message.
+ */
+Binding *Db::allocateBinding(const DhcpClientMessage &req)
+{
+    /** @todo XXX: handle fixed address assignments */
+    OptRequestedAddress reqAddr(req);
+    if (reqAddr.present() && !addressBelongs(reqAddr.value()))
+    {
+        if (req.messageType() != RTNET_DHCP_MT_DISCOVER)
+        {
+            LogDHCP(("rejecting invalid requested address\n"));
+            return NULL;
+        }
+
+        LogDHCP(("DISCOVER: ignoring invalid requested address\n"));
+        reqAddr = OptRequestedAddress();
+    }
+
+    const ClientId &id(req.clientId());
+
+    Binding *pBinding = allocateAddress(id, reqAddr.value());
+    if (pBinding == NULL)
+        return NULL;
+
+    Assert(pBinding->id() == id);
+
+    /*
+     * XXX: handle requests for specific lease time!
+     * XXX: old lease might not have expired yet?
+     */
+    // OptLeaseTime reqLeaseTime(req);
+    pBinding->setLeaseTime(1200);
+    return pBinding;
+}
+
+
+/**
+ * Adds an externally created binding (e.g. one read from the leases file)
+ * to the back of the list and claims its address in the pool.
+ *
+ * @returns VINF_SUCCESS if the binding was added.  VERR_INVALID_PARAMETER
+ *          if the address is outside the pool, if a binding with the same
+ *          address or client id already exists, or if the pool refuses the
+ *          address; the binding is not stored in that case.
+ * @param   newb    The binding to add.
+ */
+int Db::addBinding(Binding *newb)
+{
+    if (!addressBelongs(newb->m_addr))
+    {
+        LogDHCP(("Binding for out of range address %RTnaipv4 ignored\n",
+                 newb->m_addr.u));
+        return VERR_INVALID_PARAMETER;
+    }
+
+    /* neither the address nor the client id may be known already */
+    for (bindings_t::iterator it = m_bindings.begin();
+         it != m_bindings.end(); ++it)
+    {
+        const Binding *pOld = *it;
+
+        if (newb->m_addr.u == pOld->m_addr.u)
+        {
+            LogDHCP(("> ADD: %R[binding]\n", newb));
+            LogDHCP(("> .... duplicate ip: %R[binding]\n", pOld));
+            return VERR_INVALID_PARAMETER;
+        }
+
+        if (newb->m_id == pOld->m_id)
+        {
+            LogDHCP(("> ADD: %R[binding]\n", newb));
+            LogDHCP(("> .... duplicate id: %R[binding]\n", pOld));
+            return VERR_INVALID_PARAMETER;
+        }
+    }
+
+    if (!m_pool.allocate(newb->m_addr))
+    {
+        LogDHCP(("> ADD: failed to claim IP %R[binding]\n", newb));
+        return VERR_INVALID_PARAMETER;
+    }
+
+    m_bindings.push_back(newb);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Withdraws an outstanding offer: if the binding for the requested address
+ * belongs to the sender of @a req and is in OFFERED state, its lease time
+ * is set to zero and it becomes RELEASED.
+ *
+ * Does nothing when the message has no requested address option or no
+ * matching binding exists.
+ *
+ * @param   req     The client message.
+ */
+void Db::cancelOffer(const DhcpClientMessage &req)
+{
+    const OptRequestedAddress reqAddr(req);
+    if (!reqAddr.present())
+        return;
+
+    const RTNETADDRIPV4 addr = reqAddr.value();
+    const ClientId &id(req.clientId());
+
+    for (bindings_t::iterator it = m_bindings.begin();
+         it != m_bindings.end(); ++it)
+    {
+        Binding *pCur = *it;
+
+        /* not the binding we are looking for */
+        if (pCur->addr().u != addr.u || !(pCur->id() == id))
+            continue;
+
+        if (pCur->state() == Binding::OFFERED)
+        {
+            pCur->setLeaseTime(0);
+            pCur->setState(Binding::RELEASED);
+        }
+        return;
+    }
+}
+
+
+/**
+ * Marks the binding for the message's ciaddr as RELEASED, provided it
+ * belongs to the sender of @a req.
+ *
+ * @returns true if a matching binding was found, false otherwise.
+ * @param   req     The client message.
+ */
+bool Db::releaseBinding(const DhcpClientMessage &req)
+{
+    const RTNETADDRIPV4 addr = req.ciaddr();
+    const ClientId &id(req.clientId());
+
+    for (bindings_t::iterator it = m_bindings.begin();
+         it != m_bindings.end(); ++it)
+    {
+        Binding *pCur = *it;
+
+        /* not the binding we are looking for */
+        if (pCur->addr().u != addr.u || !(pCur->id() == id))
+            continue;
+
+        pCur->setState(Binding::RELEASED);
+        return true;
+    }
+
+    return false;
+}
+
+
+/**
+ * Writes all bindings to an XML leases file: a "Leases" root element with
+ * version "1.0" and one child per binding (see Binding::toXML()).
+ *
+ * @returns VINF_SUCCESS on success, VERR_INTERNAL_ERROR if the root element
+ *          could not be created, the IPRT status carried by an
+ *          xml::EIPRTFailure, or VERR_GENERAL_FAILURE for any other
+ *          exception while writing.
+ * @param   strFileName     Name of the file to write.
+ */
+int Db::writeLeases(const std::string &strFileName) const
+{
+    LogDHCP(("writing leases to %s\n", strFileName.c_str()));
+
+    xml::Document doc;
+
+    xml::ElementNode *ndRoot = doc.createRootElement("Leases");
+    if (ndRoot == NULL)
+        return VERR_INTERNAL_ERROR;
+
+    ndRoot->setAttribute("version", "1.0");
+
+    /* the status of the individual bindings is not inspected */
+    bindings_t::const_iterator it = m_bindings.begin();
+    while (it != m_bindings.end())
+    {
+        (*it)->toXML(ndRoot);
+        ++it;
+    }
+
+    try
+    {
+        xml::XmlFileWriter writer(doc);
+        writer.write(strFileName.c_str(), true);
+    }
+    catch (const xml::EIPRTFailure &e)
+    {
+        LogDHCP(("%s\n", e.what()));
+        return e.rc();
+    }
+    catch (const RTCError &e)
+    {
+        LogDHCP(("%s\n", e.what()));
+        return VERR_GENERAL_FAILURE;
+    }
+    catch (...)
+    {
+        LogDHCP(("Unknown exception while writing '%s'\n",
+                 strFileName.c_str()));
+        return VERR_GENERAL_FAILURE;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads an XML leases file and passes every "Lease" child of the "Leases"
+ * root element to loadLease().
+ *
+ * @returns VINF_SUCCESS on success, VERR_NOT_FOUND if the root element is
+ *          missing or not named "Leases", the IPRT status carried by an
+ *          xml::EIPRTFailure, or VERR_GENERAL_FAILURE for any other
+ *          exception while reading or parsing.
+ * @param   strFileName     Name of the file to read.
+ */
+int Db::loadLeases(const std::string &strFileName)
+{
+    LogDHCP(("loading leases from %s\n", strFileName.c_str()));
+
+    xml::Document doc;
+    try
+    {
+        xml::XmlFileParser parser;
+        parser.read(strFileName.c_str(), doc);
+    }
+    catch (const xml::EIPRTFailure &e)
+    {
+        LogDHCP(("%s\n", e.what()));
+        return e.rc();
+    }
+    catch (const RTCError &e)
+    {
+        LogDHCP(("%s\n", e.what()));
+        return VERR_GENERAL_FAILURE;
+    }
+    catch (...)
+    {
+        LogDHCP(("Unknown exception while reading and parsing '%s'\n",
+                 strFileName.c_str()));
+        return VERR_GENERAL_FAILURE;
+    }
+
+    xml::ElementNode *ndRoot = doc.getRootElement();
+    if (ndRoot == NULL || !ndRoot->nameEquals("Leases"))
+        return VERR_NOT_FOUND;
+
+    /* children with any other name are skipped */
+    xml::NodesLoop it(*ndRoot);
+    for (const xml::ElementNode *ndChild = it.forAllNodes();
+         ndChild != NULL;
+         ndChild = it.forAllNodes())
+    {
+        if (ndChild->nameEquals("Lease"))
+            loadLease(ndChild);
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Loads a single "Lease" element and adds the resulting binding to the
+ * database.
+ *
+ * A lease that cannot be parsed is skipped.  A binding that the database
+ * rejects (out of range address, duplicate address or client id) is freed
+ * again, since nothing else would own it.
+ *
+ * @param   ndLease     The Lease element.
+ */
+void Db::loadLease(const xml::ElementNode *ndLease)
+{
+    Binding *b = Binding::fromXML(ndLease);
+    if (b == NULL)
+    {
+        /* fromXML() returns NULL for malformed entries */
+        LogDHCP(("> LOAD: ignoring malformed lease entry\n"));
+        return;
+    }
+
+    bool expired = b->expire();
+
+    if (!expired)
+        LogDHCP(("> LOAD: lease %R[binding]\n", b));
+    else
+        LogDHCP(("> LOAD: EXPIRED lease %R[binding]\n", b));
+
+    /* addBinding() stores the pointer only on success */
+    int rc = addBinding(b);
+    if (RT_FAILURE(rc))
+        delete b;
+}
diff --git a/src/VBox/NetworkServices/Dhcpd/Db.h b/src/VBox/NetworkServices/Dhcpd/Db.h
new file mode 100644
index 00000000..793101a7
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/Db.h
@@ -0,0 +1,162 @@
+/* $Id: Db.h $ */
+/** @file
+ * DHCP server - address database
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_Db_h
+#define VBOX_INCLUDED_SRC_Dhcpd_Db_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/net.h>
+
+#include <iprt/cpp/xml.h>
+
+#include <list>
+
+#include "Defs.h"
+#include "TimeStamp.h"
+#include "ClientId.h"
+#include "IPv4Pool.h"
+#include "Config.h"
+#include "DhcpMessage.h"
+
+
+/**
+ * An address together with the client it is (or was) given to, the state
+ * of that assignment and the timing of the lease.
+ */
+class Binding
+{
+    friend class Db;
+
+public:
+    /**
+     * Assignment states.  The order matters: the code in Db.cpp treats
+     * every state up to and including EXPIRED as "not in use".
+     */
+    enum State { FREE, RELEASED, EXPIRED, OFFERED, ACKED };
+
+private:
+    const RTNETADDRIPV4 m_addr;     /**< the address, fixed at construction */
+    State m_state;
+    ClientId m_id;                  /**< client the address is given to */
+    TimeStamp m_issued;             /**< set by setLeaseTime() */
+    uint32_t m_secLease;            /**< lease duration in seconds */
+
+public:
+    /* Declared only; Db.cpp does not define these. */
+    Binding();
+    Binding(const Binding &);
+
+    /** Creates a FREE binding for @a addrParam with a default client id. */
+    explicit Binding(RTNETADDRIPV4 addrParam)
+        : m_addr(addrParam)
+        , m_state(FREE)
+        , m_issued()
+        , m_secLease()
+    {}
+
+    /** Creates a FREE binding for @a addrParam recording client @a idParam. */
+    Binding(RTNETADDRIPV4 addrParam, const ClientId &idParam)
+        : m_addr(addrParam)
+        , m_state(FREE)
+        , m_id(idParam)
+        , m_issued()
+        , m_secLease()
+    {}
+
+
+    RTNETADDRIPV4 addr() const { return m_addr; }
+
+    State state() const { return m_state; }
+    const char *stateName() const;
+
+    const ClientId &id() const { return m_id; }
+
+    uint32_t leaseTime() const { return m_secLease; }
+    TimeStamp issued() const { return m_issued; }
+
+    /** Sets the state; returns *this to allow chaining. */
+    Binding &setState(State enmNewState)
+    {
+        m_state = enmNewState;
+        return *this;
+    }
+
+    Binding &setState(const char *pszStateName);
+
+    /** Starts a lease of @a cSecs seconds as of now; returns *this. */
+    Binding &setLeaseTime(uint32_t cSecs)
+    {
+        m_issued = TimeStamp::now();
+        m_secLease = cSecs;
+        return *this;
+    }
+
+    /** Hands the binding to client @a idNew and resets it to FREE. */
+    Binding &giveTo(const ClientId &idNew)
+    {
+        m_id = idNew;
+        m_state = FREE;
+        return *this;
+    }
+
+    /** Clears the client id and resets the binding to FREE. */
+    void free()
+    {
+        m_id = ClientId();
+        m_state = FREE;
+    }
+
+    bool expire(TimeStamp deadline);
+    /** Same as expire(TimeStamp) with the current time as deadline. */
+    bool expire() { return expire(TimeStamp::now()); }
+
+    static Binding *fromXML(const xml::ElementNode *ndLease);
+    int toXML(xml::ElementNode *ndParent) const;
+
+public:
+    static void registerFormat(); /* %R[binding] */
+
+private:
+    static bool g_fFormatRegistered;
+    static DECLCALLBACK(size_t) rtStrFormat(
+        PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
+        const char *pszType, void const *pvValue,
+        int cchWidth, int cchPrecision, unsigned fFlags,
+        void *pvUser);
+};
+
+
+class Db /* address database: the list of bindings plus the pool of addresses */
+{
+private:
+ typedef std::list<Binding *> bindings_t; /* raw pointers to heap-allocated bindings */
+
+ const Config *m_pConfig; /* set by init() */
+ bindings_t m_bindings; /* all known bindings */
+ IPv4Pool m_pool; /* addresses to allocate from; set up by init() */
+
+public:
+ Db();
+ ~Db();
+
+ int init(const Config *pConfig); /* registers %R[binding], initializes the pool */
+
+ bool addressBelongs(RTNETADDRIPV4 addr) const { return m_pool.contains(addr); } /* true if the pool contains addr */
+
+ Binding *allocateBinding(const DhcpClientMessage &req); /* NULL if nothing could be allocated */
+ bool releaseBinding(const DhcpClientMessage &req); /* true if a matching binding was found */
+
+ void cancelOffer(const DhcpClientMessage &req); /* OFFERED -> RELEASED for the requested address */
+
+ void expire(); /* runs Binding::expire() on every binding */
+
+public:
+ int loadLeases(const std::string &strFileName); /* reads the XML leases file */
+ void loadLease(const xml::ElementNode *ndLease); /* loads one Lease element */
+
+ int writeLeases(const std::string &strFileName) const; /* writes the XML leases file */
+
+private:
+ Binding *createBinding(const ClientId &id = ClientId()); /* any address from the pool */
+ Binding *createBinding(RTNETADDRIPV4 addr, const ClientId &id = ClientId()); /* this specific address */
+
+ Binding *allocateAddress(const ClientId &id, RTNETADDRIPV4 addr); /* addr of zero means "any" */
+
+ /* add binding e.g. from the leases file; the pointer is stored only on success */
+ int addBinding(Binding *b);
+};
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_Db_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/Defs.h b/src/VBox/NetworkServices/Dhcpd/Defs.h
new file mode 100644
index 00000000..ee1e0b95
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/Defs.h
@@ -0,0 +1,62 @@
+/* $Id: Defs.h $ */
+/** @file
+ * DHCP server - common definitions
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_Defs_h
+#define VBOX_INCLUDED_SRC_Dhcpd_Defs_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/stdint.h>
+#include <iprt/string.h>
+#include <VBox/log.h>
+
+#include <map>
+#include <vector>
+
+#if __cplusplus >= 199711 /* NOTE(review): 199711 is the C++98 value while std::shared_ptr needs a C++11 <memory>; presumably kept low for compilers that under-report __cplusplus - confirm */
+#include <memory>
+using std::shared_ptr;
+#else
+#include <tr1/memory>
+using std::tr1::shared_ptr; /* NOTE(review): optmap_t below spells std::shared_ptr explicitly, which this branch does not provide */
+#endif
+
+typedef std::vector<uint8_t> octets_t; /* raw sequence of bytes */
+
+typedef std::map<uint8_t, octets_t> rawopts_t; /* byte key -> raw bytes; presumably keyed by DHCP option code */
+
+class DhcpOption;
+typedef std::map<uint8_t, std::shared_ptr<DhcpOption> > optmap_t; /* byte key -> shared DhcpOption; presumably keyed by DHCP option code */
+
+/** Byte-wise equality of two MAC addresses. */
+inline bool operator==(const RTMAC &l, const RTMAC &r)
+{
+    const int iDiff = memcmp(&l, &r, sizeof(RTMAC));
+    return iDiff == 0;
+}
+
+/** Byte-wise (memcmp) ordering of two MAC addresses. */
+inline bool operator<(const RTMAC &l, const RTMAC &r)
+{
+    const int iDiff = memcmp(&l, &r, sizeof(RTMAC));
+    return iDiff < 0;
+}
+
+#if 1 /* compile-time switch: 1 maps LogDHCP to LogRel, 0 maps it to RTPrintf */
+#define LogDHCP LogRel
+#else
+#define LogDHCP(args) RTPrintf args
+#endif
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_Defs_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/DhcpMessage.cpp b/src/VBox/NetworkServices/Dhcpd/DhcpMessage.cpp
new file mode 100644
index 00000000..2165959f
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/DhcpMessage.cpp
@@ -0,0 +1,412 @@
+/* $Id: DhcpMessage.cpp $ */
+/** @file
+ * DHCP Message and its de/serialization.
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include "DhcpMessage.h"
+#include "DhcpOptions.h"
+
+#include <iprt/string.h>
+#include <iprt/stream.h>
+
+
+
+DhcpMessage::DhcpMessage()
+    : m_xid(0), m_flags(0), m_mac(), /* value-initialize the MAC too, so no member is indeterminate */
+      m_ciaddr(), m_yiaddr(), m_siaddr(), m_giaddr(),
+      m_sname(), m_file(),
+      m_optMessageType()
+{
+}
+
+
+/* static - validate a BOOTP request datagram and build a message from it; returns NULL when rejected, otherwise a heap object the caller must delete */
+DhcpClientMessage *DhcpClientMessage::parse(bool broadcasted, const void *buf, size_t buflen)
+{
+ if (buflen < RT_OFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts)) /* everything up to the options area must be there */
+ {
+ RTPrintf("%s: %zu bytes datagram is too short\n", __FUNCTION__, buflen);
+ return NULL;
+ }
+
+ PCRTNETBOOTP bp = (PCRTNETBOOTP)buf;
+
+ if (bp->bp_op != RTNETBOOTP_OP_REQUEST)
+ {
+ RTPrintf("%s: bad opcode: %d\n", __FUNCTION__, bp->bp_op);
+ return NULL;
+ }
+
+ if (bp->bp_htype != RTNET_ARP_ETHER)
+ {
+ RTPrintf("%s: unsupported htype %d\n", __FUNCTION__, bp->bp_htype);
+ return NULL;
+ }
+
+ if (bp->bp_hlen != sizeof(RTMAC))
+ {
+ RTPrintf("%s: unexpected hlen %d\n", __FUNCTION__, bp->bp_hlen);
+ return NULL;
+ }
+
+ if ( (bp->bp_chaddr.Mac.au8[0] & 0x01) != 0
+ && (bp->bp_flags & RTNET_DHCP_FLAG_BROADCAST) == 0)
+ { /* only reported; the datagram is still processed */
+ RTPrintf("%s: multicast chaddr %RTmac without broadcast flag\n",
+ __FUNCTION__, &bp->bp_chaddr.Mac);
+ }
+
+ /* we don't want to deal with forwarding */
+ if (bp->bp_giaddr.u != 0)
+ {
+ RTPrintf("%s: giaddr %RTnaipv4\n", __FUNCTION__, bp->bp_giaddr.u);
+ return NULL;
+ }
+
+ if (bp->bp_hops != 0)
+ {
+ RTPrintf("%s: non-zero hops %d\n", __FUNCTION__, bp->bp_hops);
+ return NULL;
+ }
+
+ std::unique_ptr<DhcpClientMessage> msg(new DhcpClientMessage()); /* freed automatically on every early return below */
+
+ msg->m_broadcasted = broadcasted;
+
+ msg->m_xid = bp->bp_xid;
+ msg->m_flags = bp->bp_flags; /* stored exactly as found in the header */
+
+ msg->m_mac = bp->bp_chaddr.Mac;
+
+ msg->m_ciaddr = bp->bp_ciaddr;
+ msg->m_yiaddr = bp->bp_yiaddr;
+ msg->m_siaddr = bp->bp_siaddr;
+ msg->m_giaddr = bp->bp_giaddr;
+
+ if (bp->bp_vend.Dhcp.dhcp_cookie != RT_H2N_U32_C(RTNET_DHCP_COOKIE))
+ {
+ RTPrintf("bad cookie\n");
+ return NULL;
+ }
+
+ int overload; /* DHCP_OPTION_OVERLOAD_* bits from the options area, negative on parse error */
+ overload = msg->parseOptions(&bp->bp_vend.Dhcp.dhcp_opts,
+ buflen - RT_OFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts));
+ if (overload < 0)
+ return NULL;
+
+ /* "The 'file' field MUST be interpreted next ..." */
+ if (overload & DHCP_OPTION_OVERLOAD_FILE) {
+ int status = msg->parseOptions(bp->bp_file, sizeof(bp->bp_file));
+ if (status != 0) /* parse error, or an Option Overload inside the field */
+ return NULL;
+ }
+ else if (bp->bp_file[0] != '\0')
+ {
+ /* must be zero terminated, ignore if not */
+ const char *pszFile = (const char *)bp->bp_file;
+ size_t len = RTStrNLen(pszFile, sizeof(bp->bp_file));
+ if (len < sizeof(bp->bp_file))
+ msg->m_file.assign(pszFile, len);
+ }
+
+ /* "... followed by the 'sname' field." */
+ if (overload & DHCP_OPTION_OVERLOAD_SNAME) {
+ int status = msg->parseOptions(bp->bp_sname, sizeof(bp->bp_sname));
+ if (status != 0) /* NB: this includes "nested" Option Overload */
+ return NULL;
+ }
+ else if (bp->bp_sname[0] != '\0')
+ {
+ /* must be zero terminated, ignore if not */
+ const char *pszSName = (const char *)bp->bp_sname;
+ size_t len = RTStrNLen(pszSName, sizeof(bp->bp_sname));
+ if (len < sizeof(bp->bp_sname))
+ msg->m_sname.assign(pszSName, len);
+ }
+
+ msg->m_optMessageType = OptMessageType(*msg); /* decoded from the raw options collected above */
+ if (!msg->m_optMessageType.present())
+ return NULL;
+
+ msg->m_id = ClientId(msg->m_mac, OptClientId(*msg));
+
+ return msg.release(); /* ownership passes to the caller */
+}
+
+/* Collect the options found in buf into m_rawopts; returns the Option Overload value seen (0 if none) or -1 on malformed input. */
+int DhcpClientMessage::parseOptions(const void *buf, size_t buflen)
+{
+ uint8_t opt, optlen;
+ const uint8_t *data;
+ int overload;
+
+ overload = 0;
+
+ data = static_cast<const uint8_t *>(buf);
+ while (buflen > 0) {
+ opt = *data++;
+ --buflen;
+
+ if (opt == RTNET_DHCP_OPT_PAD) { /* single octet, no length field */
+ continue;
+ }
+
+ if (opt == RTNET_DHCP_OPT_END) { /* anything after END is ignored */
+ break;
+ }
+
+ if (buflen == 0) {
+ RTPrintf("option %d has no length field\n", opt);
+ return -1;
+ }
+
+ optlen = *data++;
+ --buflen;
+
+ if (optlen > buflen) {
+ RTPrintf("option %d truncated (length %d, but only %lu bytes left)\n",
+ opt, optlen, (unsigned long)buflen);
+ return -1;
+ }
+
+#if 0
+ rawopts_t::const_iterator it(m_optmap.find(opt));
+ if (it != m_optmap.cend())
+ return -1;
+#endif
+ if (opt == RTNET_DHCP_OPT_OPTION_OVERLOAD) { /* consumed here, never stored in m_rawopts */
+ if (optlen != 1) {
+ RTPrintf("Overload Option (option %d) has invalid length %d\n",
+ opt, optlen);
+ return -1;
+ }
+
+ overload = *data;
+
+ if ((overload & ~DHCP_OPTION_OVERLOAD_MASK) != 0) {
+ RTPrintf("Overload Option (option %d) has invalid value 0x%x\n",
+ opt, overload);
+ return -1;
+ }
+ }
+ else
+ {
+ m_rawopts.insert(std::make_pair(opt, octets_t(data, data + optlen))); /* map insert: a repeated code keeps its first value */
+ }
+
+ data += optlen;
+ buflen -= optlen;
+ }
+
+ return overload;
+}
+
+/* Write a human-readable summary of this request (type, ids, addresses, options) through LogDHCP. */
+void DhcpClientMessage::dump() const
+{
+    switch (m_optMessageType.value())
+    {
+        case RTNET_DHCP_MT_DISCOVER:
+            LogDHCP(("DISCOVER"));
+            break;
+
+        case RTNET_DHCP_MT_REQUEST:
+            LogDHCP(("REQUEST"));
+            break;
+
+        case RTNET_DHCP_MT_INFORM:
+            LogDHCP(("INFORM"));
+            break;
+
+        case RTNET_DHCP_MT_DECLINE:
+            LogDHCP(("DECLINE"));
+            break;
+
+        case RTNET_DHCP_MT_RELEASE:
+            LogDHCP(("RELEASE"));
+            break;
+
+        default:
+            LogDHCP(("<Unknown Message Type %d>", m_optMessageType.value()));
+            break;
+    }
+
+    if (OptRapidCommit(*this).present())
+        LogDHCP((" (rapid commit)"));
+
+
+    const OptServerId sid(*this);
+    if (sid.present())
+        LogDHCP((" for server %RTnaipv4", sid.value().u));
+
+    LogDHCP((" xid 0x%08x", m_xid));
+    LogDHCP((" chaddr %RTmac\n", &m_mac));
+
+    const OptClientId cid(*this);
+    if (cid.present()) {
+        if (cid.value().size() > 0) /* "%.*" consumes an int, so the size_t must be narrowed explicitly */
+            LogDHCP((" client id: %.*Rhxs\n", static_cast<int>(cid.value().size()), &cid.value().front()));
+        else
+            LogDHCP((" client id: <empty>\n"));
+    }
+
+    LogDHCP((" ciaddr %RTnaipv4", m_ciaddr.u));
+    if (m_yiaddr.u != 0)
+        LogDHCP((" yiaddr %RTnaipv4", m_yiaddr.u));
+    if (m_siaddr.u != 0)
+        LogDHCP((" siaddr %RTnaipv4", m_siaddr.u));
+    if (m_giaddr.u != 0)
+        LogDHCP((" giaddr %RTnaipv4", m_giaddr.u));
+    LogDHCP(("%s\n", broadcast() ? " broadcast" : "")); /* leading blank separates it from the last address */
+
+
+    const OptRequestedAddress reqAddr(*this);
+    if (reqAddr.present())
+        LogDHCP((" requested address %RTnaipv4", reqAddr.value().u));
+    const OptLeaseTime reqLeaseTime(*this);
+    if (reqLeaseTime.present()) /* print the lease time itself, not the requested address */
+        LogDHCP((" requested lease time %u", reqLeaseTime.value()));
+    if (reqAddr.present() || reqLeaseTime.present())
+        LogDHCP(("\n"));
+
+    const OptParameterRequest params(*this);
+    if (params.present())
+    {
+        LogDHCP((" params {"));
+        typedef OptParameterRequest::value_t::const_iterator it_t;
+        for (it_t it = params.value().begin(); it != params.value().end(); ++it)
+            LogDHCP((" %d", *it));
+        LogDHCP((" }\n"));
+    }
+
+    bool fHeader = true; /* print the "other options" heading once, and only if needed */
+    for (rawopts_t::const_iterator it = m_rawopts.begin();
+         it != m_rawopts.end(); ++it)
+    {
+        const uint8_t optcode = (*it).first;
+        switch (optcode) {
+            case OptMessageType::optcode:      /* FALLTHROUGH */
+            case OptClientId::optcode:         /* FALLTHROUGH */
+            case OptRequestedAddress::optcode: /* FALLTHROUGH */
+            case OptLeaseTime::optcode:        /* FALLTHROUGH */
+            case OptParameterRequest::optcode: /* FALLTHROUGH */
+            case OptRapidCommit::optcode:
+                break; /* already reported above */
+
+            default:
+                if (fHeader)
+                {
+                    LogDHCP((" other options:"));
+                    fHeader = false;
+                }
+                LogDHCP((" %d", optcode));
+                break;
+        }
+    }
+    if (!fHeader)
+        LogDHCP(("\n"));
+}
+
+
+DhcpServerMessage::DhcpServerMessage(const DhcpClientMessage &req,
+                                     uint8_t messageTypeParam, RTNETADDRIPV4 serverAddr)
+    : DhcpMessage(),
+      m_optServerId(serverAddr)
+{
+    /* take over the request fields a reply has to repeat (cf. RFC2131 Table 3) */
+    m_mac = req.mac();
+    m_giaddr = req.giaddr();
+    m_flags = req.flags();
+    m_xid = req.xid();
+    if (RTNET_DHCP_MT_REQUEST == req.messageType())
+        m_ciaddr = req.ciaddr();
+
+    m_optMessageType = OptMessageType(messageTypeParam);
+
+    /* destination starts out as broadcast; setDst() can change it later */
+    m_dst.u = 0xffffffff;
+}
+
+
+void DhcpServerMessage::maybeUnicast(const DhcpClientMessage &req)
+{
+    if (req.broadcast() || req.ciaddr().u == 0) return; /* leave the destination untouched */
+    setDst(req.ciaddr());
+}
+
+
+void DhcpServerMessage::addOption(DhcpOption *opt)
+{
+    operator<<(m_optmap, opt); /* insert-or-erase, exactly as "m_optmap << opt" */
+}
+
+
+void DhcpServerMessage::addOptions(const optmap_t &optmap)
+{
+    /* merge entry by entry through the shared_ptr overload of << */
+    optmap_t::const_iterator cur = optmap.begin();
+    const optmap_t::const_iterator last = optmap.end();
+    for (; cur != last; ++cur)
+        m_optmap << cur->second;
+}
+
+/* Append the wire form of this reply (BOOTP header, cookie, options, END, padding) to data; always returns VINF_SUCCESS. */
+int DhcpServerMessage::encode(octets_t &data)
+{
+    /*
+     * Header, including DHCP cookie.
+     */
+    RTNETBOOTP bp;
+    RT_ZERO(bp);
+
+    bp.bp_op = RTNETBOOTP_OP_REPLY;
+    bp.bp_htype = RTNET_ARP_ETHER;
+    bp.bp_hlen = sizeof(RTMAC);
+
+    bp.bp_xid = m_xid;
+    bp.bp_flags = m_flags; /* echo the client's flags (copied in the ctor, same byte order as received) */
+    bp.bp_ciaddr = m_ciaddr;
+    bp.bp_yiaddr = m_yiaddr;
+    bp.bp_siaddr = m_siaddr;
+    bp.bp_giaddr = m_giaddr;
+
+    bp.bp_chaddr.Mac = m_mac;
+
+    bp.bp_vend.Dhcp.dhcp_cookie = RT_H2N_U32_C(RTNET_DHCP_COOKIE);
+
+    data.insert(data.end(), (uint8_t *)&bp, (uint8_t *)&bp.bp_vend.Dhcp.dhcp_opts);
+
+    /*
+     * Options
+     */
+    data << m_optServerId
+         << m_optMessageType;
+
+    for (optmap_t::const_iterator it ( m_optmap.begin() );
+         it != m_optmap.end(); ++it)
+    {
+        RTPrintf("encoding option %d\n", it->first);
+        DhcpOption &opt = *it->second;
+        data << opt; /* an option that fails to encode is simply left out */
+    }
+
+    data << OptEnd();
+
+    if (data.size() < 548) /* XXX */
+        data.resize(548);  /* zero-fills the tail up to that size */
+
+    return VINF_SUCCESS;
+}
diff --git a/src/VBox/NetworkServices/Dhcpd/DhcpMessage.h b/src/VBox/NetworkServices/Dhcpd/DhcpMessage.h
new file mode 100644
index 00000000..303f352c
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/DhcpMessage.h
@@ -0,0 +1,138 @@
+/* $Id: DhcpMessage.h $ */
+/** @file
+ * DHCP Message and its de/serialization.
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_DhcpMessage_h
+#define VBOX_INCLUDED_SRC_Dhcpd_DhcpMessage_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include "Defs.h"
+#include <iprt/net.h>
+#include <string>
+#include "ClientId.h"
+#include "DhcpOptions.h"
+
+
+/* move to <iprt/net.h>? */
+#define DHCP_OPTION_OVERLOAD_MASK 0x3
+#define DHCP_OPTION_OVERLOAD_FILE 0x1
+#define DHCP_OPTION_OVERLOAD_SNAME 0x2
+
+
+class DhcpMessage /* header fields shared by DhcpClientMessage and DhcpServerMessage */
+{
+protected:
+ uint32_t m_xid;
+ uint16_t m_flags; /* tested against RTNET_DHCP_FLAG_BROADCAST by broadcast() */
+
+ RTMAC m_mac;
+
+ RTNETADDRIPV4 m_ciaddr;
+ RTNETADDRIPV4 m_yiaddr;
+ RTNETADDRIPV4 m_siaddr;
+ RTNETADDRIPV4 m_giaddr;
+
+ std::string m_sname;
+ std::string m_file;
+
+ OptMessageType m_optMessageType;
+
+public:
+ DhcpMessage();
+
+ /* read accessors */
+ uint32_t xid() const { return m_xid; }
+
+ uint16_t flags() const { return m_flags; }
+ bool broadcast() const { return (m_flags & RTNET_DHCP_FLAG_BROADCAST) != 0; }
+
+ const RTMAC &mac() const { return m_mac; }
+
+ RTNETADDRIPV4 ciaddr() const { return m_ciaddr; }
+ RTNETADDRIPV4 yiaddr() const { return m_yiaddr; }
+ RTNETADDRIPV4 siaddr() const { return m_siaddr; }
+ RTNETADDRIPV4 giaddr() const { return m_giaddr; }
+
+ void setCiaddr(RTNETADDRIPV4 addr) { m_ciaddr = addr; }
+ void setYiaddr(RTNETADDRIPV4 addr) { m_yiaddr = addr; }
+ void setSiaddr(RTNETADDRIPV4 addr) { m_siaddr = addr; }
+ void setGiaddr(RTNETADDRIPV4 addr) { m_giaddr = addr; }
+
+ uint8_t messageType() const /* asserts that the message type option is present */
+ {
+ Assert(m_optMessageType.present());
+ return m_optMessageType.value();
+ }
+};
+
+
+class DhcpClientMessage /* a request as produced by parse() */
+ : public DhcpMessage
+{
+protected:
+ rawopts_t m_rawopts; /* options keyed by code, values still undecoded */
+ ClientId m_id; /* built by parse() from the MAC and the client id option */
+ bool m_broadcasted; /* the flag handed to parse() by its caller */
+
+public:
+ static DhcpClientMessage *parse(bool broadcasted, const void *buf, size_t buflen); /* NULL when the datagram is rejected */
+
+ bool broadcasted() const { return m_broadcasted; }
+
+ const rawopts_t &rawopts() const { return m_rawopts; }
+ const ClientId &clientId() const { return m_id; }
+
+ void dump() const;
+
+protected:
+ int parseOptions(const void *buf, size_t buflen); /* overload bits, or -1 on malformed input */
+};
+
+
+
+class DhcpServerMessage /* a reply constructed from a client request */
+ : public DhcpMessage
+{
+protected:
+ RTNETADDRIPV4 m_dst; /* set to 0xffffffff by the ctor; see setDst()/maybeUnicast() */
+
+ OptServerId m_optServerId;
+
+ optmap_t m_optmap; /* additional options, keyed by option code */
+
+public:
+ DhcpServerMessage(const DhcpClientMessage &req,
+ uint8_t messageType, RTNETADDRIPV4 serverAddr);
+
+ RTNETADDRIPV4 dst() const { return m_dst; }
+ void setDst(RTNETADDRIPV4 aDst) { m_dst = aDst; }
+
+ void maybeUnicast(const DhcpClientMessage &req); /* switches m_dst to ciaddr when the request allows it */
+
+ void addOption(DhcpOption *opt);
+ void addOption(const DhcpOption &opt) /* stores a clone, the argument itself is not kept */
+ {
+ addOption(opt.clone());
+ }
+
+ void addOptions(const optmap_t &optmap);
+
+ int encode(octets_t &data); /* appends the wire form to data */
+};
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_DhcpMessage_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/DhcpOptions.cpp b/src/VBox/NetworkServices/Dhcpd/DhcpOptions.cpp
new file mode 100644
index 00000000..03820569
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/DhcpOptions.cpp
@@ -0,0 +1,242 @@
+/* $Id: DhcpOptions.cpp $ */
+/** @file
+ * DHCP server - DHCP options
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include "DhcpOptions.h"
+#include "DhcpMessage.h"
+
+
+optmap_t &operator<<(optmap_t &optmap, DhcpOption *option)
+{
+    if (option == NULL)
+        return optmap;
+    /* Take ownership in every case, so an absent option is freed instead of leaked. */
+    const std::shared_ptr<DhcpOption> owned(option);
+    if (owned->present())
+        optmap[owned->optcode()] = owned;   /* add, or replace the entry with this code */
+    else
+        optmap.erase(owned->optcode());     /* absent option: drop any entry with this code */
+    return optmap;
+}
+
+
+optmap_t &operator<<(optmap_t &optmap, const std::shared_ptr<DhcpOption> &option)
+{
+    if (option) /* an empty pointer leaves the map as it is */
+    {
+        const uint8_t code = option->optcode();
+        if (!option->present())
+            optmap.erase(code);     /* absent option removes the entry */
+        else
+            optmap[code] = option;  /* present option adds or replaces it */
+    }
+    return optmap;
+}
+
+/* Append "code, length, value" to dst; on failure dst is restored and an error status returned. */
+int DhcpOption::encode(octets_t &dst) const
+{
+    if (!m_fPresent)
+        return VERR_INVALID_STATE;
+
+    const size_t cbOrig = dst.size();
+    append(dst, m_OptCode);
+    appendLength(dst, 0); /* placeholder, patched once the value size is known */
+
+    /* The length field is a single octet, so 0..UINT8_MAX inclusive can be encoded. */
+    const ssize_t cbValue = encodeValue(dst);
+    if (cbValue < 0 || cbValue > UINT8_MAX)
+    {
+        dst.resize(cbOrig); /* undo */
+        return VERR_INVALID_PARAMETER;
+    }
+
+    dst[cbOrig + 1] = static_cast<uint8_t>(cbValue);
+    return VINF_SUCCESS;
+}
+
+
+/* static */
+const octets_t *DhcpOption::findOption(const rawopts_t &aOptMap, uint8_t aOptCode)
+{
+    /* hand out a pointer into the map, or NULL when the code is not there */
+    const rawopts_t::const_iterator found = aOptMap.find(aOptCode);
+    if (found != aOptMap.end())
+        return &found->second;
+    return NULL;
+}
+
+
+int DhcpOption::decode(const rawopts_t &map)
+{
+    /* look the value up under our own option code */
+    const octets_t * const pValue = DhcpOption::findOption(map, m_OptCode);
+    if (!pValue)
+        return VERR_NOT_FOUND;
+
+    /* any decodeValue() failure is reported as VERR_INVALID_PARAMETER */
+    if (RT_SUCCESS(decodeValue(*pValue, pValue->size())))
+        return VINF_SUCCESS;
+    return VERR_INVALID_PARAMETER;
+}
+
+
+int DhcpOption::decode(const DhcpClientMessage &req)
+{
+ return decode(req.rawopts()); /* same as decoding from the request's raw option map */
+}
+
+
+int DhcpOption::parse1(uint8_t &aValue, const char *pcszValue)
+{
+    /* leading blanks are skipped first; trailing blanks are forgiven afterwards */
+    const char * const pszTrimmed = RTStrStripL(pcszValue);
+    const int rc = RTStrToUInt8Full(pszTrimmed, 10, &aValue);
+
+    return rc != VERR_TRAILING_SPACES ? rc : VINF_SUCCESS;
+}
+
+
+int DhcpOption::parse1(uint16_t &aValue, const char *pcszValue)
+{
+    /* leading blanks are skipped first; trailing blanks are forgiven afterwards */
+    const char * const pszTrimmed = RTStrStripL(pcszValue);
+    const int rc = RTStrToUInt16Full(pszTrimmed, 10, &aValue);
+
+    return rc != VERR_TRAILING_SPACES ? rc : VINF_SUCCESS;
+}
+
+
+int DhcpOption::parse1(uint32_t &aValue, const char *pcszValue)
+{
+    /* leading blanks are skipped first; trailing blanks are forgiven afterwards */
+    const char * const pszTrimmed = RTStrStripL(pcszValue);
+    const int rc = RTStrToUInt32Full(pszTrimmed, 10, &aValue);
+
+    return rc != VERR_TRAILING_SPACES ? rc : VINF_SUCCESS;
+}
+
+
+int DhcpOption::parse1(RTNETADDRIPV4 &aValue, const char *pcszValue)
+{
+ return RTNetStrToIPv4Addr(pcszValue, &aValue); /* status of the IPRT parser is passed through unchanged */
+}
+
+/* Parse a blank-separated list of IPv4 addresses; aList is replaced only when the whole string parsed. */
+int DhcpOption::parseList(std::vector<RTNETADDRIPV4> &aList, const char *pcszValue)
+{
+ std::vector<RTNETADDRIPV4> l; /* built on the side so a failure leaves aList untouched */
+ int rc;
+
+ pcszValue = RTStrStripL(pcszValue);
+ do {
+ RTNETADDRIPV4 Addr;
+ char *pszNext;
+
+ rc = RTNetStrToIPv4AddrEx(pcszValue, &Addr, &pszNext);
+ if (RT_FAILURE(rc))
+ return VERR_INVALID_PARAMETER;
+
+ if (rc == VWRN_TRAILING_CHARS)
+ {
+ pcszValue = RTStrStripL(pszNext);
+ if (pcszValue == pszNext) /* garbage after address */
+ return VERR_INVALID_PARAMETER;
+ }
+
+ l.push_back(Addr);
+
+ /*
+ * If we got VINF_SUCCESS or VWRN_TRAILING_SPACES then this
+ * was the last address and we are done.
+ */
+ } while (rc == VWRN_TRAILING_CHARS);
+
+ aList.swap(l);
+ return VINF_SUCCESS;
+}
+
+
+/*
+ * XXX: See DHCPServer::encodeOption()
+ */
+int DhcpOption::parseHex(octets_t &aRawValue, const char *pcszValue)
+{
+    octets_t data; /* built on the side so a failure leaves aRawValue untouched */
+    char *pszNext;
+    int rc;
+
+    if (pcszValue == NULL || *pcszValue == '\0')
+        return VERR_INVALID_PARAMETER;
+
+    while (*pcszValue != '\0')
+    {
+        if (data.size() >= UINT8_MAX) /* checked before the push: never grow past 255 octets */
+            return VERR_INVALID_PARAMETER;
+
+        uint8_t u8Byte;
+        rc = RTStrToUInt8Ex(pcszValue, &pszNext, 16, &u8Byte);
+        if (!RT_SUCCESS(rc))
+            return rc;
+
+        if (*pszNext == ':') /* octets are separated by colons */
+            ++pszNext;
+        else if (*pszNext != '\0')
+            return VERR_PARSE_ERROR;
+
+        data.push_back(u8Byte);
+        pcszValue = pszNext;
+    }
+
+    aRawValue.swap(data);
+    return VINF_SUCCESS;
+}
+
+
+DhcpOption *DhcpOption::parse(uint8_t aOptCode, int aEnc, const char *pcszValue)
+{
+ switch (aEnc) /* returns a new option object, or NULL for unknown encodings/codes (and whatever the class parser returns) */
+ {
+ case 0: /* DhcpOptEncoding_Legacy */
+ switch (aOptCode)
+ {
+#define HANDLE(_OptClass) \
+ case _OptClass::optcode: \
+ return _OptClass::parse(pcszValue);
+
+ HANDLE(OptSubnetMask);
+ HANDLE(OptRouter);
+ HANDLE(OptDNS);
+ HANDLE(OptHostName);
+ HANDLE(OptDomainName);
+ HANDLE(OptRootPath);
+ HANDLE(OptLeaseTime);
+ HANDLE(OptRenewalTime);
+ HANDLE(OptRebindingTime);
+
+#undef HANDLE
+ default:
+ return NULL;
+ }
+ break; /* not reached: every path of the inner switch returns */
+
+ case 1: /* value given as colon-separated hex octets, see RawOption::parse() */
+ return RawOption::parse(aOptCode, pcszValue);
+
+ default:
+ return NULL;
+ }
+}
diff --git a/src/VBox/NetworkServices/Dhcpd/DhcpOptions.h b/src/VBox/NetworkServices/Dhcpd/DhcpOptions.h
new file mode 100644
index 00000000..b16782a4
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/DhcpOptions.h
@@ -0,0 +1,642 @@
+/* $Id: DhcpOptions.h $ */
+/** @file
+ * DHCP server - DHCP options
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_DhcpOptions_h
+#define VBOX_INCLUDED_SRC_Dhcpd_DhcpOptions_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include "Defs.h"
+
+#include <string.h>
+
+#include <iprt/err.h>
+#include <iprt/types.h>
+#include <iprt/asm.h>
+#include <iprt/stdint.h>
+#include <iprt/net.h>
+
+#include <string>
+
+class DhcpClientMessage;
+
+
+class DhcpOption /* abstract base: an option code plus a "present" flag; subclasses hold the value */
+{
+  protected:
+    uint8_t m_OptCode;
+    bool m_fPresent;
+
+  public:
+    explicit DhcpOption(uint8_t aOptCode)
+      : m_OptCode(aOptCode), m_fPresent(true) {}
+
+    DhcpOption(uint8_t aOptCode, bool fPresent)
+      : m_OptCode(aOptCode), m_fPresent(fPresent) {}
+
+    virtual DhcpOption *clone() const = 0;
+
+    virtual ~DhcpOption() {}
+
+  public:
+    static DhcpOption *parse(uint8_t aOptCode, int aEnc, const char *pcszValue);
+
+  public:
+    uint8_t optcode() const { return m_OptCode; }
+    bool present() const { return m_fPresent; }
+
+  public:
+    int encode(octets_t &dst) const;
+
+    int decode(const rawopts_t &map);
+    int decode(const DhcpClientMessage &req);
+
+  protected:
+    virtual ssize_t encodeValue(octets_t &dst) const = 0; /* appends the value, returns its size or < 0 */
+    virtual int decodeValue(const octets_t &src, size_t cb) = 0;
+
+  protected:
+    static const octets_t *findOption(const rawopts_t &aOptMap, uint8_t aOptCode);
+
+  protected:
+    /*
+     * Serialization (multi-byte integers are converted with RT_H2N_*)
+     */
+    static void append(octets_t &aDst, uint8_t aValue)
+    {
+        aDst.push_back(aValue);
+    }
+
+    static void append(octets_t &aDst, uint16_t aValue)
+    {
+        RTUINT16U u16 = { RT_H2N_U16(aValue) };
+        aDst.insert(aDst.end(), u16.au8, u16.au8 + sizeof(aValue));
+    }
+
+    static void append(octets_t &aDst, uint32_t aValue)
+    {
+        RTUINT32U u32 = { RT_H2N_U32(aValue) };
+        aDst.insert(aDst.end(), u32.au8, u32.au8 + sizeof(aValue));
+    }
+
+    static void append(octets_t &aDst, RTNETADDRIPV4 aIPv4)
+    {
+        aDst.insert(aDst.end(), aIPv4.au8, aIPv4.au8 + sizeof(aIPv4)); /* copied byte for byte */
+    }
+
+    static void append(octets_t &aDst, const char *pszString, size_t cb)
+    {
+        aDst.insert(aDst.end(), pszString, pszString + cb);
+    }
+
+    static void append(octets_t &aDst, const std::string &str)
+    {
+        append(aDst, str.c_str(), str.size());
+    }
+
+    /* non-overloaded name to avoid ambiguity */
+    static void appendLength(octets_t &aDst, size_t cb)
+    {
+        append(aDst, static_cast<uint8_t>(cb));
+    }
+
+    /* The extract() helpers read at pos and advance it past what they consumed. */
+    /* None of them checks bounds; callers validate the length beforehand. */
+    /*
+     * Deserialization
+     */
+    static void extract(uint8_t &aValue, octets_t::const_iterator &pos)
+    {
+        aValue = *pos;
+        pos += sizeof(uint8_t);
+    }
+
+    static void extract(uint16_t &aValue, octets_t::const_iterator &pos)
+    {
+        RTUINT16U u16;
+        memcpy(u16.au8, &pos[0], sizeof(uint16_t));
+        aValue = RT_N2H_U16(u16.u);
+        pos += sizeof(uint16_t);
+    }
+
+    static void extract(uint32_t &aValue, octets_t::const_iterator &pos)
+    {
+        RTUINT32U u32;
+        memcpy(u32.au8, &pos[0], sizeof(uint32_t));
+        aValue = RT_N2H_U32(u32.u);
+        pos += sizeof(uint32_t);
+    }
+
+    static void extract(RTNETADDRIPV4 &aValue, octets_t::const_iterator &pos)
+    {
+        memcpy(aValue.au8, &pos[0], sizeof(RTNETADDRIPV4));
+        pos += sizeof(RTNETADDRIPV4);
+    }
+
+    static void extract(std::string &aString, octets_t::const_iterator &pos, size_t cb)
+    {
+        aString.assign(pos, pos + cb); /* iterator range: never dereferences end(), even when cb is 0 */
+        pos += cb;
+    }
+
+
+    /*
+     * Parse textual representation (e.g. in config file)
+     */
+    static int parse1(uint8_t &aValue, const char *pcszValue);
+    static int parse1(uint16_t &aValue, const char *pcszValue);
+    static int parse1(uint32_t &aValue, const char *pcszValue);
+    static int parse1(RTNETADDRIPV4 &aValue, const char *pcszValue);
+
+    static int parseList(std::vector<RTNETADDRIPV4> &aList, const char *pcszValue);
+
+    static int parseHex(octets_t &aRawValue, const char *pcszValue);
+};
+
+
+inline octets_t &operator<<(octets_t &dst, const DhcpOption &option)
+{
+ option.encode(dst); /* status is dropped; on failure encode() leaves dst as it was */
+ return dst;
+}
+
+
+optmap_t &operator<<(optmap_t &optmap, DhcpOption *option);
+optmap_t &operator<<(optmap_t &optmap, const std::shared_ptr<DhcpOption> &option);
+
+
+
+/*
+ * Empty tag type, only for << OptEnd() syntactic sugar: appends the END option octet.
+ */
+struct OptEnd {};
+inline octets_t &operator<<(octets_t &dst, const OptEnd &end)
+{
+ RT_NOREF(end);
+
+ dst.push_back(RTNET_DHCP_OPT_END);
+ return dst;
+}
+
+
+
+/*
+ * Option that has no value: only its presence carries information
+ */
+class OptNoValueBase
+ : public DhcpOption
+{
+ public:
+ explicit OptNoValueBase(uint8_t aOptCode) /* starts out absent */
+ : DhcpOption(aOptCode, false) {}
+
+ OptNoValueBase(uint8_t aOptCode, bool fPresent)
+ : DhcpOption(aOptCode, fPresent) {}
+
+ OptNoValueBase(uint8_t aOptCode, const DhcpClientMessage &req)
+ : DhcpOption(aOptCode, false)
+ {
+ decode(req); /* stays absent unless the request carries the option with zero length */
+ }
+
+ virtual OptNoValueBase *clone() const
+ {
+ return new OptNoValueBase(*this);
+ }
+
+ protected:
+ virtual ssize_t encodeValue(octets_t &dst) const
+ {
+ RT_NOREF(dst);
+ return 0; /* nothing to append */
+ }
+
+ public:
+ static bool isLengthValid(size_t cb)
+ {
+ return cb == 0;
+ }
+
+ virtual int decodeValue(const octets_t &src, size_t cb)
+ {
+ RT_NOREF(src);
+
+ if (!isLengthValid(cb))
+ return VERR_INVALID_PARAMETER;
+
+ m_fPresent = true;
+ return VINF_SUCCESS;
+ }
+};
+
+template <uint8_t _OptCode>
+class OptNoValue /* binds OptNoValueBase to a compile-time option code */
+ : public OptNoValueBase
+{
+ public:
+ static const uint8_t optcode = _OptCode;
+
+ OptNoValue() /* absent */
+ : OptNoValueBase(optcode) {}
+
+ explicit OptNoValue(bool fPresent) /* there's no overloaded ctor with value */
+ : OptNoValueBase(optcode, fPresent) {}
+
+ explicit OptNoValue(const DhcpClientMessage &req) /* presence taken from the request */
+ : OptNoValueBase(optcode, req) {}
+};
+
+
+
+/*
+ * Option that contains single value of fixed-size type T
+ */
+template <typename T>
+class OptValueBase
+ : public DhcpOption
+{
+ public:
+ typedef T value_t;
+
+ protected:
+ T m_Value;
+
+ explicit OptValueBase(uint8_t aOptCode) /* absent, value-initialized */
+ : DhcpOption(aOptCode, false), m_Value() {}
+
+ OptValueBase(uint8_t aOptCode, const T &aOptValue) /* present with the given value */
+ : DhcpOption(aOptCode), m_Value(aOptValue) {}
+
+ OptValueBase(uint8_t aOptCode, const DhcpClientMessage &req)
+ : DhcpOption(aOptCode, false), m_Value()
+ {
+ decode(req); /* result ignored: on failure the option simply stays absent */
+ }
+
+ public:
+ virtual OptValueBase *clone() const
+ {
+ return new OptValueBase(*this);
+ }
+
+ public:
+ T &value() { return m_Value; }
+ const T &value() const { return m_Value; }
+
+ protected:
+ virtual ssize_t encodeValue(octets_t &dst) const
+ {
+ append(dst, m_Value); /* overload chosen by T */
+ return sizeof(T);
+ }
+
+ public:
+ static bool isLengthValid(size_t cb)
+ {
+ return cb == sizeof(T);
+ }
+
+ virtual int decodeValue(const octets_t &src, size_t cb)
+ {
+ if (!isLengthValid(cb)) /* must be exactly sizeof(T) octets */
+ return VERR_INVALID_PARAMETER;
+
+ octets_t::const_iterator pos(src.begin());
+ extract(m_Value, pos);
+
+ m_fPresent = true;
+ return VINF_SUCCESS;
+ }
+};
+
+template<uint8_t _OptCode, typename T>
+class OptValue /* binds OptValueBase<T> to a compile-time option code */
+ : public OptValueBase<T>
+{
+ public:
+ using typename OptValueBase<T>::value_t;
+
+ public:
+ static const uint8_t optcode = _OptCode;
+
+ OptValue()
+ : OptValueBase<T>(optcode) {}
+
+ explicit OptValue(const T &aOptValue)
+ : OptValueBase<T>(optcode, aOptValue) {}
+
+ explicit OptValue(const DhcpClientMessage &req)
+ : OptValueBase<T>(optcode, req) {}
+
+ static OptValue *parse(const char *pcszValue) /* new object from text, or NULL if parse1() fails */
+ {
+ typename OptValueBase<T>::value_t v;
+ int rc = DhcpOption::parse1(v, pcszValue);
+ if (RT_FAILURE(rc))
+ return NULL;
+ return new OptValue(v);
+ }
+};
+
+
+
+/*
+ * Option that contains a string.
+ */
+class OptStringBase
+ : public DhcpOption
+{
+ public:
+ typedef std::string value_t;
+
+ protected:
+ std::string m_String;
+
+ explicit OptStringBase(uint8_t aOptCode) /* absent, empty string */
+ : DhcpOption(aOptCode, false), m_String() {}
+
+ OptStringBase(uint8_t aOptCode, const std::string &aOptString) /* present with the given text */
+ : DhcpOption(aOptCode), m_String(aOptString) {}
+
+ OptStringBase(uint8_t aOptCode, const DhcpClientMessage &req)
+ : DhcpOption(aOptCode, false), m_String()
+ {
+ decode(req); /* result ignored: on failure the option simply stays absent */
+ }
+
+ public:
+ virtual OptStringBase *clone() const
+ {
+ return new OptStringBase(*this);
+ }
+
+ public:
+ std::string &value() { return m_String; }
+ const std::string &value() const { return m_String; }
+
+ protected:
+ virtual ssize_t encodeValue(octets_t &dst) const
+ {
+ if (!isLengthValid(m_String.size())) /* too long for the one-octet length */
+ return -1;
+
+ append(dst, m_String);
+ return m_String.size();
+ }
+
+ public:
+ static bool isLengthValid(size_t cb)
+ {
+ return cb <= UINT8_MAX;
+ }
+
+ virtual int decodeValue(const octets_t &src, size_t cb)
+ {
+ if (!isLengthValid(cb))
+ return VERR_INVALID_PARAMETER;
+
+ octets_t::const_iterator pos(src.begin());
+ extract(m_String, pos, cb); /* takes the first cb octets of src as the string */
+ m_fPresent = true;
+ return VINF_SUCCESS;
+ }
+};
+
+template<uint8_t _OptCode>
+class OptString /* binds OptStringBase to a compile-time option code */
+ : public OptStringBase
+{
+ public:
+ static const uint8_t optcode = _OptCode;
+
+ OptString()
+ : OptStringBase(optcode) {}
+
+ explicit OptString(const std::string &aOptString)
+ : OptStringBase(optcode, aOptString) {}
+
+ explicit OptString(const DhcpClientMessage &req)
+ : OptStringBase(optcode, req) {}
+
+ static OptString *parse(const char *pcszValue) /* NOTE(review): pcszValue is not checked for NULL before it becomes a std::string */
+ {
+ return new OptString(pcszValue);
+ }
+};
+
+
+
+/*
+ * Option that contains a list of values of type T
+ */
+template <typename T>
+class OptListBase
+ : public DhcpOption
+{
+ public:
+ typedef std::vector<T> value_t;
+
+ protected:
+ std::vector<T> m_List;
+
+ explicit OptListBase(uint8_t aOptCode) /* absent, empty list */
+ : DhcpOption(aOptCode, false), m_List() {}
+
+ OptListBase(uint8_t aOptCode, const T &aOptSingle) /* present with a one-element list */
+ : DhcpOption(aOptCode), m_List(1, aOptSingle) {}
+
+ OptListBase(uint8_t aOptCode, const std::vector<T> &aOptList)
+ : DhcpOption(aOptCode), m_List(aOptList) {}
+
+ OptListBase(uint8_t aOptCode, const DhcpClientMessage &req)
+ : DhcpOption(aOptCode, false), m_List()
+ {
+ decode(req); /* result ignored: on failure the option simply stays absent */
+ }
+
+ public:
+ virtual OptListBase *clone() const
+ {
+ return new OptListBase(*this);
+ }
+
+ public:
+ std::vector<T> &value() { return m_List; }
+ const std::vector<T> &value() const { return m_List; }
+
+ protected:
+ virtual ssize_t encodeValue(octets_t &dst) const
+ {
+ const size_t cbItem = sizeof(T);
+ size_t cbValue = 0;
+
+ for (size_t i = 0; i < m_List.size(); ++i)
+ {
+ if (cbValue + cbItem > UINT8_MAX) /* items that no longer fit are silently left out */
+ break;
+
+ append(dst, m_List[i]);
+ cbValue += cbItem;
+ }
+
+ return cbValue;
+ }
+
+ public:
+ static bool isLengthValid(size_t cb)
+ {
+ return cb % sizeof(T) == 0; /* whole number of items; zero is accepted */
+ }
+
+ virtual int decodeValue(const octets_t &src, size_t cb)
+ {
+ if (!isLengthValid(cb))
+ return VERR_INVALID_PARAMETER;
+
+ m_List.erase(m_List.begin(), m_List.end()); /* discard any previous contents */
+
+ octets_t::const_iterator pos(src.begin());
+ for (size_t i = 0; i < cb / sizeof(T); ++i)
+ {
+ T item;
+ extract(item, pos);
+ m_List.push_back(item);
+ }
+ m_fPresent = true;
+ return VINF_SUCCESS;
+ }
+};
+
+template<uint8_t _OptCode, typename T>
+class OptList /* binds OptListBase<T> to a compile-time option code */
+ : public OptListBase<T>
+
+{
+ public:
+ using typename OptListBase<T>::value_t;
+
+ public:
+ static const uint8_t optcode = _OptCode;
+
+ OptList()
+ : OptListBase<T>(optcode) {}
+
+ explicit OptList(const T &aOptSingle)
+ : OptListBase<T>(optcode, aOptSingle) {}
+
+ explicit OptList(const std::vector<T> &aOptList)
+ : OptListBase<T>(optcode, aOptList) {}
+
+ explicit OptList(const DhcpClientMessage &req)
+ : OptListBase<T>(optcode, req) {}
+
+ static OptList *parse(const char *pcszValue) /* new object from text; NULL on parse failure or an empty list */
+ {
+ typename OptListBase<T>::value_t v;
+ int rc = DhcpOption::parseList(v, pcszValue);
+ if (RT_FAILURE(rc) || v.empty())
+ return NULL;
+ return new OptList(v);
+ }
+};
+
+
+/*
+ * Options specified by raw binary data that we don't know how to
+ * interpret.
+ */
+class RawOption
+ : public DhcpOption
+{
+ protected:
+ octets_t m_Data;
+
+ public:
+ explicit RawOption(uint8_t aOptCode) /* absent, no data */
+ : DhcpOption(aOptCode, false), m_Data() {}
+
+ RawOption(uint8_t aOptCode, const octets_t &aSrc) /* present with a copy of aSrc */
+ : DhcpOption(aOptCode), m_Data(aSrc) {}
+
+ public:
+ virtual RawOption *clone() const
+ {
+ return new RawOption(*this);
+ }
+
+ /* no length check in either direction below; the encoding side is bounded by DhcpOption::encode() */
+ protected:
+ virtual ssize_t encodeValue(octets_t &dst) const
+ {
+ dst.insert(dst.end(), m_Data.begin(), m_Data.end());
+ return m_Data.size();
+ }
+
+ virtual int decodeValue(const octets_t &src, size_t cb)
+ {
+ octets_t::const_iterator beg(src.begin());
+ octets_t data(beg, beg + cb); /* keep the first cb octets */
+ m_Data.swap(data);
+
+ m_fPresent = true;
+ return VINF_SUCCESS;
+ }
+
+ public:
+ static RawOption *parse(uint8_t aOptCode, const char *pcszValue) /* new object from hex text, or NULL if parseHex() fails */
+ {
+ octets_t data;
+ int rc = DhcpOption::parseHex(data, pcszValue);
+ if (RT_FAILURE(rc))
+ return NULL;
+ return new RawOption(aOptCode, data);
+ }
+};
+
+
+
+/*
+ * Define the DHCP options we want to use.
+ */
+typedef OptValue<1, RTNETADDRIPV4> OptSubnetMask;
+typedef OptValue<2, uint32_t> OptTimeOffset;
+typedef OptList<3, RTNETADDRIPV4> OptRouter;
+typedef OptList<4, RTNETADDRIPV4> OptTimeServer;
+typedef OptList<6, RTNETADDRIPV4> OptDNS;
+typedef OptString<12> OptHostName;
+typedef OptString<15> OptDomainName;
+typedef OptString<17> OptRootPath;
+
+/* DHCP related options */
+typedef OptList<43, uint8_t> OptVendorSpecificInfo;
+typedef OptValue<50, RTNETADDRIPV4> OptRequestedAddress;
+typedef OptValue<51, uint32_t> OptLeaseTime;
+/* 52 - option overload is syntactic and handled internally */
+typedef OptValue<53, uint8_t> OptMessageType;
+typedef OptValue<54, RTNETADDRIPV4> OptServerId;
+typedef OptList<55, uint8_t> OptParameterRequest;
+typedef OptString<56> OptMessage;
+typedef OptValue<57, uint16_t> OptMaxDHCPMessageSize;
+typedef OptValue<58, uint32_t> OptRenewalTime;
+typedef OptValue<59, uint32_t> OptRebindingTime;
+typedef OptList<60, uint8_t> OptVendorClassId;
+typedef OptList<61, uint8_t> OptClientId;
+typedef OptString<66> OptTFTPServer; /* when overloaded */
+typedef OptString<67> OptBootFileName; /* when overloaded */
+typedef OptNoValue<80> OptRapidCommit; /* RFC4039 */
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_DhcpOptions_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/IPv4Pool.cpp b/src/VBox/NetworkServices/Dhcpd/IPv4Pool.cpp
new file mode 100644
index 00000000..f43fbccf
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/IPv4Pool.cpp
@@ -0,0 +1,138 @@
+/* $Id: IPv4Pool.cpp $ */
+/** @file
+ * DHCP server - a pool of IPv4 addresses
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <iprt/errcore.h>
+#include <iprt/stream.h>
+
+#include "IPv4Pool.h"
+
+
+/**
+ * Sets the overall range of the pool and makes all of it available.
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_PARAMETER if @a aRange is not valid.
+ */
+int IPv4Pool::init(const IPv4Range &aRange)
+{
+    if (aRange.isValid())
+    {
+        m_range = aRange;
+        m_pool.insert(m_range);
+        return VINF_SUCCESS;
+    }
+
+    return VERR_INVALID_PARAMETER;
+}
+
+
+/**
+ * Same as init(const IPv4Range &), with the range given as a pair of
+ * addresses in network order (IPv4Range converts them to host order).
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_PARAMETER if the resulting range is
+ *          not valid.
+ */
+int IPv4Pool::init(RTNETADDRIPV4 aFirstAddr, RTNETADDRIPV4 aLastAddr)
+{
+    return init(IPv4Range(aFirstAddr, aLastAddr));
+}
+
+
+/**
+ * Returns @a range to the set of free addresses.
+ *
+ * The pool must have been initialized, @a range must lie within the pool's
+ * overall range and must not overlap the free range that precedes its
+ * successor.
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_PARAMETER if any of the above does
+ *          not hold.
+ */
+int IPv4Pool::insert(const IPv4Range &range)
+{
+    if (!m_range.isValid() || !m_range.contains(range))
+        return VERR_INVALID_PARAMETER;
+
+    /* first free range that lies entirely after the new one */
+    it_t itNext = m_pool.upper_bound(IPv4Range(range.LastAddr));
+    if (itNext != m_pool.begin())
+    {
+        it_t itPrev(itNext);
+        --itPrev;
+        if (itPrev->LastAddr >= range.FirstAddr)
+        {
+#if 1 /* XXX */
+            RTPrintf("%08x-%08x conflicts with %08x-%08x\n",
+                     range.FirstAddr, range.LastAddr,
+                     itPrev->FirstAddr, itPrev->LastAddr);
+#endif
+            return VERR_INVALID_PARAMETER;
+        }
+    }
+
+    m_pool.insert(itNext, range);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Takes the lowest free address out of the pool.
+ *
+ * @returns The address in network order, or an all-zero address if the pool
+ *          has no free addresses left.
+ */
+RTNETADDRIPV4 IPv4Pool::allocate()
+{
+    if (m_pool.empty())
+    {
+        RTNETADDRIPV4 none = { 0 };
+        return none;
+    }
+
+    it_t itLowest = m_pool.begin();
+    const IPv4Range lowest = *itLowest;
+    m_pool.erase(itLowest);
+
+    /* put back the rest of the range, if there is any */
+    if (lowest.FirstAddr != lowest.LastAddr)
+        m_pool.insert(IPv4Range(lowest.FirstAddr + 1, lowest.LastAddr));
+
+    RTNETADDRIPV4 res = { RT_H2N_U32(lowest.FirstAddr) };
+    return res;
+}
+
+
+/**
+ * Takes the specific address @a addr (network order) out of the pool.
+ *
+ * @returns true if the address was free and is now allocated, false if it is
+ *          not in any free range.
+ */
+bool IPv4Pool::allocate(RTNETADDRIPV4 addr)
+{
+    it_t itRange = m_pool.lower_bound(IPv4Range(addr)); /* candidate range */
+    if (itRange == m_pool.end())
+        return false;
+
+    Assert(RT_N2H_U32(addr.u) <= itRange->LastAddr); /* by definition of < and lower_bound */
+
+    if (!itRange->contains(addr))
+        return false;
+
+    const ip_haddr_t haddr = RT_N2H_U32(addr.u);
+    const ip_haddr_t rangeFirst = itRange->FirstAddr;
+    const ip_haddr_t rangeLast = itRange->LastAddr;
+
+    m_pool.erase(itRange);
+
+    /* re-insert whatever remains below and above the allocated address */
+    if (rangeFirst < haddr)
+        insert(rangeFirst, haddr - 1);
+    if (haddr < rangeLast)
+        insert(haddr + 1, rangeLast);
+
+    return true;
+}
diff --git a/src/VBox/NetworkServices/Dhcpd/IPv4Pool.h b/src/VBox/NetworkServices/Dhcpd/IPv4Pool.h
new file mode 100644
index 00000000..dec9c6ac
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/IPv4Pool.h
@@ -0,0 +1,126 @@
+/* $Id: IPv4Pool.h $ */
+/** @file
+ * DHCP server - a pool of IPv4 addresses
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_IPv4Pool_h
+#define VBOX_INCLUDED_SRC_Dhcpd_IPv4Pool_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/asm.h>
+#include <iprt/stdint.h>
+#include <iprt/net.h>
+#include <set>
+
+typedef uint32_t ip_haddr_t; /* in host order */
+
+
+/*
+ * A range of IPv4 addresses (in host order).
+ */
+struct IPv4Range
+{
+    ip_haddr_t FirstAddr;       /* first address of the range */
+    ip_haddr_t LastAddr;        /* last address of the range, inclusive */
+
+    /* Default range is the single address 0. */
+    IPv4Range()
+      : FirstAddr(0), LastAddr(0) {}
+
+    /* Range consisting of a single host order address. */
+    explicit IPv4Range(ip_haddr_t aSingleAddr)
+      : FirstAddr(aSingleAddr), LastAddr(aSingleAddr) {}
+
+    /* Range between two host order addresses. */
+    IPv4Range(ip_haddr_t aFirstAddr, ip_haddr_t aLastAddr)
+      : FirstAddr(aFirstAddr), LastAddr(aLastAddr) {}
+
+    /* Range consisting of a single network order address. */
+    explicit IPv4Range(RTNETADDRIPV4 aSingleAddr)
+      : FirstAddr(RT_N2H_U32(aSingleAddr.u)), LastAddr(RT_N2H_U32(aSingleAddr.u)) {}
+
+    /* Range between two network order addresses. */
+    IPv4Range(RTNETADDRIPV4 aFirstAddr, RTNETADDRIPV4 aLastAddr)
+      : FirstAddr(RT_N2H_U32(aFirstAddr.u)), LastAddr(RT_N2H_U32(aLastAddr.u)) {}
+
+    /* A range is valid when its ends are not swapped. */
+    bool isValid() const
+    {
+        return !(LastAddr < FirstAddr);
+    }
+
+    /* Is the host order address within the range? */
+    bool contains(ip_haddr_t addr) const
+    {
+        return addr >= FirstAddr && addr <= LastAddr;
+    }
+
+    /* Is the network order address within the range? */
+    bool contains(RTNETADDRIPV4 addr) const
+    {
+        const ip_haddr_t haddr = RT_N2H_U32(addr.u);
+        return contains(haddr);
+    }
+
+    /* Is the other range valid and entirely within this one? */
+    bool contains(const IPv4Range &range) const
+    {
+        if (!range.isValid())
+            return false;
+        return range.FirstAddr >= FirstAddr && range.LastAddr <= LastAddr;
+    }
+};
+
+
+/* Two ranges are equal when both of their ends match. */
+inline bool operator==(const IPv4Range &l, const IPv4Range &r)
+{
+    if (l.FirstAddr != r.FirstAddr)
+        return false;
+    return l.LastAddr == r.LastAddr;
+}
+
+
+/*
+ * A range is "less" only when it ends before the other one begins, so
+ * ranges that overlap compare as neither less nor greater.
+ */
+inline bool operator<(const IPv4Range &l, const IPv4Range &r)
+{
+    return r.FirstAddr > l.LastAddr;
+}
+
+
+/*
+ * A pool of IPv4 addresses: the overall range the pool covers plus the
+ * set of ranges within it that are currently free.
+ */
+class IPv4Pool
+{
+ typedef std::set<IPv4Range> set_t;
+ typedef set_t::iterator it_t;
+
+ /* overall range of the pool, set by init() */
+ IPv4Range m_range;
+ /* free ranges, ordered by the IPv4Range operator< */
+ set_t m_pool;
+
+public:
+ IPv4Pool() {}
+
+ /* set the overall range and make all of it free */
+ int init(const IPv4Range &aRange);
+ int init(RTNETADDRIPV4 aFirstAddr, RTNETADDRIPV4 aLastAddr);
+
+ /* is the address within the overall range (free or not)? */
+ bool contains(RTNETADDRIPV4 addr) const
+ { return m_range.contains(addr); }
+
+ /* add a range to the free set */
+ int insert(const IPv4Range &range);
+
+#if 0
+ int insert(ip_haddr_t single)
+ { return insert(IPv4Range(single)); }
+#endif
+
+ int insert(ip_haddr_t first, ip_haddr_t last)
+ { return insert(IPv4Range(first, last)); }
+
+ int insert(RTNETADDRIPV4 single)
+ { return insert(IPv4Range(single)); }
+
+ int insert(RTNETADDRIPV4 first, RTNETADDRIPV4 last)
+ { return insert(IPv4Range(first, last)); }
+
+ /* take the lowest free address; all-zero address if none is left */
+ RTNETADDRIPV4 allocate();
+ /* take the given address; false if it is not free */
+ bool allocate(RTNETADDRIPV4);
+};
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_IPv4Pool_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/Makefile.kmk b/src/VBox/NetworkServices/Dhcpd/Makefile.kmk
new file mode 100644
index 00000000..48e51ffa
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/Makefile.kmk
@@ -0,0 +1,93 @@
+# $Id: Makefile.kmk $
+## @file
+# Sub-makefile for the DHCP server
+#
+
+#
+# Copyright (C) 2006-2019 Oracle Corporation
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+
+SUB_DEPTH = ../../../..
+include $(KBUILD_PATH)/subheader.kmk
+# ifdef VBOX_WITH_...
+
+ VBOX_PATH_Dhcpd_SRC := $(PATH_SUB_CURRENT)
+
+ # XXX: do not depend on order
+ ifndef LWIP_SOURCES
+ include $(PATH_SUB_CURRENT)/../../Devices/Network/lwip-new/Config.kmk
+ endif
+
+ ifdef VBOX_WITH_HARDENING
+ PROGRAMS += VBoxNetDhcpdHardened
+ DLLS += VBoxNetDhcpd
+ else
+ PROGRAMS += VBoxNetDhcpd
+ endif
+
+ VBoxNetDhcpdHardened_TEMPLATE = VBOXR3HARDENEDEXE
+ VBoxNetDhcpdHardened_NAME = VBoxNetDHCP
+ VBoxNetDhcpdHardened_DEFS = SERVICE_NAME=\"VBoxNetDhcpd\"
+ VBoxNetDhcpdHardened_SOURCES = VBoxNetDhcpdHardened.cpp
+ VBoxNetDhcpdHardened_SOURCES.win = $(VBoxNetDhcpd_0_OUTDIR)/VBoxNetDhcpd-icon.rc
+ VBoxNetDhcpdHardened_LDFLAGS.win = /SUBSYSTEM:windows
+
+ VBoxNetDhcpd_TEMPLATE := $(if-expr defined(VBOX_WITH_HARDENING),VBoxR3Dll,VBOXR3EXE)
+ VBoxNetDhcpd_NAME = VBoxNetDHCP
+ # VBoxNetDhcpd_DEFS = IPv6
+ # VBoxNetDhcpd_DEFS.linux = WITH_VALGRIND
+ #VBoxNetDhcpd_DEFS.win = VBOX_COM_OUTOFPROC_MODULE _WIN32_WINNT=0x501 # Windows XP
+
+ # (current dir is for lwipopts.h)
+ VBoxNetDhcpd_INCS += . $(addprefix ../../Devices/Network/lwip-new/,$(LWIP_INCS))
+
+ VBoxNetDhcpd_DEFS = KBUILD_TYPE=\"$(KBUILD_TYPE)\"
+ ifneq ($(KBUILD_TARGET),win)
+ VBoxNetDhcpd_DEFS += VBOX_WITH_XPCOM
+ VBoxNetDhcpd_INCS += $(VBOX_XPCOM_INCS)
+  ifneq ($(KBUILD_TARGET),darwin)
+   # We want -std=c++11 for 4.7 and newer compilers, and -std=c++0x for older ones.
+   # The version comparison needs if-expr: a plain $(if ...) only tests its
+   # condition for being a non-empty string and would always select c++0x.
+   VBoxNetDhcpd_CXXFLAGS += -std=$(if $(VBOX_GCC_VERSION_CXX),$(if-expr $(VBOX_GCC_VERSION_CXX) < 40700,c++0x,c++11),c++0x)
+  endif
+ endif
+ VBoxNetDhcpd_SOURCES = ../../Main/glue/VBoxLogRelCreate.cpp \
+ ../../Main/glue/GetVBoxUserHomeDirectory.cpp \
+ ClientId.cpp \
+ Config.cpp \
+ DHCPD.cpp \
+ Db.cpp \
+ DhcpMessage.cpp \
+ DhcpOptions.cpp \
+ IPv4Pool.cpp \
+ TimeStamp.cpp \
+ VBoxNetDhcpd.cpp \
+ $(addprefix ../../Devices/Network/lwip-new/,$(LWIP_SOURCES))
+
+ VBoxNetDhcpd_LIBS = $(LIB_RUNTIME)
+
+ VBoxNetDhcpd_LIBS.solaris += socket nsl
+ VBoxNetDhcpd_LDFLAGS.win = /SUBSYSTEM:windows
+
+ ifeq ($(KBUILD_TARGET),win)
+ # Icon include file.
+ VBoxNetDhcpd_SOURCES += VBoxNetDhcpd.rc
+ VBoxNetDhcpd.rc_INCS = $(VBoxNetDhcpd_0_OUTDIR)
+ VBoxNetDhcpd.rc_DEPS = $(VBoxNetDhcpd_0_OUTDIR)/VBoxNetDhcpd-icon.rc
+ VBoxNetDhcpd.rc_CLEAN = $(VBoxNetDhcpd_0_OUTDIR)/VBoxNetDhcpd-icon.rc
+
+ $$(VBoxNetDhcpd_0_OUTDIR)/VBoxNetDhcpd-icon.rc: $(VBOX_WINDOWS_ICON_FILE) \
+ $$(VBoxNetDhcpd_DEFPATH)/Makefile.kmk | $$(dir $$@)
+ $(RM) -f $@
+ $(APPEND) $@ 'IDI_VIRTUALBOX ICON DISCARDABLE "$(subst /,\\,$(VBOX_WINDOWS_ICON_FILE))"'
+ endif # win
+
+# endif # VBOX_WITH_...
+include $(FILE_KBUILD_SUB_FOOTER)
diff --git a/src/VBox/NetworkServices/Dhcpd/TimeStamp.cpp b/src/VBox/NetworkServices/Dhcpd/TimeStamp.cpp
new file mode 100644
index 00000000..0a390ef8
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/TimeStamp.cpp
@@ -0,0 +1,36 @@
+/* $Id: TimeStamp.cpp $ */
+/** @file
+ * DHCP server - timestamps
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include "TimeStamp.h"
+
+#include <iprt/string.h>
+
+
+/**
+ * Formats the timestamp as absolute time, "YYYY-MM-DDThh:mm:ssZ" style,
+ * feeding the result to @a pfnOutput.
+ *
+ * @returns Whatever RTStrFormat() returns.
+ */
+size_t TimeStamp::absStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput) const
+{
+    RTTIMESPEC absSpec;
+    RTTIME exploded;
+    RTTimeExplode(&exploded, getAbsTimeSpec(&absSpec));
+
+    return RTStrFormat(pfnOutput, pvArgOutput, NULL, 0,
+                       "%RI32-%02u-%02uT%02u:%02u:%02uZ",
+                       exploded.i32Year, exploded.u8Month, exploded.u8MonthDay,
+                       exploded.u8Hour, exploded.u8Minute, exploded.u8Second);
+}
diff --git a/src/VBox/NetworkServices/Dhcpd/TimeStamp.h b/src/VBox/NetworkServices/Dhcpd/TimeStamp.h
new file mode 100644
index 00000000..9f95bc47
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/TimeStamp.h
@@ -0,0 +1,101 @@
+/* $Id: TimeStamp.h $ */
+/** @file
+ * DHCP server - timestamps
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_TimeStamp_h
+#define VBOX_INCLUDED_SRC_Dhcpd_TimeStamp_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/string.h>
+#include <iprt/time.h>
+
+
+/*
+ * Timestamp API uses unsigned time, but we need to be able to refer
+ * to events in the past. Hide the ugly conversions.
+ */
+class TimeStamp
+{
+ /* nanosecond timestamp on the RTTimeNanoTS() scale, kept signed */
+ int64_t m_ns;
+
+public:
+ TimeStamp()
+ : m_ns(0) {}
+
+ /* wrap a raw nanosecond timestamp */
+ TimeStamp(uint64_t ns)
+ : m_ns(static_cast<int64_t>(ns)) {}
+
+ /* timestamp for the current moment, as reported by RTTimeNanoTS() */
+ static TimeStamp now()
+ {
+ return TimeStamp(RTTimeNanoTS());
+ }
+
+ /*
+ * Convert an absolute time in seconds to a timestamp: compute how
+ * far RTTimeNow() is from sec and subtract that distance from the
+ * current RTTimeNanoTS() value.
+ * NOTE(review): sec is presumably on the same scale as
+ * RTTimeSpecGetSeconds() - confirm against callers.
+ */
+ static TimeStamp absSeconds(int64_t sec)
+ {
+ RTTIMESPEC delta;
+ RTTimeNow(&delta);
+ RTTimeSpecSubSeconds(&delta, sec);
+
+ uint64_t stampNow = RTTimeNanoTS();
+ return TimeStamp(stampNow - RTTimeSpecGetNano(&delta));
+ }
+
+ /* move the timestamp forward by sec seconds, in place */
+ TimeStamp &addSeconds(int64_t sec)
+ {
+ m_ns += sec * RT_NS_1SEC;
+ return *this;
+ }
+
+ /* move the timestamp back by sec seconds, in place */
+ TimeStamp &subSeconds(int64_t sec)
+ {
+ m_ns -= sec * RT_NS_1SEC;
+ return *this;
+ }
+
+
+ /*
+ * Convert the timestamp to absolute time: take RTTimeNow() and
+ * subtract the distance between the current RTTimeNanoTS() value
+ * and this timestamp. Stores the result in *pTime and returns pTime.
+ */
+ RTTIMESPEC *getAbsTimeSpec(RTTIMESPEC *pTime) const
+ {
+ RTTimeNow(pTime);
+
+ uint64_t stampNow = RTTimeNanoTS();
+ uint64_t delta = stampNow - m_ns;
+ RTTimeSpecSubNano(pTime, delta);
+ return pTime;
+ }
+
+ /* absolute time of the timestamp in whole seconds */
+ int64_t getAbsSeconds() const
+ {
+ RTTIMESPEC time;
+ return RTTimeSpecGetSeconds(getAbsTimeSpec(&time));
+ }
+
+ /* format the absolute time through an RTStrFormat() output callback */
+ size_t absStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput) const;
+
+ friend bool operator<(const TimeStamp &l, const TimeStamp &r);
+ friend bool operator>(const TimeStamp &l, const TimeStamp &r);
+ friend bool operator<=(const TimeStamp &l, const TimeStamp &r);
+ friend bool operator>=(const TimeStamp &l, const TimeStamp &r);
+};
+
+
+/* Timestamps are ordered by their signed nanosecond values. */
+inline bool operator<(const TimeStamp &l, const TimeStamp &r) { return l.m_ns < r.m_ns; }
+inline bool operator>(const TimeStamp &l, const TimeStamp &r) { return l.m_ns > r.m_ns; }
+inline bool operator<=(const TimeStamp &l, const TimeStamp &r) { return l.m_ns <= r.m_ns; }
+inline bool operator>=(const TimeStamp &l, const TimeStamp &r) { return l.m_ns >= r.m_ns; }
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_TimeStamp_h */
diff --git a/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.cpp b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.cpp
new file mode 100644
index 00000000..9f180410
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.cpp
@@ -0,0 +1,805 @@
+/* $Id: VBoxNetDhcpd.cpp $ */
+/** @file
+ * VBoxNetDhcpd - DHCP server for host-only and NAT networks.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <iprt/cdefs.h>
+
+/*
+ * Need to get host/network order conversion stuff from Windows headers,
+ * so we do not define them in LWIP and then try to re-define them in
+ * Windows headers.
+ */
+#ifdef RT_OS_WINDOWS
+# include <iprt/win/winsock2.h>
+#endif
+
+
+#include <iprt/param.h>
+#include <iprt/errcore.h>
+
+#include <iprt/initterm.h>
+#include <iprt/message.h>
+
+#include <iprt/net.h>
+#include <iprt/path.h>
+#include <iprt/stream.h>
+
+#include <VBox/sup.h>
+#include <VBox/vmm/vmm.h>
+#include <VBox/vmm/pdmnetinline.h>
+#include <VBox/intnet.h>
+#include <VBox/intnetinline.h>
+
+#include "VBoxLwipCore.h"
+#include "Config.h"
+#include "DHCPD.h"
+#include "DhcpMessage.h"
+
+extern "C"
+{
+#include "lwip/sys.h"
+#include "lwip/pbuf.h"
+#include "lwip/netif.h"
+#include "lwip/tcpip.h"
+#include "lwip/udp.h"
+#include "netif/etharp.h"
+}
+
+#include <string>
+#include <vector>
+#include <memory>
+
+#ifdef RT_OS_WINDOWS
+# include <iprt/win/windows.h>
+#endif
+
+/* Deleter for std::unique_ptr that releases a pbuf with pbuf_free(). */
+struct delete_pbuf
+{
+ delete_pbuf() {}
+ void operator()(struct pbuf *p) const { pbuf_free(p); }
+};
+
+/* A pbuf pointer that is freed with pbuf_free() when it goes out of scope. */
+typedef std::unique_ptr<pbuf, delete_pbuf> unique_ptr_pbuf;
+
+
+/*
+ * Pass the request structure req (via its Hdr member) for operation op
+ * to SUPR3CallVMMR0Ex() with NIL VM and VCPU arguments; evaluates to the
+ * value SUPR3CallVMMR0Ex() returns.
+ */
+#define CALL_VMMR0(op, req) \
+ (SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, (op), 0, &(req).Hdr))
+
+
+/*
+ * The DHCP server process: connects an lwIP network interface to the
+ * internal network and hands DHCP requests received on it to DHCPD.
+ */
+class VBoxNetDhcpd
+{
+ DECLARE_CLS_COPY_CTOR_ASSIGN_NOOP(VBoxNetDhcpd);
+
+private:
+ /* stderr-only release logger created by logInitStderr() */
+ PRTLOGGER m_pStderrReleaseLogger;
+
+ /* intnet plumbing */
+ PSUPDRVSESSION m_pSession;
+ INTNETIFHANDLE m_hIf;
+ PINTNETBUF m_pIfBuf;
+
+ /* lwip stack connected to the intnet */
+ struct netif m_LwipNetif;
+
+ /* configuration obtained in main() */
+ Config *m_Config;
+
+ /* listening pcb */
+ struct udp_pcb *m_Dhcp4Pcb;
+
+ /* the DHCP protocol logic that produces replies to requests */
+ DHCPD m_server;
+
+public:
+ VBoxNetDhcpd();
+ ~VBoxNetDhcpd();
+
+ int main(int argc, char **argv);
+
+private:
+ int logInitStderr();
+
+ /*
+ * Boilerplate code.
+ */
+ int r3Init();
+ void r3Fini();
+
+ int vmmInit();
+
+ /* open the interface, get its buffer and activate it */
+ int ifInit(const std::string &strNetwork,
+ const std::string &strTrunk = std::string(),
+ INTNETTRUNKTYPE enmTrunkType = kIntNetTrunkType_WhateverNone);
+ int ifOpen(const std::string &strNetwork,
+ const std::string &strTrunk,
+ INTNETTRUNKTYPE enmTrunkType);
+ int ifGetBuf();
+ int ifActivate();
+
+ int ifWait(uint32_t cMillies = RT_INDEFINITE_WAIT);
+ int ifProcessInput();
+ int ifFlush();
+
+ int ifClose();
+
+ /* wait for input and process it until waiting fails */
+ void ifPump();
+ /* intnet -> lwIP */
+ int ifInput(void *pvSegFrame, uint32_t cbSegFrame);
+
+ int ifOutput(PCINTNETSEG paSegs, size_t cSegs, size_t cbFrame);
+
+
+ /*
+ * lwIP callbacks
+ */
+ static DECLCALLBACK(void) lwipInitCB(void *pvArg);
+ void lwipInit();
+
+ static err_t netifInitCB(netif *pNetif);
+ err_t netifInit(netif *pNetif);
+
+ /* lwIP -> intnet */
+ static err_t netifLinkOutputCB(netif *pNetif, pbuf *pPBuf);
+ err_t netifLinkOutput(pbuf *pPBuf);
+
+ static void dhcp4RecvCB(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+ ip_addr_t *addr, u16_t port);
+ void dhcp4Recv(struct udp_pcb *pcb, struct pbuf *p, ip_addr_t *addr, u16_t port);
+};
+
+
+/*
+ * Sets up early stderr logging, then the support library session and,
+ * if that worked, loads the VMM module. Failures are not reported to
+ * the caller here.
+ */
+VBoxNetDhcpd::VBoxNetDhcpd()
+  : m_pStderrReleaseLogger(NULL),
+    m_pSession(NIL_RTR0PTR),
+    m_hIf(INTNET_HANDLE_INVALID),
+    m_pIfBuf(NULL),
+    m_LwipNetif(),
+    m_Config(NULL),
+    m_Dhcp4Pcb(NULL)
+{
+    logInitStderr();
+
+    if (RT_SUCCESS(r3Init()))
+        vmmInit();
+}
+
+
+/*
+ * Closes the intnet interface (if open) before terminating the support
+ * library session it was opened with.
+ */
+VBoxNetDhcpd::~VBoxNetDhcpd()
+{
+ ifClose();
+ r3Fini();
+}
+
+
+/*
+ * We don't know the name of the release log file until we parse our
+ * configuration because we use network name as basename. To get
+ * early logging to work, start with stderr-only release logger.
+ *
+ * We disable "sup" for this logger to avoid spam from SUPR3Init().
+ */
+int VBoxNetDhcpd::logInitStderr()
+{
+    static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
+
+#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
+    const uint32_t fFlags = RTLOGFLAGS_USECRLF;
+#else
+    const uint32_t fFlags = 0;
+#endif
+
+    PRTLOGGER pStderrLogger;
+    const int rc = RTLogCreate(&pStderrLogger, fFlags,
+                               "all -sup all.restrict -default.restrict",
+                               NULL,      /* environment base */
+                               RT_ELEMENTS(s_apszGroups), s_apszGroups,
+                               RTLOGDEST_STDERR, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        /* make it the default release logger */
+        m_pStderrReleaseLogger = pStderrLogger;
+        RTLogRelSetDefaultInstance(m_pStderrReleaseLogger);
+        return VINF_SUCCESS;
+    }
+
+    RTPrintf("Failed to init stderr logger: %Rrs\n", rc);
+    return rc;
+}
+
+
+/*
+ * Initializes the support library and stores the session in m_pSession.
+ * Must not be called when a session already exists.
+ */
+int VBoxNetDhcpd::r3Init()
+{
+    AssertReturn(m_pSession == NIL_RTR0PTR, VERR_GENERAL_FAILURE);
+
+    return SUPR3Init(&m_pSession);
+}
+
+
+/*
+ * Terminates the support library session, if there is one.
+ */
+void VBoxNetDhcpd::r3Fini()
+{
+    if (m_pSession != NIL_RTR0PTR)
+    {
+        SUPR3Term();
+        m_pSession = NIL_RTR0PTR;
+    }
+}
+
+
+/*
+ * Loads "VMMR0.r0" from the directory of the executable.
+ *
+ * @returns VINF_SUCCESS, the failure status of RTPathExecDir() or
+ *          SUPR3LoadVMM(), or VERR_GENERAL_FAILURE if an exception was
+ *          thrown while building the path.
+ */
+int VBoxNetDhcpd::vmmInit()
+{
+    try
+    {
+        std::vector<char> vExecDir(RTPATH_MAX);
+        int rc = RTPathExecDir(&vExecDir.front(), vExecDir.size());
+        if (RT_SUCCESS(rc))
+        {
+            std::string strVmmPath(&vExecDir.front());
+            strVmmPath.append("/VMMR0.r0");
+
+            rc = SUPR3LoadVMM(strVmmPath.c_str());
+            if (RT_SUCCESS(rc))
+                rc = VINF_SUCCESS;
+        }
+        return rc;
+    }
+    catch (...)
+    {
+        return VERR_GENERAL_FAILURE;
+    }
+}
+
+
+/*
+ * Brings up the intnet interface: open it, get its buffer, activate it.
+ * Stops at the first step that fails and returns its status.
+ */
+int VBoxNetDhcpd::ifInit(const std::string &strNetwork,
+                         const std::string &strTrunk,
+                         INTNETTRUNKTYPE enmTrunkType)
+{
+    int rc = ifOpen(strNetwork, strTrunk, enmTrunkType);
+    if (RT_SUCCESS(rc))
+        rc = ifGetBuf();
+    if (RT_SUCCESS(rc))
+        rc = ifActivate();
+
+    return RT_SUCCESS(rc) ? VINF_SUCCESS : rc;
+}
+
+
+/*
+ * Opens an interface on the internal network @a strNetwork and stores
+ * its handle in m_hIf. Requires a session and no interface open yet.
+ */
+int VBoxNetDhcpd::ifOpen(const std::string &strNetwork,
+                         const std::string &strTrunk,
+                         INTNETTRUNKTYPE enmTrunkType)
+{
+    AssertReturn(m_pSession != NIL_RTR0PTR, VERR_GENERAL_FAILURE);
+    AssertReturn(m_hIf == INTNET_HANDLE_INVALID, VERR_GENERAL_FAILURE);
+
+    INTNETOPENREQ Req;
+    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+    Req.Hdr.cbReq = sizeof(Req);
+    Req.pSession = m_pSession;
+
+    /* names are truncated to fit, always NUL terminated */
+    strncpy(Req.szNetwork, strNetwork.c_str(), sizeof(Req.szNetwork));
+    Req.szNetwork[sizeof(Req.szNetwork) - 1] = '\0';
+
+    strncpy(Req.szTrunk, strTrunk.c_str(), sizeof(Req.szTrunk));
+    Req.szTrunk[sizeof(Req.szTrunk) - 1] = '\0';
+
+    Req.enmTrunkType = enmTrunkType != kIntNetTrunkType_Invalid
+                     ? enmTrunkType
+                     : kIntNetTrunkType_WhateverNone;
+
+    Req.fFlags = 0;
+    Req.cbSend = 128 * _1K;
+    Req.cbRecv = 256 * _1K;
+
+    Req.hIf = INTNET_HANDLE_INVALID;
+
+    const int rc = CALL_VMMR0(VMMR0_DO_INTNET_OPEN, Req);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    m_hIf = Req.hIf;
+    AssertReturn(m_hIf != INTNET_HANDLE_INVALID, VERR_GENERAL_FAILURE);
+
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Obtains the ring-3 pointer to the buffer of the open interface and
+ * stores it in m_pIfBuf.
+ */
+int VBoxNetDhcpd::ifGetBuf()
+{
+    AssertReturn(m_pSession != NIL_RTR0PTR, VERR_GENERAL_FAILURE);
+    AssertReturn(m_hIf != INTNET_HANDLE_INVALID, VERR_GENERAL_FAILURE);
+    AssertReturn(m_pIfBuf == NULL, VERR_GENERAL_FAILURE);
+
+    INTNETIFGETBUFFERPTRSREQ Req;
+    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+    Req.Hdr.cbReq = sizeof(Req);
+    Req.pSession = m_pSession;
+    Req.hIf = m_hIf;
+
+    Req.pRing0Buf = NIL_RTR0PTR;
+    Req.pRing3Buf = NULL;
+
+    const int rc = CALL_VMMR0(VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS, Req);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    m_pIfBuf = Req.pRing3Buf;
+    AssertReturn(m_pIfBuf != NULL, VERR_GENERAL_FAILURE);
+
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Marks the open interface as active. Requires the interface to be
+ * open and its buffer to be mapped.
+ */
+int VBoxNetDhcpd::ifActivate()
+{
+    AssertReturn(m_pSession != NIL_RTR0PTR, VERR_GENERAL_FAILURE);
+    AssertReturn(m_hIf != INTNET_HANDLE_INVALID, VERR_GENERAL_FAILURE);
+    AssertReturn(m_pIfBuf != NULL, VERR_GENERAL_FAILURE);
+
+    INTNETIFSETACTIVEREQ Req;
+    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+    Req.Hdr.cbReq = sizeof(Req);
+    Req.pSession = m_pSession;
+    Req.hIf = m_hIf;
+
+    Req.fActive = 1;
+
+    return CALL_VMMR0(VMMR0_DO_INTNET_IF_SET_ACTIVE, Req);
+}
+
+
+/*
+ * Input loop: wait (indefinitely) for frames and process them.
+ * Interrupted waits are retried; any other wait failure ends the loop.
+ */
+void VBoxNetDhcpd::ifPump()
+{
+    for (;;)
+    {
+        const int rc = ifWait();
+        if (rc == VERR_INTERRUPTED)
+            continue;
+
+        if (RT_FAILURE(rc))
+            break;
+
+        ifProcessInput();
+    }
+}
+
+
+/*
+ * Waits up to @a cMillies for input on the open interface and returns
+ * the status of the wait request.
+ */
+int VBoxNetDhcpd::ifWait(uint32_t cMillies)
+{
+    AssertReturn(m_pSession != NIL_RTR0PTR, VERR_GENERAL_FAILURE);
+    AssertReturn(m_hIf != INTNET_HANDLE_INVALID, VERR_GENERAL_FAILURE);
+
+    INTNETIFWAITREQ Req;
+    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+    Req.Hdr.cbReq = sizeof(Req);
+    Req.pSession = m_pSession;
+    Req.hIf = m_hIf;
+
+    Req.cMillies = cMillies;
+
+    return CALL_VMMR0(VMMR0_DO_INTNET_IF_WAIT, Req);
+}
+
+
+/*
+ * Drains the receive ring of the interface, feeding every frame to
+ * ifInput(). Plain frames are passed as is; GSO frames are carved into
+ * segments and each segment is passed separately. Frames of any other
+ * type and invalid GSO frames are skipped.
+ *
+ * @returns VINF_SUCCESS, or VERR_GENERAL_FAILURE if the interface is
+ *          not fully set up.
+ */
+int VBoxNetDhcpd::ifProcessInput()
+{
+    AssertReturn(m_pSession != NIL_RTR0PTR, VERR_GENERAL_FAILURE);
+    AssertReturn(m_hIf != INTNET_HANDLE_INVALID, VERR_GENERAL_FAILURE);
+    AssertReturn(m_pIfBuf != NULL, VERR_GENERAL_FAILURE);
+
+    for (PCINTNETHDR pHdr;
+         (pHdr = IntNetRingGetNextFrameToRead(&m_pIfBuf->Recv)) != NULL;
+         IntNetRingSkipFrame(&m_pIfBuf->Recv))
+    {
+        const uint8_t u8Type = pHdr->u8Type;
+        void *pvSegFrame;
+        uint32_t cbSegFrame;
+
+        if (u8Type == INTNETHDR_TYPE_FRAME)
+        {
+            pvSegFrame = IntNetHdrGetFramePtr(pHdr, m_pIfBuf);
+            cbSegFrame = pHdr->cbFrame;
+
+            ifInput(pvSegFrame, cbSegFrame);
+        }
+        else if (u8Type == INTNETHDR_TYPE_GSO)
+        {
+            const size_t cbGso = pHdr->cbFrame;
+
+            /* too short to hold the GSO context; do not let the subtraction below wrap */
+            if (cbGso < sizeof(PDMNETWORKGSO))
+                continue;
+
+            const size_t cbFrame = cbGso - sizeof(PDMNETWORKGSO);
+
+            PCPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, m_pIfBuf);
+            if (!PDMNetGsoIsValid(pGso, cbGso, cbFrame))
+                continue;
+
+            const uint32_t cSegs = PDMNetGsoCalcSegmentCount(pGso, cbFrame);
+            for (uint32_t i = 0; i < cSegs; ++i)
+            {
+                uint8_t abHdrScratch[256];
+                pvSegFrame = PDMNetGsoCarveSegmentQD(pGso, (uint8_t *)(pGso + 1), cbFrame,
+                                                     abHdrScratch,
+                                                     i, cSegs,
+                                                     &cbSegFrame);
+
+                /* pass the size of this segment, not of the whole GSO frame */
+                ifInput(pvSegFrame, cbSegFrame);
+            }
+        }
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Got a frame from the internal network, feed it to the lwIP stack.
+ */
+/*
+ * Copies the frame into a freshly allocated pbuf (chain) and hands it
+ * to the input function of the lwIP interface.
+ *
+ * @returns VINF_SUCCESS if lwIP accepted the packet,
+ *          VERR_INVALID_PARAMETER for a NULL frame or a frame size that
+ *          is not larger than an ethernet header or does not fit a pbuf,
+ *          VERR_NO_MEMORY if no pbuf could be allocated,
+ *          VERR_GENERAL_FAILURE if lwIP refused the packet.
+ */
+int VBoxNetDhcpd::ifInput(void *pvFrame, uint32_t cbFrame)
+{
+    if (pvFrame == NULL)
+        return VERR_INVALID_PARAMETER;
+
+    if (   cbFrame <= sizeof(RTNETETHERHDR)
+        || cbFrame > UINT16_MAX - ETH_PAD_SIZE)
+        return VERR_INVALID_PARAMETER;
+
+    struct pbuf *p = pbuf_alloc(PBUF_RAW, (u16_t)cbFrame + ETH_PAD_SIZE, PBUF_POOL);
+    if (RT_UNLIKELY(p == NULL))
+        return VERR_NO_MEMORY;
+
+    /*
+     * The code below is inlined version of:
+     *
+     *   pbuf_header(p, -ETH_PAD_SIZE); // hide padding
+     *   pbuf_take(p, pvFrame, cbFrame);
+     *   pbuf_header(p, ETH_PAD_SIZE);  // reveal padding
+     */
+    struct pbuf *q = p;
+    uint8_t *pu8Chunk = (uint8_t *)pvFrame;
+    do {
+        uint8_t *payload = (uint8_t *)q->payload;
+        size_t len = q->len;
+
+#if ETH_PAD_SIZE
+        if (RT_LIKELY(q == p))  /* single pbuf is large enough */
+        {
+            payload += ETH_PAD_SIZE;
+            len -= ETH_PAD_SIZE;
+        }
+#endif
+        memcpy(payload, pu8Chunk, len);
+        pu8Chunk += len;
+        q = q->next;
+    } while (RT_UNLIKELY(q != NULL));
+
+    /*
+     * On success the stack takes over the pbuf.  If the input function
+     * reports an error the pbuf is still ours and must be released here,
+     * otherwise it is leaked.
+     */
+    const err_t error = m_LwipNetif.input(p, &m_LwipNetif);
+    if (RT_UNLIKELY(error != ERR_OK))
+    {
+        pbuf_free(p);
+        return VERR_GENERAL_FAILURE;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Got a frame from the lwIP stack, feed it to the internal network.
+ */
+/*
+ * Copies the packet (without the ETH_PAD_SIZE padding) into a frame
+ * allocated on the send ring, commits it and flushes the ring.
+ *
+ * @returns ERR_OK, ERR_ARG if the packet is shorter than an ethernet
+ *          header, ERR_MEM if the ring has no room for the frame.
+ */
+err_t VBoxNetDhcpd::netifLinkOutput(pbuf *pPBuf)
+{
+    if (pPBuf->tot_len < sizeof(struct eth_hdr)) /* includes ETH_PAD_SIZE */
+        return ERR_ARG;
+
+    const u16_t cbFrame = pPBuf->tot_len - ETH_PAD_SIZE;
+
+    PINTNETHDR pHdr;
+    void *pvFrame;
+    if (RT_FAILURE(IntNetRingAllocateFrame(&m_pIfBuf->Send, cbFrame, &pHdr, &pvFrame)))
+        return ERR_MEM;
+
+    pbuf_copy_partial(pPBuf, pvFrame, cbFrame, ETH_PAD_SIZE);
+    IntNetRingCommitFrameEx(&m_pIfBuf->Send, pHdr, cbFrame);
+
+    ifFlush();
+    return ERR_OK;
+}
+
+
+/*
+ * Issues the send request for the interface and returns its status.
+ */
+int VBoxNetDhcpd::ifFlush()
+{
+    INTNETIFSENDREQ Req;
+    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+    Req.Hdr.cbReq = sizeof(Req);
+    Req.pSession = m_pSession;
+
+    Req.hIf = m_hIf;
+
+    return CALL_VMMR0(VMMR0_DO_INTNET_IF_SEND, Req);
+}
+
+
+/*
+ * Closes the intnet interface if it is open. Always returns
+ * VINF_SUCCESS; the status of the close request is not checked.
+ */
+int VBoxNetDhcpd::ifClose()
+{
+ if (m_hIf == INTNET_HANDLE_INVALID)
+ return VINF_SUCCESS;
+
+ INTNETIFCLOSEREQ CloseReq;
+
+ CloseReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ CloseReq.Hdr.cbReq = sizeof(CloseReq);
+ CloseReq.pSession = m_pSession;
+
+ CloseReq.hIf = m_hIf;
+
+ /* forget the handle and the buffer before issuing the request */
+ m_hIf = INTNET_HANDLE_INVALID;
+ m_pIfBuf = NULL;
+
+ CALL_VMMR0(VMMR0_DO_INTNET_IF_CLOSE, CloseReq);
+ return VINF_SUCCESS;
+}
+
+
+/*
+ * Trampoline passed to vboxLwipCoreInitialize(): pvArg is the
+ * VBoxNetDhcpd instance whose lwipInit() is to be called.
+ */
+/* static */ DECLCALLBACK(void) VBoxNetDhcpd::lwipInitCB(void *pvArg)
+{
+ AssertPtrReturnVoid(pvArg);
+
+ VBoxNetDhcpd *self = static_cast<VBoxNetDhcpd *>(pvArg);
+ self->lwipInit();
+}
+
+
+/*
+ * Trampoline passed to netif_add(): the state of the interface is the
+ * VBoxNetDhcpd instance whose netifInit() is to be called.
+ *
+ * @returns Result of netifInit(), ERR_ARG if @a pNetif is not a valid
+ *          pointer, ERR_IF if the interface has no valid state pointer.
+ */
+/* static */ err_t VBoxNetDhcpd::netifInitCB(netif *pNetif)
+{
+    AssertPtrReturn(pNetif, ERR_ARG);
+
+    VBoxNetDhcpd *self = static_cast<VBoxNetDhcpd *>(pNetif->state);
+    AssertPtrReturn(self, ERR_IF); /* same check as in netifLinkOutputCB() */
+
+    return self->netifInit(pNetif);
+}
+
+
+/*
+ * Trampoline installed as the linkoutput function of the interface:
+ * forwards the packet to netifLinkOutput() of the VBoxNetDhcpd instance
+ * stored as the state of the interface.
+ */
+/* static */ err_t VBoxNetDhcpd::netifLinkOutputCB(netif *pNetif, pbuf *pPBuf)
+{
+ AssertPtrReturn(pNetif, ERR_ARG);
+ AssertPtrReturn(pPBuf, ERR_ARG);
+
+ VBoxNetDhcpd *self = static_cast<VBoxNetDhcpd *>(pNetif->state);
+ AssertPtrReturn(self, ERR_IF);
+
+ return self->netifLinkOutput(pPBuf);
+}
+
+
+/*
+ * Trampoline registered with udp_recv(): arg is the VBoxNetDhcpd
+ * instance whose dhcp4Recv() is to process the datagram.  The received
+ * pbuf is released here on every path, including the one where arg is
+ * not a valid pointer.
+ */
+/* static */ void VBoxNetDhcpd::dhcp4RecvCB(void *arg, struct udp_pcb *pcb,
+                                            struct pbuf *p,
+                                            ip_addr_t *addr, u16_t port)
+{
+    VBoxNetDhcpd *self = static_cast<VBoxNetDhcpd *>(arg);
+    AssertPtr(self);
+    if (RT_VALID_PTR(self))
+        self->dhcp4Recv(pcb, p, addr, port);
+
+    pbuf_free(p);
+}
+
+
+
+
+
+/*
+ * Obtains the configuration selected by the command line, initializes
+ * the DHCP server with it, connects to the internal network, starts
+ * lwIP and then processes input until waiting for it fails.
+ *
+ * @returns VINF_SUCCESS when the input loop ends, VERR_GENERAL_FAILURE
+ *          if no configuration could be obtained, or the failure status
+ *          of the server, interface or lwIP initialization.
+ */
+int VBoxNetDhcpd::main(int argc, char **argv)
+{
+    int rc;
+
+    ClientId::registerFormat();
+
+    /* XXX: We no longer need hardcoded and compat methods. We should remove them soon. */
+    if (argc < 2)
+        m_Config = Config::hardcoded();
+    else if (   strcmp(argv[1], "--config") == 0
+             || strcmp(argv[1], "--comment") == 0)
+        m_Config = Config::create(argc, argv);
+    else
+        m_Config = Config::compat(argc, argv);
+
+    if (m_Config == NULL)
+        return VERR_GENERAL_FAILURE;
+
+    /* do not go on with a server that failed to initialize */
+    rc = m_server.init(m_Config);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* connect to the intnet */
+    rc = ifInit(m_Config->getNetwork(),
+                m_Config->getTrunk(),
+                m_Config->getTrunkType());
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* setup lwip */
+    rc = vboxLwipCoreInitialize(lwipInitCB, this);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    ifPump();
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Adds the lwIP interface with the configured address and netmask,
+ * brings it up and binds a broadcast capable UDP pcb to the BOOTP
+ * server port with dhcp4RecvCB() as its receive callback.  Gives up
+ * silently if any of the steps fails.
+ */
+void VBoxNetDhcpd::lwipInit()
+{
+    ip_addr_t addr, mask;
+    ip4_addr_set_u32(&addr, m_Config->getIPv4Address().u);
+    ip4_addr_set_u32(&mask, m_Config->getIPv4Netmask().u);
+
+    netif *pNetif = netif_add(&m_LwipNetif,
+                              &addr, &mask,
+                              IP_ADDR_ANY,                /* gateway */
+                              this,                       /* state */
+                              VBoxNetDhcpd::netifInitCB,  /* netif_init_fn */
+                              tcpip_input);               /* netif_input_fn */
+    if (pNetif == NULL)
+        return;
+
+    netif_set_up(pNetif);
+    netif_set_link_up(pNetif);
+
+    m_Dhcp4Pcb = udp_new();
+    if (m_Dhcp4Pcb == NULL)
+        return; /* XXX? */
+
+    ip_set_option(m_Dhcp4Pcb, SOF_BROADCAST);
+    udp_recv(m_Dhcp4Pcb, dhcp4RecvCB, this);
+
+    if (udp_bind(m_Dhcp4Pcb, IP_ADDR_ANY, RTNETIPV4_PORT_BOOTPS) != ERR_OK)
+    {
+        /* XXX? */
+        udp_remove(m_Dhcp4Pcb);
+        m_Dhcp4Pcb = NULL;
+    }
+}
+
+
+/*
+ * Fills in the link level properties of the interface (configured MAC
+ * address, MTU, flags, output functions) and makes it the default
+ * interface.  Always returns ERR_OK.
+ */
+err_t VBoxNetDhcpd::netifInit(netif *pNetif)
+{
+    memcpy(pNetif->hwaddr, &m_Config->getMacAddress(), sizeof(RTMAC));
+    pNetif->hwaddr_len = sizeof(RTMAC);
+
+    pNetif->mtu = 1500;
+    pNetif->flags = NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_ETHERNET;
+
+    pNetif->output = etharp_output;
+    pNetif->linkoutput = netifLinkOutputCB;
+
+    netif_set_default(pNetif);
+    return ERR_OK;
+}
+
+
+/*
+ * Processes a datagram received on the DHCP server port: parses it,
+ * lets the server produce a reply and, if there is one, sends it to the
+ * BOOTP client port of the reply's destination (broadcast when the
+ * destination is the "any" address).
+ *
+ * The input pbuf @a p is not freed here.  Datagrams that do not fit a
+ * single pbuf, that fail to parse, or for which no reply is produced
+ * are silently dropped, as are replies that cannot be encoded or sent.
+ */
+void VBoxNetDhcpd::dhcp4Recv(struct udp_pcb *pcb, struct pbuf *p,
+                             ip_addr_t *addr, u16_t port)
+{
+    err_t error;
+    int rc;
+
+    RT_NOREF(pcb, addr, port);
+
+    if (RT_UNLIKELY(p->next != NULL))
+        return; /* XXX: we want it in one chunk */
+
+    bool broadcasted = ip_addr_cmp(ip_current_dest_addr(), &ip_addr_broadcast)
+                    || ip_addr_cmp(ip_current_dest_addr(), &ip_addr_any);
+
+    DhcpClientMessage *msgIn = DhcpClientMessage::parse(broadcasted, p->payload, p->len);
+    if (msgIn == NULL)
+        return;
+
+    std::unique_ptr<DhcpClientMessage> autoFreeMsgIn(msgIn);
+
+    DhcpServerMessage *msgOut = m_server.process(*msgIn);
+    if (msgOut == NULL)
+        return;
+
+    std::unique_ptr<DhcpServerMessage> autoFreeMsgOut(msgOut);
+
+    ip_addr_t dst = { msgOut->dst().u };
+    if (ip_addr_cmp(&dst, &ip_addr_any))
+        ip_addr_copy(dst, ip_addr_broadcast);
+
+    octets_t data;
+    rc = msgOut->encode(data);
+    if (RT_FAILURE(rc))
+        return;
+
+    /*
+     * pbuf lengths are 16-bit and front() needs a non-empty buffer:
+     * drop the reply rather than truncate it or touch an empty one.
+     */
+    if (data.empty() || data.size() > UINT16_MAX)
+        return;
+
+    unique_ptr_pbuf q ( pbuf_alloc(PBUF_RAW, (u16_t)data.size(), PBUF_RAM) );
+    if (!q)
+        return;
+
+    error = pbuf_take(q.get(), &data.front(), (u16_t)data.size());
+    if (error != ERR_OK)
+        return;
+
+    error = udp_sendto(pcb, q.get(), &dst, RTNETIPV4_PORT_BOOTPC);
+    if (error != ERR_OK)
+        return;
+}
+
+
+
+
+/*
+ * Entry point.
+ */
+extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv)
+{
+    /* map the IPRT status of the server's main() to a process exit code */
+    VBoxNetDhcpd Dhcpd;
+    if (RT_SUCCESS(Dhcpd.main(argc, argv)))
+        return RTEXITCODE_SUCCESS;
+
+    return RTEXITCODE_FAILURE;
+}
+
+
+#ifndef VBOX_WITH_HARDENING
+
+/*
+ * Non-hardened entry point: initialize the runtime (with the support
+ * library) and proceed to TrustedMain().
+ */
+int main(int argc, char **argv)
+{
+    const int rc = RTR3InitExe(argc, &argv, RTR3INIT_FLAGS_SUPLIB);
+    if (RT_SUCCESS(rc))
+        return TrustedMain(argc, argv);
+
+    return RTMsgInitFailure(rc);
+}
+
+
+# ifdef RT_OS_WINDOWS
+/** (We don't want a console usually.) */
+/* Windows GUI subsystem entry point: ignores its own arguments and calls main() with the CRT's __argc/__argv. */
+int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow)
+{
+ RT_NOREF(hInstance, hPrevInstance, lpCmdLine, nCmdShow);
+
+ return main(__argc, __argv);
+}
+# endif /* RT_OS_WINDOWS */
+
+#endif /* !VBOX_WITH_HARDENING */
diff --git a/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.rc b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.rc
new file mode 100644
index 00000000..fa6b3105
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpd.rc
@@ -0,0 +1,55 @@
+/* $Id: VBoxNetDhcpd.rc $ */
+/** @file
+ * VBoxNetDHCP - Resource file containing version info.
+ */
+
+/*
+ * Copyright (C) 2015-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <windows.h>
+#include <VBox/version.h>
+
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION VBOX_RC_FILE_VERSION
+ PRODUCTVERSION VBOX_RC_FILE_VERSION
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+ FILEFLAGS VBOX_RC_FILE_FLAGS
+ FILEOS VBOX_RC_FILE_OS
+ FILETYPE VBOX_RC_TYPE_DLL
+ FILESUBTYPE VFT2_UNKNOWN
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0" // Lang=US English, CharSet=Unicode
+ BEGIN
+ VALUE "CompanyName", VBOX_RC_COMPANY_NAME
+ VALUE "FileDescription", "VirtualBox DHCP Server\0"
+ VALUE "FileVersion", VBOX_RC_FILE_VERSION_STR
+ VALUE "InternalName", "VBoxNetDhcpd\0"
+ VALUE "LegalCopyright", VBOX_RC_LEGAL_COPYRIGHT
+ VALUE "OriginalFilename", "VBoxNetDhcpd.dll\0"
+ VALUE "ProductName", VBOX_RC_PRODUCT_NAME_STR
+ VALUE "ProductVersion", VBOX_RC_PRODUCT_VERSION_STR
+ VBOX_RC_MORE_STRINGS
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
+
+/* Creates the application icon. */
+#include "VBoxNetDhcpd-icon.rc"
+
diff --git a/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpdHardened.cpp b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpdHardened.cpp
new file mode 100644
index 00000000..26689e20
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/VBoxNetDhcpdHardened.cpp
@@ -0,0 +1,25 @@
+/* $Id: VBoxNetDhcpdHardened.cpp $ */
+/** @file
+ * VBoxNetDhcpd - Hardened main().
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <VBox/sup.h>
+
+
+/**
+ * Hardened stub entry point.
+ *
+ * Passes the command line and environment straight to SUPR3HardenedMain()
+ * under the program name "VBoxNetDHCP" with no flags, and returns whatever
+ * that call returns.
+ *
+ * NOTE(review): presumably SUPR3HardenedMain() locates the real service module
+ * by that name and invokes its TrustedMain() - confirm against VBox/sup.h.
+ */
+int main(int argc, char **argv, char **envp)
+{
+ return SUPR3HardenedMain("VBoxNetDHCP", 0 /* fFlags */, argc, argv, envp);
+}
+
diff --git a/src/VBox/NetworkServices/Dhcpd/lwipopts.h b/src/VBox/NetworkServices/Dhcpd/lwipopts.h
new file mode 100644
index 00000000..2fd714b0
--- /dev/null
+++ b/src/VBox/NetworkServices/Dhcpd/lwipopts.h
@@ -0,0 +1,181 @@
+/* $Id: lwipopts.h $ */
+/** @file
+ * DHCP server - lwIP configuration options.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_Dhcpd_lwipopts_h
+#define VBOX_INCLUDED_SRC_Dhcpd_lwipopts_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <VBox/cdefs.h> /* For VBOX_STRICT. */
+#include <iprt/mem.h>
+#include <iprt/alloca.h> /* This may include malloc.h (msc), which is something that has
+ * to be done before redefining any of the functions therein. */
+#include <iprt/rand.h> /* see LWIP_RAND() definition */
+
+/** Make lwIP use the libc malloc, or more precisely (see below) the IPRT
+ * memory allocation functions. */
+#define MEM_LIBC_MALLOC 1
+
+/** Set proper memory alignment. */
+#if HC_ARCH_BITS == 64
+# define MEM_ALIGNMENT 8
+#else
+#define MEM_ALIGNMENT 4
+#endif
+
+/* Padding before Ethernet header to make IP header aligned */
+#define ETH_PAD_SIZE 2
+
+/* IP */
+#define IP_REASSEMBLY 1
+#define IP_REASS_MAX_PBUFS 128
+
+
+
+/* MEMP_NUM_PBUF: the number of memp struct pbufs. If the application
+ sends a lot of data out of ROM (or other static memory), this
+ should be set high.
+
+ NB: This is for PBUF_ROM and PBUF_REF pbufs only!
+
+ Number of PBUF_POOL pbufs is controlled by PBUF_POOL_SIZE that,
+ somewhat confusingly, breaks MEMP_NUM_* pattern.
+
+ PBUF_RAM pbufs are allocated with mem_malloc (with MEM_LIBC_MALLOC
+ set to 1 this is just system malloc), not memp_malloc. */
+#define MEMP_NUM_PBUF (1024 * 4)
+
+
+/* MEMP_NUM_MLD6_GROUP: Maximum number of IPv6 multicast groups that
+ can be joined.
+
+ We need to be able to join solicited node multicast for each
+ address (potentially different) and two groups for DHCP6. All
+ routers multicast is hardcoded in ip6.c and does not require
+ explicit joining. Provide also for a few extra groups just in
+ case. */
+#define MEMP_NUM_MLD6_GROUP (LWIP_IPV6_NUM_ADDRESSES + /* dhcp6 */ 2 + /* extra */ 8)
+
+
+/* MEMP_NUM_TCPIP_MSG_*: the number of struct tcpip_msg, which is used
+ for sequential API communication and incoming packets. Used in
+ src/api/tcpip.c. */
+#define MEMP_NUM_TCPIP_MSG_API 128
+#define MEMP_NUM_TCPIP_MSG_INPKT 1024
+
+/* MEMP_NUM_UDP_PCB: the number of UDP protocol control blocks. One
+ per active UDP "connection". */
+#define MEMP_NUM_UDP_PCB 32
+
+/* Pbuf options */
+/* PBUF_POOL_SIZE: the number of buffers in the pbuf pool.
+ This is only for PBUF_POOL pbufs, primarily used by netif drivers.
+
+ This should have been named with the MEMP_NUM_ prefix (cf.
+ MEMP_NUM_PBUF for PBUF_ROM and PBUF_REF) as it controls the size of
+ yet another memp_malloc() pool. */
+#define PBUF_POOL_SIZE (1024 * 4)
+
+/* PBUF_POOL_BUFSIZE: the size of each pbuf in the pbuf pool.
+ Use default that is based on TCP_MSS and PBUF_LINK_HLEN. */
+#undef PBUF_POOL_BUFSIZE
+
+/** Turn on support for lightweight critical region protection. Leaving this
+ * off uses synchronization code in pbuf.c which is totally polluted with
+ * races. All the other lwip source files would fall back to semaphore-based
+ * synchronization, but pbuf.c is just broken, leading to incorrect allocation
+ * and as a result to assertions due to buffers being double freed. */
+#define SYS_LIGHTWEIGHT_PROT 1
+
+/** Attempt to get rid of htons etc. macro issues. */
+#undef LWIP_PREFIX_BYTEORDER_FUNCS
+
+#define LWIP_TCPIP_CORE_LOCKING_INPUT 0
+#define LWIP_TCPIP_CORE_LOCKING 0
+
+#define LWIP_NETCONN 0
+#define LWIP_SOCKET 0
+#define LWIP_COMPAT_SOCKETS 0
+#define LWIP_COMPAT_MUTEX 1
+
+/* Protocol selection: TCP is switched off; UDP, ARP and Ethernet support
+ * are switched on.  Note that the option name is LWIP_UDP - a misspelt name
+ * is silently ignored and lwIP falls back to its built-in default. */
+#define LWIP_TCP 0
+#define LWIP_UDP 1
+#define LWIP_ARP 1
+#define ARP_PROXY 0
+#define LWIP_ETHERNET 1
+
+/* accept any->broadcast */
+#define LWIP_IP_ACCEPT_UDP_PORT(port) ((port) == PP_NTOHS(/*DHCP_SERVER_PORT*/ 67))
+
+#define LWIP_IPV6 0
+#define LWIP_IPV6_FORWARD 0
+#define LWIP_ND6_PROXY 0
+
+#define LWIP_ND6_ALLOW_RA_UPDATES (!LWIP_IPV6_FORWARD)
+#define LWIP_IPV6_SEND_ROUTER_SOLICIT (!LWIP_IPV6_FORWARD)
+/* We don't need IPv6 autoconfig in the proxy, but it is required in rare
+ * cases, e.g. iSCSI over intnet with IPv6.
+ */
+#define LWIP_IPV6_AUTOCONFIG 1
+#if LWIP_IPV6_FORWARD /* otherwise use the default from lwip/opt.h */
+#define LWIP_IPV6_DUP_DETECT_ATTEMPTS 0
+#endif
+
+#define LWIP_IPV6_FRAG 1
+
+/**
+ * aka Slirp mode.
+ */
+#define LWIP_CONNECTION_PROXY 0
+#define IP_FORWARD 0
+
+/* MEMP_NUM_SYS_TIMEOUT: the number of simultaneously active
+ timeouts. */
+#define MEMP_NUM_SYS_TIMEOUT 16
+
+
+/* this is required for IPv6 and IGMP needs */
+#define LWIP_RAND() RTRandU32()
+
+/* Debugging stuff. */
+#ifdef DEBUG
+# define LWIP_DEBUG
+# include "lwip-log.h"
+
+# define LWIP_PROXY_DEBUG LWIP_DBG_OFF
+#endif /* DEBUG */
+
+/* printf formatter definitions */
+#define U16_F "hu"
+#define S16_F "hd"
+#define X16_F "hx"
+#define U32_F "u"
+#define S32_F "d"
+#define X32_F "x"
+
+/* Redirect libc memory alloc functions to IPRT. */
+#define malloc(x) RTMemAlloc(x)
+#define realloc(x,y) RTMemRealloc((x), (y))
+#define free(x) RTMemFree(x)
+
+/* Align VBOX_STRICT and LWIP_NOASSERT. */
+#ifndef VBOX_STRICT
+# define LWIP_NOASSERT 1
+#endif
+
+#endif /* !VBOX_INCLUDED_SRC_Dhcpd_lwipopts_h */
diff --git a/src/VBox/NetworkServices/Makefile.kmk b/src/VBox/NetworkServices/Makefile.kmk
new file mode 100644
index 00000000..9885b9d7
--- /dev/null
+++ b/src/VBox/NetworkServices/Makefile.kmk
@@ -0,0 +1,35 @@
+# $Id: Makefile.kmk $
+## @file
+# Top-level makefile for the VBox Network Services.
+#
+
+#
+# Copyright (C) 2009-2019 Oracle Corporation
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+
+SUB_DEPTH = ../../..
+include $(KBUILD_PATH)/subheader.kmk
+
+# VBoxNetDHCP and VBoxNetNAT require COM/XPCOM
+ifdef VBOX_WITH_MAIN
+ # Include sub-makefiles.
+ ifndef VBOX_WITH_DHCPD
+ include $(PATH_SUB_CURRENT)/DHCP/Makefile.kmk
+ else
+ include $(PATH_SUB_CURRENT)/Dhcpd/Makefile.kmk
+ endif
+ ifdef VBOX_WITH_NAT_SERVICE
+ include $(PATH_SUB_CURRENT)/NAT/Makefile.kmk
+ endif
+endif
+
+include $(FILE_KBUILD_SUB_FOOTER)
+
diff --git a/src/VBox/NetworkServices/NAT/Makefile.kmk b/src/VBox/NetworkServices/NAT/Makefile.kmk
new file mode 100644
index 00000000..8ec9e8ac
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/Makefile.kmk
@@ -0,0 +1,151 @@
+# $Id: Makefile.kmk $
+## @file
+# Sub-makefile for NAT Networking
+#
+
+#
+# Copyright (C) 2006-2019 Oracle Corporation
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+
+SUB_DEPTH = ../../../..
+include $(KBUILD_PATH)/subheader.kmk
+ifdef VBOX_WITH_LWIP_NAT
+
+VBOX_PATH_NAT_SRC := $(PATH_SUB_CURRENT)
+
+ # XXX: do not depend on order
+ ifndef LWIP_SOURCES
+ include $(PATH_SUB_CURRENT)/../../Devices/Network/lwip-new/Config.kmk
+ endif
+
+
+ ifdef VBOX_WITH_HARDENING
+ #
+ # Hardened stub exe for VBoxNetLwipNAT.
+ #
+ PROGRAMS += VBoxNetLwipNATHardened
+ VBoxNetLwipNATHardened_TEMPLATE = VBOXR3HARDENEDEXE
+ VBoxNetLwipNATHardened_NAME = VBoxNetNAT
+ VBoxNetLwipNATHardened_DEFS = SERVICE_NAME=\"VBoxNetNAT\"
+ VBoxNetLwipNATHardened_SOURCES = VBoxNetNATHardened.cpp
+ VBoxNetLwipNATHardened_SOURCES.win = $(VBoxNetLwipNAT_0_OUTDIR)/VBoxNetLwipNAT-icon.rc
+ VBoxNetLwipNATHardened_LDFLAGS.win = /SUBSYSTEM:windows
+ endif
+
+
+ #
+ # VBoxNetLwipNAT.
+ #
+ ifdef VBOX_WITH_HARDENING
+ DLLS += VBoxNetLwipNAT
+ else
+ PROGRAMS += VBoxNetLwipNAT
+ endif
+ VBoxNetLwipNAT_TEMPLATE := $(if-expr defined(VBOX_WITH_HARDENING),VBOXMAINDLL,VBOXMAINCLIENTEXE)
+ VBoxNetLwipNAT_NAME := VBoxNetNAT
+ VBoxNetLwipNAT_DEFS = IPv6
+ # VBoxNetLwipNAT_DEFS.linux = WITH_VALGRIND # instrument lwip memp.c
+ VBoxNetLwipNAT_DEFS.win = VBOX_COM_OUTOFPROC_MODULE _WIN32_WINNT=0x501 # Windows XP
+ # Convince Solaris headers to expose socket stuff we need.
+ #
+ # Setting _XOPEN_SOURCE to either 500 or 600 would always work, but
+ # <sys/feature_tests.h> insists that 600 requires C99 and so it
+ # explodes for older g++. It also insists that 500 is NOT to be used
+ # with C99.
+ #
+ # Newer g++ in C++11 mode (formerly known as C++0x) needs 600, so it
+ # employs sleight of hand to pretend it's C99 to keep feature test
+ # happy.
+ #
+ # Compile the C code with settings that match g++. This probably
+ # should be centralized so that whole codebase uses consistent
+ # settings.
+ ifeq ($(KBUILD_TARGET),solaris)
+ ifneq ($(VBOX_GCC_VERSION_CXX),)
+ ifneq ($(int-ge $(VBOX_GCC_VERSION_CXX),40600),)
+ # we compile C++ code with -std=c++0x / -std=c++11
+ VBoxNetLwipNAT_CFLAGS.solaris += -std=c99
+ VBoxNetLwipNAT_DEFS.solaris += _XOPEN_SOURCE=600
+ else
+ VBoxNetLwipNAT_DEFS.solaris += _XOPEN_SOURCE=500
+ endif
+ endif
+ VBoxNetLwipNAT_DEFS.solaris += __EXTENSIONS__=1
+ endif
+
+ # (current dir is for lwipopts.h)
+ VBoxNetLwipNAT_INCS += . $(addprefix ../../Devices/Network/lwip-new/,$(LWIP_INCS))
+
+ VBoxNetLwipNAT_SOURCES = \
+ VBoxNetLwipNAT.cpp \
+ ../NetLib/VBoxNetBaseService.cpp \
+ ../NetLib/VBoxNetPortForwardString.cpp \
+ ../NetLib/VBoxNetIntIf.cpp \
+ ../NetLib/VBoxNetUDP.cpp \
+ ../NetLib/VBoxNetARP.cpp \
+ ../NetLib/ComHostUtils.cpp \
+ $(addprefix ../../Devices/Network/lwip-new/,$(LWIP_SOURCES)) \
+ proxy_pollmgr.c \
+ proxy_rtadvd.c \
+ proxy.c \
+ pxremap.c \
+ pxtcp.c \
+ pxudp.c \
+ pxdns.c \
+ fwtcp.c \
+ fwudp.c \
+ portfwd.c \
+ proxy_dhcp6ds.c \
+ proxy_tftpd.c
+
+ ifeq ($(KBUILD_TARGET),win)
+ VBoxNetLwipNAT_SOURCES += pxping_win.c # unprivileged Icmp API
+ else
+ VBoxNetLwipNAT_SOURCES += pxping.c # raw sockets
+ endif
+
+ VBoxNetLwipNAT_SOURCES.darwin += rtmon_bsd.c
+ VBoxNetLwipNAT_SOURCES.freebsd += rtmon_bsd.c
+ VBoxNetLwipNAT_SOURCES.linux += rtmon_linux.c
+ VBoxNetLwipNAT_SOURCES.solaris += rtmon_bsd.c
+ VBoxNetLwipNAT_SOURCES.win += \
+ rtmon_win.c \
+ RTWinPoll.cpp \
+ RTWinSocketPair.cpp
+
+ VBoxNetLwipNAT_LIBS = \
+ $(LIB_RUNTIME)
+ VBoxNetLwipNAT_LIBS.solaris += socket nsl
+
+ VBoxNetLwipNAT_LDFLAGS.win = /SUBSYSTEM:windows
+
+ # ifeq ($(VBOX_WITH_HARDENING),)
+ # ifn1of ($(KBUILD_TARGET), darwin win)
+ # # helper for debugging unprivileged
+ # VBoxNetLwipNAT_DEFS += VBOX_RAWSOCK_DEBUG_HELPER
+ # VBoxNetLwipNAT_SOURCES += getrawsock.c
+ # endif
+ # endif
+
+ ifeq ($(KBUILD_TARGET),win)
+ # Icon include file.
+ VBoxNetLwipNAT_SOURCES += VBoxNetNAT.rc
+ VBoxNetNAT.rc_INCS = $(VBoxNetLwipNAT_0_OUTDIR)
+ VBoxNetNAT.rc_DEPS = $(VBoxNetLwipNAT_0_OUTDIR)/VBoxNetLwipNAT-icon.rc
+ VBoxNetNAT.rc_CLEAN = $(VBoxNetLwipNAT_0_OUTDIR)/VBoxNetLwipNAT-icon.rc
+ $$(VBoxNetLwipNAT_0_OUTDIR)/VBoxNetLwipNAT-icon.rc: $(VBOX_WINDOWS_ICON_FILE) $$(VBoxNetLwipNAT_DEFPATH)/Makefile.kmk | $$(dir $$@)
+ $(RM) -f $@
+ $(APPEND) $@ 'IDI_VIRTUALBOX ICON DISCARDABLE "$(subst /,\\,$(VBOX_WINDOWS_ICON_FILE))"'
+ endif # win
+
+endif # VBOX_WITH_LWIP_NAT
+include $(FILE_KBUILD_SUB_FOOTER)
+
diff --git a/src/VBox/NetworkServices/NAT/RTWinPoll.cpp b/src/VBox/NetworkServices/NAT/RTWinPoll.cpp
new file mode 100644
index 00000000..5e6f5789
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/RTWinPoll.cpp
@@ -0,0 +1,154 @@
+/* $Id: RTWinPoll.cpp $ */
+/** @file
+ * NAT Network - poll(2) implementation for winsock.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/cdefs.h>
+#include <iprt/errcore.h>
+#include <iprt/string.h>
+
+#include <iprt/errcore.h>
+#include <VBox/log.h>
+
+#include <iprt/win/winsock2.h>
+#include <iprt/win/windows.h>
+#include "winpoll.h"
+
+static HANDLE g_hNetworkEvent;
+
+/**
+ * poll(2)-style readiness wait for winsock sockets.
+ *
+ * Every valid socket in @a pFds is associated with one shared event object,
+ * the event is waited on, and the network events recorded for each socket are
+ * translated back into poll(2) revents bits.
+ *
+ * @returns VINF_SUCCESS once the sockets have been scanned, VERR_TIMEOUT if
+ *          the wait timed out, VERR_INVALID_PARAMETER if @a pFds is not a
+ *          valid pointer, VERR_INTERNAL_ERROR if the event cannot be created.
+ * @param   pFds     Array of descriptors.  revents is reset for every entry;
+ *                   entries whose fd is INVALID_SOCKET are otherwise skipped.
+ * @param   nfds     Number of entries in @a pFds.
+ * @param   timeout  Timeout handed to WSAWaitForMultipleEvents();
+ *                   RT_INDEFINITE_WAIT is mapped to WSA_INFINITE.
+ * @param   pNready  Where to store the number of entries that ended up with a
+ *                   non-zero revents.  Optional.  It is zeroed before waiting,
+ *                   so it reads 0 when the call returns VERR_TIMEOUT.
+ */
+int
+RTWinPoll(struct pollfd *pFds, unsigned int nfds, int timeout, int *pNready)
+{
+    AssertPtrReturn(pFds, VERR_INVALID_PARAMETER);
+
+    /* Give the caller a defined count on every return path, including the
+     * early VERR_TIMEOUT return below. */
+    if (pNready)
+        *pNready = 0;
+
+    /* The shared event object is created lazily on first use. */
+    if (g_hNetworkEvent == WSA_INVALID_EVENT)
+    {
+        g_hNetworkEvent = WSACreateEvent();
+        AssertReturn(g_hNetworkEvent != WSA_INVALID_EVENT, VERR_INTERNAL_ERROR);
+    }
+
+    for (unsigned int i = 0; i < nfds; ++i)
+    {
+        long eventMask = 0;
+        short pollEvents = pFds[i].events;
+
+        /* clean revents */
+        pFds[i].revents = 0;
+
+        /* ignore invalid sockets */
+        if (pFds[i].fd == INVALID_SOCKET)
+            continue;
+
+        /*
+         * POLLIN Data other than high priority data may be read without blocking.
+         * This is equivalent to ( POLLRDNORM | POLLRDBAND ).
+         * POLLRDBAND Priority data may be read without blocking.
+         * POLLRDNORM Normal data may be read without blocking.
+         */
+        if (pollEvents & POLLIN)
+            eventMask |= FD_READ | FD_ACCEPT;
+
+        /*
+         * POLLOUT Normal data may be written without blocking. This is equivalent
+         * to POLLWRNORM.
+         * POLLWRNORM Normal data may be written without blocking.
+         */
+        if (pollEvents & POLLOUT)
+            eventMask |= FD_WRITE | FD_CONNECT;
+
+        /*
+         * This is the "moral" equivalent of POLLHUP.
+         */
+        eventMask |= FD_CLOSE;
+        WSAEventSelect(pFds[i].fd, g_hNetworkEvent, eventMask);
+    }
+
+    DWORD index = WSAWaitForMultipleEvents(1,
+                                           &g_hNetworkEvent,
+                                           FALSE,
+                                           timeout == RT_INDEFINITE_WAIT ? WSA_INFINITE : timeout,
+                                           FALSE);
+    if (index != WSA_WAIT_EVENT_0)
+    {
+        if (index == WSA_WAIT_TIMEOUT)
+            return VERR_TIMEOUT;
+        /* Any other result is not treated as fatal: fall through and scan
+         * the sockets anyway. */
+    }
+
+    int nready = 0;
+    for (unsigned int i = 0; i < nfds; ++i)
+    {
+        short revents = 0;
+        WSANETWORKEVENTS NetworkEvents;
+        int err;
+
+        if (pFds[i].fd == INVALID_SOCKET)
+            continue;
+
+        RT_ZERO(NetworkEvents);
+
+        err = WSAEnumNetworkEvents(pFds[i].fd,
+                                   g_hNetworkEvent,
+                                   &NetworkEvents);
+
+        if (err == SOCKET_ERROR)
+        {
+            /* Only "not a socket" is reported back (as POLLNVAL); other
+             * enumeration errors leave revents at zero. */
+            if (WSAGetLastError() == WSAENOTSOCK)
+            {
+                pFds[i].revents = POLLNVAL;
+                ++nready;
+            }
+            continue;
+        }
+
+        /* dissociate the socket from the event */
+        WSAEventSelect(pFds[i].fd, g_hNetworkEvent, 0);
+
+/* Maps one winsock event bit to poll bits; a per-event error code adds POLLERR. */
+#define WSA_TO_POLL(_wsaev, _pollev) \
+        do { \
+            if (NetworkEvents.lNetworkEvents & (_wsaev)) { \
+                revents |= (_pollev); \
+                if (NetworkEvents.iErrorCode[_wsaev##_BIT] != 0) { \
+                    Log2(("sock %d: %s: %R[sockerr]\n", \
+                          pFds[i].fd, #_wsaev, \
+                          NetworkEvents.iErrorCode[_wsaev##_BIT])); \
+                    revents |= POLLERR; \
+                } \
+            } \
+        } while (0)
+
+        WSA_TO_POLL(FD_READ, POLLIN);
+        WSA_TO_POLL(FD_ACCEPT, POLLIN);
+        WSA_TO_POLL(FD_WRITE, POLLOUT);
+        WSA_TO_POLL(FD_CONNECT, POLLOUT);
+        /* A close also raises POLLIN, but only if the caller asked for it. */
+        WSA_TO_POLL(FD_CLOSE, POLLHUP | (pFds[i].events & POLLIN));
+
+        Assert((revents & ~(pFds[i].events | POLLHUP | POLLERR)) == 0);
+
+        if (revents != 0)
+        {
+            pFds[i].revents = revents;
+            ++nready;
+        }
+    }
+    WSAResetEvent(g_hNetworkEvent);
+
+    if (pNready)
+        *pNready = nready;
+
+    return VINF_SUCCESS;
+}
diff --git a/src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp b/src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp
new file mode 100644
index 00000000..c05f9656
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp
@@ -0,0 +1,225 @@
+/* $Id: RTWinSocketPair.cpp $ */
+/** @file
+ * NAT Network - socketpair(2) emulation for winsock.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/cdefs.h>
+#include <iprt/errcore.h>
+
+#include <iprt/errcore.h>
+
+#include <iprt/win/winsock2.h>
+#include <iprt/win/windows.h>
+
+#include <stdio.h>
+#include <iprt/log.h>
+
+/**
+ * socketpair(2) emulation for winsock, built from two IPv4 loopback sockets.
+ *
+ * For SOCK_STREAM a temporary listener is bound to an ephemeral loopback
+ * port, socket_vector[0] connects to it and socket_vector[1] is the accepted
+ * peer.  For SOCK_DGRAM both sockets are bound to ephemeral loopback ports
+ * and connected to each other.  On success both sockets are switched to
+ * non-blocking mode.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER for an unsupported
+ *          domain/type or an invalid @a socket_vector, VERR_INTERNAL_ERROR if
+ *          any winsock call fails.
+ * @param   domain         Address family; only AF_INET is accepted.
+ * @param   type           SOCK_STREAM or SOCK_DGRAM.
+ * @param   protocol       Protocol passed through to WSASocket().
+ * @param   socket_vector  Receives the two connected sockets.  On failure
+ *                         every socket created so far is closed and both
+ *                         entries are INVALID_SOCKET.
+ */
+extern "C" int RTWinSocketPair(int domain, int type, int protocol, SOCKET socket_vector[2])
+{
+    LogFlowFunc(("ENTER: domain:%d, type:%d, protocol:%d, socket_vector:%p\n",
+                 domain, type, protocol, socket_vector));
+    switch (domain)
+    {
+        case AF_INET:
+            break;
+        case AF_INET6: /* I doubt we really need it. */
+        default:
+            AssertMsgFailedReturn(("Unsuported domain:%d\n", domain),
+                                  VERR_INVALID_PARAMETER);
+    }
+
+    switch (type)
+    {
+        case SOCK_STREAM:
+        case SOCK_DGRAM:
+            break;
+        default:
+            AssertMsgFailedReturn(("Unsuported type:%d\n", type),
+                                  VERR_INVALID_PARAMETER);
+    }
+
+    AssertPtrReturn(socket_vector, VERR_INVALID_PARAMETER);
+    if (!socket_vector)
+        return VERR_INVALID_PARAMETER;
+
+    socket_vector[0] = socket_vector[1] = INVALID_SOCKET;
+
+    SOCKET listener = INVALID_SOCKET;
+
+    union {
+        struct sockaddr_in in_addr;
+        struct sockaddr addr;
+    } sa[2];
+
+    int cb = sizeof(sa);
+    memset(&sa, 0, cb);
+
+    /* Loopback address with port 0, i.e. let the stack pick the port. */
+    sa[0].in_addr.sin_family = domain;
+    sa[0].in_addr.sin_addr.s_addr = RT_H2N_U32(INADDR_LOOPBACK);
+    sa[0].in_addr.sin_port = 0;
+    cb = sizeof(sa[0]);
+
+    if (type == SOCK_STREAM)
+    {
+        listener = WSASocket(domain, type, protocol, 0, NULL, 0);
+
+        if (listener == INVALID_SOCKET)
+        {
+            return VERR_INTERNAL_ERROR;
+        }
+
+        int reuse = 1;
+        cb = sizeof(int);
+        int rc = setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, (char *)&reuse, cb);
+
+        if (rc)
+        {
+            goto close_socket;
+        }
+
+        cb = sizeof(sa[0]);
+        rc = bind(listener, &sa[0].addr, cb);
+        if (rc)
+        {
+            goto close_socket;
+        }
+
+        /* Read back the address to learn which port was assigned. */
+        memset(&sa[0], 0, cb);
+        rc = getsockname(listener, &sa[0].addr, &cb);
+        if (rc)
+        {
+            goto close_socket;
+        }
+
+        rc = listen(listener, 1);
+        if (rc)
+        {
+            goto close_socket;
+        }
+
+        socket_vector[0] = WSASocket(domain, type, protocol, 0, NULL, 0);
+        if (socket_vector[0] == INVALID_SOCKET)
+        {
+            goto close_socket;
+        }
+
+        rc = connect(socket_vector[0], &sa[0].addr, cb);
+        if (rc)
+            goto close_socket;
+
+        socket_vector[1] = accept(listener, NULL, NULL);
+        if (socket_vector[1] == INVALID_SOCKET)
+        {
+            goto close_socket;
+        }
+
+        /* The listener has served its purpose. */
+        closesocket(listener);
+    }
+    else
+    {
+        socket_vector[0] = WSASocket(domain, type, protocol, 0, NULL, 0);
+        if (socket_vector[0] == INVALID_SOCKET)
+            goto close_socket;
+
+        cb = sizeof(sa[0]);
+        int rc = bind(socket_vector[0], &sa[0].addr, cb);
+        Assert(rc != SOCKET_ERROR);
+        if (rc == SOCKET_ERROR)
+        {
+            goto close_socket;
+        }
+
+        sa[1].in_addr.sin_family = domain;
+        sa[1].in_addr.sin_addr.s_addr = RT_H2N_U32(INADDR_LOOPBACK);
+        sa[1].in_addr.sin_port = 0;
+
+        socket_vector[1] = WSASocket(domain, type, protocol, 0, NULL, 0);
+        if (socket_vector[1] == INVALID_SOCKET)
+            goto close_socket;
+
+        rc = bind(socket_vector[1], &sa[1].addr, cb);
+        Assert(rc != SOCKET_ERROR);
+        if (rc == SOCKET_ERROR)
+        {
+            goto close_socket;
+        }
+
+        {
+            /* Blocking mode while the pair is wired up.  A failure here goes
+             * through the common cleanup so that the sockets are not leaked. */
+            u_long mode = 0;
+            rc = ioctlsocket(socket_vector[0], FIONBIO, &mode);
+            AssertMsg(rc != SOCKET_ERROR,
+                      ("ioctl error: %d\n", WSAGetLastError()));
+            if (rc == SOCKET_ERROR)
+                goto close_socket;
+
+            rc = ioctlsocket(socket_vector[1], FIONBIO, &mode);
+            AssertMsg(rc != SOCKET_ERROR,
+                      ("ioctl error: %d\n", WSAGetLastError()));
+            if (rc == SOCKET_ERROR)
+                goto close_socket;
+        }
+
+        /* Learn the ports assigned to both ends ... */
+        memset(&sa, 0, 2 * cb);
+        rc = getsockname(socket_vector[0], &sa[0].addr, &cb);
+        Assert(rc != SOCKET_ERROR);
+        if (rc == SOCKET_ERROR)
+        {
+            goto close_socket;
+        }
+
+        rc = getsockname(socket_vector[1], &sa[1].addr, &cb);
+        Assert(rc != SOCKET_ERROR);
+        if (rc == SOCKET_ERROR)
+        {
+            goto close_socket;
+        }
+
+        /* ... and connect each end to the other one. */
+        rc = connect(socket_vector[0], &sa[1].addr, cb);
+        Assert(rc != SOCKET_ERROR);
+        if (rc == SOCKET_ERROR)
+        {
+            goto close_socket;
+        }
+
+        rc = connect(socket_vector[1], &sa[0].addr, cb);
+        Assert(rc != SOCKET_ERROR);
+        if (rc == SOCKET_ERROR)
+        {
+            goto close_socket;
+        }
+    }
+
+    /* Hand out non-blocking sockets; a failure here is only logged. */
+    for (int i = 0; i < 2; ++i) {
+        SOCKET s = socket_vector[i];
+        u_long mode = 1;
+
+        int status = ioctlsocket(s, FIONBIO, &mode);
+        if (status == SOCKET_ERROR) {
+            LogRel(("FIONBIO: %R[sockerr]\n", WSAGetLastError()));
+        }
+    }
+
+    LogFlowFuncLeaveRC(VINF_SUCCESS);
+    return VINF_SUCCESS;
+
+close_socket:
+    if (listener != INVALID_SOCKET)
+        closesocket(listener);
+
+    /* Close whatever was created and do not hand stale handles back. */
+    for (int i = 0; i < 2; ++i)
+    {
+        if (socket_vector[i] != INVALID_SOCKET)
+        {
+            closesocket(socket_vector[i]);
+            socket_vector[i] = INVALID_SOCKET;
+        }
+    }
+
+    LogFlowFuncLeaveRC(VERR_INTERNAL_ERROR);
+    return VERR_INTERNAL_ERROR;
+}
diff --git a/src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp b/src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp
new file mode 100644
index 00000000..019f5f0f
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp
@@ -0,0 +1,1504 @@
+/* $Id: VBoxNetLwipNAT.cpp $ */
+/** @file
+ * VBoxNetNAT - NAT Service for connecting to IntNet.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+/* Must be included before winutils.h (lwip/def.h), otherwise Windows build breaks. */
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+
+#include <VBox/com/assert.h>
+#include <VBox/com/com.h>
+#include <VBox/com/listeners.h>
+#include <VBox/com/string.h>
+#include <VBox/com/Guid.h>
+#include <VBox/com/array.h>
+#include <VBox/com/ErrorInfo.h>
+#include <VBox/com/errorprint.h>
+#include <VBox/com/VirtualBox.h>
+
+#include <iprt/net.h>
+#include <iprt/initterm.h>
+#include <iprt/alloca.h>
+#ifndef RT_OS_WINDOWS
+# include <arpa/inet.h>
+#endif
+#include <iprt/err.h>
+#include <iprt/time.h>
+#include <iprt/timer.h>
+#include <iprt/thread.h>
+#include <iprt/stream.h>
+#include <iprt/path.h>
+#include <iprt/param.h>
+#include <iprt/pipe.h>
+#include <iprt/getopt.h>
+#include <iprt/string.h>
+#include <iprt/mem.h>
+#include <iprt/message.h>
+#include <iprt/req.h>
+#include <iprt/file.h>
+#include <iprt/semaphore.h>
+#include <iprt/cpp/utils.h>
+#include <VBox/log.h>
+
+#include <VBox/sup.h>
+#include <VBox/intnet.h>
+#include <VBox/intnetinline.h>
+#include <VBox/vmm/pdmnetinline.h>
+#include <VBox/vmm/vmm.h>
+#include <VBox/version.h>
+
+#ifndef RT_OS_WINDOWS
+# include <sys/poll.h>
+# include <sys/socket.h>
+# include <netinet/in.h>
+# ifdef RT_OS_LINUX
+# include <linux/icmp.h> /* ICMP_FILTER */
+# endif
+# include <netinet/icmp6.h>
+#endif
+
+#include <map>
+#include <vector>
+#include <string>
+
+#include <stdio.h>
+
+#include "../NetLib/VBoxNetLib.h"
+#include "../NetLib/VBoxNetBaseService.h"
+#include "../NetLib/utils.h"
+#include "VBoxLwipCore.h"
+
+extern "C"
+{
+/* bunch of LWIP headers */
+#include "lwip/sys.h"
+#include "lwip/pbuf.h"
+#include "lwip/netif.h"
+#include "lwip/ethip6.h"
+#include "lwip/nd6.h" // for proxy_na_hook
+#include "lwip/mld6.h"
+#include "lwip/tcpip.h"
+#include "netif/etharp.h"
+
+#include "proxy.h"
+#include "pxremap.h"
+#include "portfwd.h"
+}
+
+
+#if defined(VBOX_RAWSOCK_DEBUG_HELPER) \
+ && (defined(VBOX_WITH_HARDENING) \
+ || defined(RT_OS_WINDOWS) \
+ || defined(RT_OS_DARWIN))
+# error Have you forgotten to turn off VBOX_RAWSOCK_DEBUG_HELPER?
+#endif
+
+#ifdef VBOX_RAWSOCK_DEBUG_HELPER
+extern "C" int getrawsock(int type);
+#endif
+
+#include "../NetLib/VBoxPortForwardString.h"
+
+static RTGETOPTDEF g_aGetOptDef[] = /* Extra options; registered with addCommandLineOption() in the VBoxNetLwipNAT constructor. */
+{
+ { "--port-forward4", 'p', RTGETOPT_REQ_STRING }, /* IPv4 rule string, handled by parseOpt(). */
+ { "--port-forward6", 'P', RTGETOPT_REQ_STRING } /* IPv6 rule string, handled by parseOpt(). */
+};
+
+typedef struct NATSEVICEPORTFORWARDRULE /* One port-forwarding rule as tracked by the NAT service. */
+{
+ PORTFORWARDRULE Pfr; /**< Rule description: name, protocol, IPv6 flag, host and guest address/port. */
+ fwspec FWSpec; /**< Forwarding spec filled in from Pfr by fwspec_set() in natServicePfRegister(). */
+} NATSEVICEPORTFORWARDRULE, *PNATSEVICEPORTFORWARDRULE;
+
+typedef std::vector<NATSEVICEPORTFORWARDRULE> VECNATSERVICEPF; /* Rule list; one instance per address family in VBoxNetLwipNAT. */
+typedef VECNATSERVICEPF::iterator ITERATORNATSERVICEPF;
+typedef VECNATSERVICEPF::const_iterator CITERATORNATSERVICEPF;
+
+static int fetchNatPortForwardRules(const ComNatPtr&, bool, VECNATSERVICEPF&); /* init() passes false for the IPv4 list and true for the IPv6 list. */
+
+static int vboxNetNATLogInit(int argc, char **argv); /* Called from TrustedMain() before the service object is created. */
+
+
+class VBoxNetLwipNAT: public VBoxNetBaseService, public NATNetworkEventAdapter
+{
+ friend class NATNetworkListener;
+ public:
+ VBoxNetLwipNAT(SOCKET icmpsock4, SOCKET icmpsock6); /**< Stores the ICMP sockets opened by TrustedMain() in m_ProxyOptions. */
+ virtual ~VBoxNetLwipNAT(); /**< Frees the TFTP root string and the name server list. */
+ void usage(){ /** @todo should be implemented */ };
+ int run(); /**< Runs the base service loop, then finalizes lwIP and destroys the event listeners. */
+ virtual int init(void); /**< Reads the network configuration from Main, goes online and starts the lwIP thread. */
+ virtual int parseOpt(int rc, const RTGETOPTUNION& getOptVal); /**< Handles the 'p'/'P' port-forwarding options. */
+ /* VBoxNetNAT always needs Main */
+ virtual bool isMainNeeded() const { return true; }
+ virtual int processFrame(void *, size_t); /**< Copies a frame into a pbuf and passes it to the lwIP interface. */
+ virtual int processGSO(PCPDMNETWORKGSO, size_t); /**< Carves a GSO frame into segments and feeds each to processFrame(). */
+ virtual int processUDP(void *, size_t) { return VERR_IGNORED; }
+
+ private:
+ struct proxy_options m_ProxyOptions; /**< Options handed to proxy_init() on the lwIP thread. */
+ struct sockaddr_in m_src4; /**< IPv4 source address for outgoing connections; used when the SourceIp4 extra data is set. */
+ struct sockaddr_in6 m_src6; /**< IPv6 source address for outgoing connections; used when the SourceIp6 extra data is set. */
+ /**
+ * Place for registered local interfaces: loopback address to offset
+ * mappings filled in by init() from localMappings().
+ */
+ ip4_lomap m_lo2off[10];
+ ip4_lomap_desc m_loOptDescriptor; /**< Describes the used part of m_lo2off; referenced by m_ProxyOptions.lomap_desc. */
+
+ uint16_t m_u16Mtu; /**< Interface MTU; set in netifInit(). */
+ netif m_LwipNetIf; /**< The lwIP interface registered with netif_add(). */
+
+ /* Our NAT network descriptor in Main */
+ ComPtr<INATNetwork> m_net;
+ ComPtr<IHost> m_host; /**< Host object, kept for name server lookups in getHostNameservers(). */
+
+ ComNatListenerPtr m_NatListener; /**< Listens for port-forward and network setting events. */
+ ComNatListenerPtr m_VBoxListener; /**< Listens for resolver configuration and network start/stop events. */
+ ComNatListenerPtr m_VBoxClientListener; /**< Listens for VBoxSVC availability changes. */
+ static INTNETSEG aXmitSeg[64]; /**< Scratch segment list used by netifLinkoutput(). */
+
+ HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent);
+
+ const char **getHostNameservers(); /**< Returns a NULL terminated, heap allocated list of name servers, or NULL. */
+
+ /* Only for debug needs, by default NAT service should load rules from SVC
+ * on startup, and then sync them on events.  Set by parseOpt() when rules
+ * are given on the command line.
+ */
+ bool fDontLoadRulesOnStartup;
+ static DECLCALLBACK(void) onLwipTcpIpInit(void *arg);
+ static DECLCALLBACK(void) onLwipTcpIpFini(void *arg);
+ static err_t netifInit(netif *pNetif);
+ static err_t netifLinkoutput(netif *pNetif, pbuf *pBuf);
+ /* static int intNetThreadRecv(RTTHREAD, void *); - unused */
+
+ VECNATSERVICEPF m_vecPortForwardRule4; /**< IPv4 port-forwarding rules. */
+ VECNATSERVICEPF m_vecPortForwardRule6; /**< IPv6 port-forwarding rules. */
+
+ static int natServicePfRegister(NATSEVICEPORTFORWARDRULE& natServicePf);
+ static int natServiceProcessRegisteredPf(VECNATSERVICEPF& vecPf);
+};
+
+
+static VBoxNetLwipNAT *g_pLwipNat; /* The single service instance, created in TrustedMain(). */
+INTNETSEG VBoxNetLwipNAT::aXmitSeg[64];
+
+/**
+ * Dispatches Main events delivered to this NAT service instance.
+ *
+ * Handles NAT network setting changes (only the IPv6 default route
+ * advertisement), port-forwarding rule additions and removals, host resolver
+ * configuration changes, network stop requests and the VBoxSVC availability
+ * event.  Events carrying a network name are ignored unless the name matches
+ * our network.
+ *
+ * @returns S_OK, the status of a failed COM getter, or E_INVALIDARG for a
+ *          port-forwarding rule whose name or ports do not fit the rule
+ *          structure.
+ * @param   aEventType  The event type.
+ * @param   pEvent      The event object, queried for the type specific
+ *                      interface.
+ *
+ * @note This runs on the event thread.
+ */
+HRESULT VBoxNetLwipNAT::HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent)
+{
+    HRESULT hrc = S_OK;
+    switch (aEventType)
+    {
+        case VBoxEventType_OnNATNetworkSetting:
+        {
+            ComPtr<INATNetworkSettingEvent> pSettingsEvent(pEvent);
+
+            com::Bstr networkName;
+            hrc = pSettingsEvent->COMGETTER(NetworkName)(networkName.asOutParam());
+            AssertComRCReturn(hrc, hrc);
+            if (networkName.compare(getNetworkName().c_str()))
+                break; /* change not for our network */
+
+            // XXX: only handle IPv6 default route for now
+            if (!m_ProxyOptions.ipv6_enabled)
+                break;
+
+            BOOL fIPv6DefaultRoute = FALSE;
+            hrc = pSettingsEvent->COMGETTER(AdvertiseDefaultIPv6RouteEnabled)(&fIPv6DefaultRoute);
+            AssertComRCReturn(hrc, hrc);
+
+            if (m_ProxyOptions.ipv6_defroute == fIPv6DefaultRoute)
+                break;
+
+            m_ProxyOptions.ipv6_defroute = fIPv6DefaultRoute;
+            tcpip_callback_with_block(proxy_rtadvd_do_quick, &m_LwipNetIf, 0);
+            break;
+        }
+
+        case VBoxEventType_OnNATNetworkPortForward:
+        {
+            ComPtr<INATNetworkPortForwardEvent> pForwardEvent = pEvent;
+
+            com::Bstr networkName;
+            hrc = pForwardEvent->COMGETTER(NetworkName)(networkName.asOutParam());
+            AssertComRCReturn(hrc, hrc);
+            if (networkName.compare(getNetworkName().c_str()))
+                break; /* change not for our network */
+
+            BOOL fCreateFW;
+            hrc = pForwardEvent->COMGETTER(Create)(&fCreateFW);
+            AssertComRCReturn(hrc, hrc);
+
+            BOOL fIPv6FW;
+            hrc = pForwardEvent->COMGETTER(Ipv6)(&fIPv6FW);
+            AssertComRCReturn(hrc, hrc);
+
+            com::Bstr name;
+            hrc = pForwardEvent->COMGETTER(Name)(name.asOutParam());
+            AssertComRCReturn(hrc, hrc);
+
+            NATProtocol_T proto = NATProtocol_TCP;
+            hrc = pForwardEvent->COMGETTER(Proto)(&proto);
+            AssertComRCReturn(hrc, hrc);
+
+            com::Bstr strHostAddr;
+            hrc = pForwardEvent->COMGETTER(HostIp)(strHostAddr.asOutParam());
+            AssertComRCReturn(hrc, hrc);
+
+            LONG lHostPort;
+            hrc = pForwardEvent->COMGETTER(HostPort)(&lHostPort);
+            AssertComRCReturn(hrc, hrc);
+
+            com::Bstr strGuestAddr;
+            hrc = pForwardEvent->COMGETTER(GuestIp)(strGuestAddr.asOutParam());
+            AssertComRCReturn(hrc, hrc);
+
+            LONG lGuestPort;
+            hrc = pForwardEvent->COMGETTER(GuestPort)(&lGuestPort);
+            AssertComRCReturn(hrc, hrc);
+
+            VECNATSERVICEPF& rules = fIPv6FW ? m_vecPortForwardRule6
+                                             : m_vecPortForwardRule4;
+
+            NATSEVICEPORTFORWARDRULE r;
+            RT_ZERO(r);
+
+            /* Convert the strings once.  These must be declared before the
+             * first goto below so that no initialization is jumped over. */
+            const com::Utf8Str strNameUtf8(name);
+            const com::Utf8Str strHostAddrUtf8(strHostAddr);
+            const com::Utf8Str strGuestAddrUtf8(strGuestAddr);
+
+            r.Pfr.fPfrIPv6 = fIPv6FW;
+
+            switch (proto)
+            {
+                case NATProtocol_TCP:
+                    r.Pfr.iPfrProto = IPPROTO_TCP;
+                    break;
+                case NATProtocol_UDP:
+                    r.Pfr.iPfrProto = IPPROTO_UDP;
+                    break;
+
+                default:
+                    LogRel(("Event: %s %s port-forwarding rule \"%s\": invalid protocol %d\n",
+                            fCreateFW ? "Add" : "Remove",
+                            fIPv6FW ? "IPv6" : "IPv4",
+                            strNameUtf8.c_str(),
+                            (int)proto));
+                    goto port_forward_done;
+            }
+
+            LogRel(("Event: %s %s port-forwarding rule \"%s\": %s %s%s%s:%d -> %s%s%s:%d\n",
+                    fCreateFW ? "Add" : "Remove",
+                    fIPv6FW ? "IPv6" : "IPv4",
+                    strNameUtf8.c_str(),
+                    proto == NATProtocol_TCP ? "TCP" : "UDP",
+                    /* from */
+                    fIPv6FW ? "[" : "",
+                    strHostAddrUtf8.c_str(),
+                    fIPv6FW ? "]" : "",
+                    lHostPort,
+                    /* to */
+                    fIPv6FW ? "[" : "",
+                    strGuestAddrUtf8.c_str(),
+                    fIPv6FW ? "]" : "",
+                    lGuestPort));
+
+            /* The name is stored as UTF-8 including the terminator, so check
+             * the UTF-8 byte length rather than the UTF-16 length. */
+            if (strNameUtf8.length() >= sizeof(r.Pfr.szPfrName))
+            {
+                hrc = E_INVALIDARG;
+                goto port_forward_done;
+            }
+
+            /* Reject ports that would be silently truncated to 16 bits. */
+            if (   lHostPort  < 0 || lHostPort  > UINT16_MAX
+                || lGuestPort < 0 || lGuestPort > UINT16_MAX)
+            {
+                hrc = E_INVALIDARG;
+                goto port_forward_done;
+            }
+
+            RTStrPrintf(r.Pfr.szPfrName, sizeof(r.Pfr.szPfrName),
+                        "%s", strNameUtf8.c_str());
+
+            RTStrPrintf(r.Pfr.szPfrHostAddr, sizeof(r.Pfr.szPfrHostAddr),
+                        "%s", strHostAddrUtf8.c_str());
+
+            r.Pfr.u16PfrHostPort = (uint16_t)lHostPort;
+
+            RTStrPrintf(r.Pfr.szPfrGuestAddr, sizeof(r.Pfr.szPfrGuestAddr),
+                        "%s", strGuestAddrUtf8.c_str());
+
+            r.Pfr.u16PfrGuestPort = (uint16_t)lGuestPort;
+
+            if (fCreateFW) /* Addition */
+            {
+                int rc = natServicePfRegister(r);
+                if (RT_SUCCESS(rc))
+                    rules.push_back(r);
+            }
+            else /* Deletion */
+            {
+                ITERATORNATSERVICEPF it;
+                for (it = rules.begin(); it != rules.end(); ++it)
+                {
+                    /* compare */
+                    NATSEVICEPORTFORWARDRULE &natFw = *it;
+                    if (   natFw.Pfr.iPfrProto == r.Pfr.iPfrProto
+                        && natFw.Pfr.u16PfrHostPort == r.Pfr.u16PfrHostPort
+                        && strncmp(natFw.Pfr.szPfrHostAddr, r.Pfr.szPfrHostAddr, INET6_ADDRSTRLEN) == 0
+                        && natFw.Pfr.u16PfrGuestPort == r.Pfr.u16PfrGuestPort
+                        && strncmp(natFw.Pfr.szPfrGuestAddr, r.Pfr.szPfrGuestAddr, INET6_ADDRSTRLEN) == 0)
+                    {
+                        fwspec *pFwCopy = (fwspec *)RTMemDup(&natFw.FWSpec, sizeof(natFw.FWSpec));
+                        if (pFwCopy)
+                        {
+                            int status = portfwd_rule_del(pFwCopy);
+                            if (status == 0)
+                                rules.erase(it);   /* (pFwCopy is owned by lwip thread now.) */
+                            else
+                                RTMemFree(pFwCopy);
+                        }
+                        break; /* the iterator may be invalid now, do not continue */
+                    }
+                } /* loop over vector elements */
+            } /* condition add or delete */
+        port_forward_done:
+            /* clean up strings */
+            name.setNull();
+            strHostAddr.setNull();
+            strGuestAddr.setNull();
+            break;
+        }
+
+        case VBoxEventType_OnHostNameResolutionConfigurationChange:
+        {
+            const char **ppcszNameServers = getHostNameservers();
+            err_t error;
+
+            error = tcpip_callback_with_block(pxdns_set_nameservers,
+                                              ppcszNameServers,
+                                              /* :block */ 0);
+            if (error != ERR_OK && ppcszNameServers != NULL)
+            {
+                /* The callback was not queued, so the list is still ours.
+                 * Free the duplicated strings as well as the NULL terminated
+                 * array, the same way the destructor does. */
+                for (const char **ppcsz = ppcszNameServers; *ppcsz != NULL; ++ppcsz)
+                    RTStrFree((char *)*ppcsz);
+                RTMemFree(ppcszNameServers);
+            }
+            break;
+        }
+
+        case VBoxEventType_OnNATNetworkStartStop:
+        {
+            ComPtr <INATNetworkStartStopEvent> pStartStopEvent = pEvent;
+
+            com::Bstr networkName;
+            hrc = pStartStopEvent->COMGETTER(NetworkName)(networkName.asOutParam());
+            AssertComRCReturn(hrc, hrc);
+            if (networkName.compare(getNetworkName().c_str()))
+                break; /* change not for our network */
+
+            BOOL fStart = TRUE;
+            hrc = pStartStopEvent->COMGETTER(StartEvent)(&fStart);
+            AssertComRCReturn(hrc, hrc);
+
+            if (!fStart)
+                shutdown();
+            break;
+        }
+
+        case VBoxEventType_OnVBoxSVCAvailabilityChanged:
+        {
+            LogRel(("VBoxSVC became unavailable, exiting.\n"));
+            shutdown();
+            break;
+        }
+
+        default: break; /* Shut up MSC. */
+    }
+    return hrc;
+}
+
+
+/**
+ * Initialization callback handed to vboxLwipCoreInitialize() by init().
+ *
+ * Installs the proxy remapping hooks, registers the service's lwIP network
+ * interface with its IPv4 address and netmask, brings it up, joins the IPv6
+ * solicited-node multicast groups when IPv6 is enabled, initializes the proxy
+ * and registers the port-forwarding rules collected so far.
+ *
+ * @param   arg     The VBoxNetLwipNAT instance.
+ */
+/*static*/ DECLCALLBACK(void) VBoxNetLwipNAT::onLwipTcpIpInit(void *arg)
+{
+    AssertPtrReturnVoid(arg);
+    VBoxNetLwipNAT *pNat = static_cast<VBoxNetLwipNAT *>(arg);
+
+    HRESULT hrc = com::Initialize();
+    Assert(!FAILED(hrc)); NOREF(hrc);
+
+    proxy_arp_hook = pxremap_proxy_arp;
+    proxy_ip4_divert_hook = pxremap_ip4_divert;
+
+    proxy_na_hook = pxremap_proxy_na;
+    proxy_ip6_divert_hook = pxremap_ip6_divert;
+
+    /* lwip thread */
+    RTNETADDRIPV4 address = g_pLwipNat->getIpv4Address();
+    RTNETADDRIPV4 netmask = g_pLwipNat->getIpv4Netmask();
+
+    /* (The network address was computed here but never used; dropped.) */
+    ip_addr LwipIpAddr, LwipIpNetMask;
+
+    memcpy(&LwipIpAddr, &address, sizeof(ip_addr));
+    memcpy(&LwipIpNetMask, &netmask, sizeof(ip_addr));
+
+    netif *pNetif = netif_add(&g_pLwipNat->m_LwipNetIf /* Lwip Interface */,
+                              &LwipIpAddr /* IP address*/,
+                              &LwipIpNetMask /* Network mask */,
+                              &LwipIpAddr /* gateway address, @todo: is self IP acceptable? */,
+                              g_pLwipNat /* state */,
+                              VBoxNetLwipNAT::netifInit /* netif_init_fn */,
+                              tcpip_input /* netif_input_fn */);
+
+    AssertPtrReturnVoid(pNetif);
+
+    LogRel(("netif %c%c%d: mac %RTmac\n",
+            pNetif->name[0], pNetif->name[1], pNetif->num,
+            pNetif->hwaddr));
+    LogRel(("netif %c%c%d: inet %RTnaipv4 netmask %RTnaipv4\n",
+            pNetif->name[0], pNetif->name[1], pNetif->num,
+            pNetif->ip_addr, pNetif->netmask));
+    for (int i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) {
+        if (!ip6_addr_isinvalid(netif_ip6_addr_state(pNetif, i))) {
+            LogRel(("netif %c%c%d: inet6 %RTnaipv6\n",
+                    pNetif->name[0], pNetif->name[1], pNetif->num,
+                    netif_ip6_addr(pNetif, i)));
+        }
+    }
+
+    netif_set_up(pNetif);
+    netif_set_link_up(pNetif);
+
+    if (pNat->m_ProxyOptions.ipv6_enabled) {
+        /*
+         * XXX: lwIP currently only ever calls mld6_joingroup() in
+         * nd6_tmr() for fresh tentative addresses, which is a wrong place
+         * to do it - but I'm not keen on fixing this properly for now
+         * (with correct handling of interface up and down transitions,
+         * etc).  So stick it here as a kludge.
+         */
+        for (int i = 0; i <= 1; ++i) {
+            ip6_addr_t *paddr = netif_ip6_addr(pNetif, i);
+
+            ip6_addr_t solicited_node_multicast_address;
+            ip6_addr_set_solicitednode(&solicited_node_multicast_address,
+                                       paddr->addr[3]);
+            mld6_joingroup(paddr, &solicited_node_multicast_address);
+        }
+
+        /*
+         * XXX: We must join the solicited-node multicast for the
+         * addresses we do IPv6 NA-proxy for.  We map IPv6 loopback to
+         * proxy address + 1.  We only need the low 24 bits, and those are
+         * fixed.
+         */
+        {
+            ip6_addr_t solicited_node_multicast_address;
+
+            ip6_addr_set_solicitednode(&solicited_node_multicast_address,
+                                       /* last 24 bits of the address */
+                                       PP_HTONL(0x00000002));
+            mld6_netif_joingroup(pNetif, &solicited_node_multicast_address);
+        }
+    }
+
+    proxy_init(&g_pLwipNat->m_LwipNetIf, &g_pLwipNat->m_ProxyOptions);
+
+    natServiceProcessRegisteredPf(g_pLwipNat->m_vecPortForwardRule4);
+    natServiceProcessRegisteredPf(g_pLwipNat->m_vecPortForwardRule6);
+}
+
+
+/*static*/ DECLCALLBACK(void) VBoxNetLwipNAT::onLwipTcpIpFini(void* arg)
+{
+    AssertPtrReturnVoid(arg);
+
+    /* XXX: proxy finalization */
+
+    /* Take the link down, then the interface, then unregister it. */
+    netif *pLwipNetIf = &g_pLwipNat->m_LwipNetIf;
+    netif_set_link_down(pLwipNetIf);
+    netif_set_down(pLwipNetIf);
+    netif_remove(pLwipNetIf);
+}
+
+/*
+ * Interface initialization callback passed to netif_add().
+ *
+ * Sets the MAC address, MTU, flags and output functions of the interface and,
+ * when IPv6 is enabled, its link-local and ULA addresses.
+ */
+/*static*/ err_t VBoxNetLwipNAT::netifInit(netif *pNetif)
+{
+    AssertPtrReturn(pNetif, ERR_ARG);
+
+    VBoxNetLwipNAT *pThis = static_cast<VBoxNetLwipNAT *>(pNetif->state);
+    AssertPtrReturn(pThis, ERR_ARG);
+
+    LogFlowFunc(("ENTER: pNetif[%c%c%d]\n", pNetif->name[0], pNetif->name[1], pNetif->num));
+
+    /* Only the interface named by our constructor is acceptable. */
+    AssertReturn(pNetif->name[0] == 'N' && pNetif->name[1] == 'T', ERR_ARG);
+
+    /* Hardware address. */
+    RTMAC MacAddr = g_pLwipNat->getMacAddress();
+    pNetif->hwaddr_len = sizeof(RTMAC);
+    memcpy(pNetif->hwaddr, &MacAddr, sizeof(RTMAC));
+
+    /* MTU. */
+    pThis->m_u16Mtu = 1500; // XXX: FIXME
+    pNetif->mtu = pThis->m_u16Mtu;
+
+    /* Ethernet interface with broadcast; lwIP does the ARP handling itself. */
+    pNetif->flags = NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_ETHERNET;
+
+    pNetif->linkoutput = netifLinkoutput;   /* ether-level-pipe */
+    pNetif->output     = etharp_output;     /* ip-pipe */
+
+    if (pThis->m_ProxyOptions.ipv6_enabled)
+    {
+        pNetif->output_ip6 = ethip6_output;
+
+        /* Slot 0: IPv6 link-local address derived from the 48-bit MAC. */
+        netif_create_ip6_linklocal_address(pNetif, /* :from_mac_48bit */ 1);
+        netif_ip6_addr_set_state(pNetif, 0, IP6_ADDR_PREFERRED); // skip DAD
+
+        /*
+         * Slot 1: RFC 4193 Locally Assigned Global ID (ULA),
+         * [fd17:625c:f037:XXXX::1], where the 16 bit Subnet ID XXXX is made
+         * of the two middle bytes of the IPv4 address, e.g. :dead: for
+         * 10.222.173.1
+         */
+        const u8_t uSubnetHi = ip4_addr2(&pNetif->ip_addr);
+        const u8_t uSubnetLo = ip4_addr3(&pNetif->ip_addr);
+
+        ip6_addr_t *pUlaAddr = netif_ip6_addr(pNetif, 1);
+        IP6_ADDR(pUlaAddr, 0,   0xFD, 0x17,   0x62, 0x5C);
+        IP6_ADDR(pUlaAddr, 1,   0xF0, 0x37,   uSubnetHi, uSubnetLo);
+        IP6_ADDR(pUlaAddr, 2,   0x00, 0x00,   0x00, 0x00);
+        IP6_ADDR(pUlaAddr, 3,   0x00, 0x00,   0x00, 0x01);
+        netif_ip6_addr_set_state(pNetif, 1, IP6_ADDR_PREFERRED);
+
+#if LWIP_IPV6_SEND_ROUTER_SOLICIT
+        pNetif->rs_count = 0;
+#endif
+    }
+
+    LogFlowFunc(("LEAVE: %d\n", ERR_OK));
+    return ERR_OK;
+}
+
+
+/*
+ * Link-level output callback of the lwIP interface.
+ *
+ * Describes the pbuf chain as a list of segments in aXmitSeg (skipping the
+ * ethernet padding of the first pbuf, if any), sends it on the wire and
+ * flushes.
+ */
+/*static*/ err_t VBoxNetLwipNAT::netifLinkoutput(netif *pNetif, pbuf *pPBuf)
+{
+    AssertPtrReturn(pNetif, ERR_ARG);
+    AssertPtrReturn(pPBuf, ERR_ARG);
+
+    VBoxNetLwipNAT *pThis = static_cast<VBoxNetLwipNAT *>(pNetif->state);
+    AssertPtrReturn(pThis, ERR_IF);
+    AssertReturn(pThis == g_pLwipNat, ERR_ARG);
+
+    LogFlowFunc(("ENTER: pNetif[%c%c%d], pPbuf:%p\n",
+                 pNetif->name[0],
+                 pNetif->name[1],
+                 pNetif->num,
+                 pPBuf));
+
+    RT_ZERO(VBoxNetLwipNAT::aXmitSeg);
+
+    /* One segment per pbuf in the chain. */
+    size_t cSegs = 0;
+    struct pbuf *pCur = pPBuf;
+    while (pCur != NULL)
+    {
+        AssertReturn(cSegs < RT_ELEMENTS(VBoxNetLwipNAT::aXmitSeg), ERR_MEM);
+
+        INTNETSEG *pSeg = &VBoxNetLwipNAT::aXmitSeg[cSegs];
+        pSeg->pv = pCur->payload;
+        pSeg->cb = pCur->len;
+#if ETH_PAD_SIZE
+        if (pCur == pPBuf)
+        {
+            /* The padding sits at the start of the first pbuf only. */
+            pSeg->pv = (uint8_t *)pCur->payload + ETH_PAD_SIZE;
+            pSeg->cb = pCur->len - ETH_PAD_SIZE;
+        }
+#endif
+
+        pCur = pCur->next;
+        ++cSegs;
+    }
+
+    int rc = pThis->sendBufferOnWire(VBoxNetLwipNAT::aXmitSeg, cSegs,
+                                     pPBuf->tot_len - ETH_PAD_SIZE);
+    AssertRCReturn(rc, ERR_IF);
+
+    pThis->flushWire();
+
+    LogFlowFunc(("LEAVE: %d\n", ERR_OK));
+    return ERR_OK;
+}
+
+
+/**
+ * Constructs the NAT service with its defaults: MAC 52:54:00:12:35:00,
+ * address 10.0.2.2/24, IPv6 disabled, no source address binding.
+ *
+ * @param   icmpsock4   ICMP socket for IPv4 or INVALID_SOCKET.
+ * @param   icmpsock6   ICMPv6 socket or INVALID_SOCKET.
+ */
+VBoxNetLwipNAT::VBoxNetLwipNAT(SOCKET icmpsock4, SOCKET icmpsock6) : VBoxNetBaseService("VBoxNetNAT", "nat-network")
+{
+    LogFlowFuncEnter();
+
+    /*
+     * Start from all-zero C structures so that no member is left with an
+     * indeterminate value.  In particular init() only sets lomap_desc when
+     * loopback mappings exist.
+     */
+    RT_ZERO(m_ProxyOptions);
+    RT_ZERO(m_lo2off);
+    RT_ZERO(m_loOptDescriptor);
+    RT_ZERO(m_LwipNetIf);
+    m_u16Mtu = 0; /* set for real in netifInit() */
+
+    m_ProxyOptions.ipv6_enabled = 0;
+    m_ProxyOptions.ipv6_defroute = 0;
+    m_ProxyOptions.icmpsock4 = icmpsock4;
+    m_ProxyOptions.icmpsock6 = icmpsock6;
+    m_ProxyOptions.tftp_root = NULL;
+    m_ProxyOptions.src4 = NULL;
+    m_ProxyOptions.src6 = NULL;
+    RT_ZERO(m_src4);
+    RT_ZERO(m_src6);
+    m_src4.sin_family = AF_INET;
+    m_src6.sin6_family = AF_INET6;
+#if HAVE_SA_LEN
+    m_src4.sin_len = sizeof(m_src4);
+    m_src6.sin6_len = sizeof(m_src6);
+#endif
+    m_ProxyOptions.lomap_desc = NULL;
+    m_ProxyOptions.nameservers = NULL;
+
+    /* netifInit() insists on this interface name. */
+    m_LwipNetIf.name[0] = 'N';
+    m_LwipNetIf.name[1] = 'T';
+
+    RTMAC mac;
+    mac.au8[0] = 0x52;
+    mac.au8[1] = 0x54;
+    mac.au8[2] = 0;
+    mac.au8[3] = 0x12;
+    mac.au8[4] = 0x35;
+    mac.au8[5] = 0;
+    setMacAddress(mac);
+
+    RTNETADDRIPV4 address;
+    address.u     = RT_MAKE_U32_FROM_U8( 10,  0,  2,  2); // NB: big-endian
+    setIpv4Address(address);
+
+    address.u     = RT_H2N_U32_C(0xffffff00);
+    setIpv4Netmask(address);
+
+    fDontLoadRulesOnStartup = false;
+
+    for(unsigned int i = 0; i < RT_ELEMENTS(g_aGetOptDef); ++i)
+        addCommandLineOption(&g_aGetOptDef[i]);
+
+    LogFlowFuncLeave();
+}
+
+
+/**
+ * Releases the heap strings owned through m_ProxyOptions.
+ */
+VBoxNetLwipNAT::~VBoxNetLwipNAT()
+{
+    /* TFTP root string. */
+    if (m_ProxyOptions.tftp_root)
+    {
+        RTStrFree((char *)m_ProxyOptions.tftp_root);
+        m_ProxyOptions.tftp_root = NULL;
+    }
+
+    /* NULL terminated name server list: the strings first, then the array. */
+    if (m_ProxyOptions.nameservers)
+    {
+        for (const char **ppcszCur = m_ProxyOptions.nameservers; *ppcszCur; ++ppcszCur)
+            RTStrFree((char*)*ppcszCur);
+
+        RTMemFree(m_ProxyOptions.nameservers);
+        m_ProxyOptions.nameservers = NULL;
+    }
+}
+
+
+/**
+ * Fills in the forwarding spec of a rule and hands a heap copy of it to
+ * portfwd_rule_add().
+ *
+ * @returns VINF_SUCCESS if the rule was added, VERR_IGNORED otherwise
+ *          (unsupported protocol, bad spec, out of memory, add failure).
+ * @param   natPf   The rule; its FWSpec member is updated.
+ */
+/*static*/ int VBoxNetLwipNAT::natServicePfRegister(NATSEVICEPORTFORWARDRULE& natPf)
+{
+    const int sockFamily = natPf.Pfr.fPfrIPv6 ? PF_INET6 : PF_INET;
+
+    /* Socket type from the protocol; anything but TCP and UDP is ignored. */
+    int socketSpec;
+    if (natPf.Pfr.iPfrProto == IPPROTO_TCP)
+        socketSpec = SOCK_STREAM;
+    else if (natPf.Pfr.iPfrProto == IPPROTO_UDP)
+        socketSpec = SOCK_DGRAM;
+    else
+        return VERR_IGNORED;
+
+    /* An empty host address means the wildcard address of the family. */
+    const char *pszHostAddr = natPf.Pfr.szPfrHostAddr;
+    if (pszHostAddr[0] == '\0')
+        pszHostAddr = sockFamily == PF_INET ? "0.0.0.0" : "::";
+
+    int lrc = fwspec_set(&natPf.FWSpec,
+                         sockFamily,
+                         socketSpec,
+                         pszHostAddr,
+                         natPf.Pfr.u16PfrHostPort,
+                         natPf.Pfr.szPfrGuestAddr,
+                         natPf.Pfr.u16PfrGuestPort);
+    if (lrc != 0)
+        return VERR_IGNORED;
+
+    fwspec *pFwCopy = (fwspec *)RTMemDup(&natPf.FWSpec, sizeof(natPf.FWSpec));
+    if (!pFwCopy)
+    {
+        LogRel(("Unable to allocate memory for %s rule \"%s\"\n",
+                natPf.Pfr.fPfrIPv6 ? "IPv6" : "IPv4",
+                natPf.Pfr.szPfrName));
+        return VERR_IGNORED;
+    }
+
+    lrc = portfwd_rule_add(pFwCopy);
+    if (lrc == 0)
+        return VINF_SUCCESS; /* (pFwCopy is owned by lwip thread now.) */
+
+    RTMemFree(pFwCopy);
+    return VERR_IGNORED;
+}
+
+
+/**
+ * Logs and registers every rule of the given list; individual registration
+ * failures are not reported to the caller.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   vecRules    The rules to register.
+ */
+/*static*/ int VBoxNetLwipNAT::natServiceProcessRegisteredPf(VECNATSERVICEPF& vecRules)
+{
+    for (size_t idxRule = 0; idxRule < vecRules.size(); ++idxRule)
+    {
+        NATSEVICEPORTFORWARDRULE &rule = vecRules[idxRule];
+        const bool fIPv6 = !!rule.Pfr.fPfrIPv6;
+        const char * const pszOpen  = fIPv6 ? "[" : "";
+        const char * const pszClose = fIPv6 ? "]" : "";
+
+        LogRel(("Loading %s port-forwarding rule \"%s\": %s %s%s%s:%d -> %s%s%s:%d\n",
+                fIPv6 ? "IPv6" : "IPv4",
+                rule.Pfr.szPfrName,
+                rule.Pfr.iPfrProto == IPPROTO_TCP ? "TCP" : "UDP",
+                /* from */
+                pszOpen, rule.Pfr.szPfrHostAddr, pszClose,
+                rule.Pfr.u16PfrHostPort,
+                /* to */
+                pszOpen, rule.Pfr.szPfrGuestAddr, pszClose,
+                rule.Pfr.u16PfrGuestPort));
+
+        natServicePfRegister(rule);
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Initializes the service on the main thread and starts the lwIP thread.
+ *
+ * After the base class initialization this looks up our NAT network in Main,
+ * registers the event listeners, reads the IPv6 settings, the optional
+ * source address bindings (extra data "NAT/<network>/SourceIp4" and
+ * ".../SourceIp6"), the port-forwarding rules (unless rules were given on
+ * the command line), the loopback mappings, the TFTP root and the host name
+ * servers.  It then goes online and starts lwIP with onLwipTcpIpInit().
+ *
+ * @returns VBox status code.
+ */
+int VBoxNetLwipNAT::init()
+{
+    LogFlowFuncEnter();
+
+    /* virtualbox initialized in super class */
+    int rc = ::VBoxNetBaseService::init();
+    AssertRCReturn(rc, rc);
+
+    std::string networkName = getNetworkName();
+    rc = findNatNetwork(virtualbox, networkName, m_net);
+    AssertRCReturn(rc, rc);
+
+    {
+        ComEventTypeArray eventTypes;
+        eventTypes.push_back(VBoxEventType_OnNATNetworkPortForward);
+        eventTypes.push_back(VBoxEventType_OnNATNetworkSetting);
+        rc = createNatListener(m_NatListener, virtualbox, this, eventTypes);
+        AssertRCReturn(rc, rc);
+    }
+
+
+    // resolver changes are reported on vbox but are retrieved from
+    // host so stash a pointer for future lookups
+    HRESULT hrc = virtualbox->COMGETTER(Host)(m_host.asOutParam());
+    AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+    {
+        ComEventTypeArray eventTypes;
+        eventTypes.push_back(VBoxEventType_OnHostNameResolutionConfigurationChange);
+        eventTypes.push_back(VBoxEventType_OnNATNetworkStartStop);
+        rc = createNatListener(m_VBoxListener, virtualbox, this, eventTypes);
+        AssertRCReturn(rc, rc);
+    }
+
+    {
+        ComEventTypeArray eventTypes;
+        eventTypes.push_back(VBoxEventType_OnVBoxSVCAvailabilityChanged);
+        rc = createClientListener(m_VBoxClientListener, virtualboxClient, this, eventTypes);
+        AssertRCReturn(rc, rc);
+    }
+
+    BOOL fIPv6Enabled = FALSE;
+    hrc = m_net->COMGETTER(IPv6Enabled)(&fIPv6Enabled);
+    AssertComRCReturn(hrc, VERR_NOT_FOUND);
+
+    BOOL fIPv6DefaultRoute = FALSE;
+    if (fIPv6Enabled)
+    {
+        hrc = m_net->COMGETTER(AdvertiseDefaultIPv6RouteEnabled)(&fIPv6DefaultRoute);
+        AssertComRCReturn(hrc, VERR_NOT_FOUND);
+    }
+
+    m_ProxyOptions.ipv6_enabled = fIPv6Enabled;
+    m_ProxyOptions.ipv6_defroute = fIPv6DefaultRoute;
+
+
+    /*
+     * Bind outgoing connections to the specified IP.
+     */
+    com::Bstr bstrSourceIpX;
+
+    /* IPv4 */
+    com::Bstr bstrSourceIp4Key = com::BstrFmt("NAT/%s/SourceIp4", networkName.c_str());
+    hrc = virtualbox->GetExtraData(bstrSourceIp4Key.raw(), bstrSourceIpX.asOutParam());
+    if (SUCCEEDED(hrc) && bstrSourceIpX.isNotEmpty())
+    {
+        RTNETADDRIPV4 addr;
+        rc = RTNetStrToIPv4Addr(com::Utf8Str(bstrSourceIpX).c_str(), &addr);
+        if (RT_SUCCESS(rc))
+        {
+            m_src4.sin_addr.s_addr = addr.u;
+            m_ProxyOptions.src4 = &m_src4;
+
+            LogRel(("Will use %RTnaipv4 as IPv4 source address\n",
+                    m_src4.sin_addr.s_addr));
+        }
+        else
+        {
+            LogRel(("Failed to parse \"%s\" IPv4 source address specification\n",
+                    com::Utf8Str(bstrSourceIpX).c_str()));
+        }
+
+        bstrSourceIpX.setNull();
+    }
+
+    /* IPv6 */
+    com::Bstr bstrSourceIp6Key = com::BstrFmt("NAT/%s/SourceIp6", networkName.c_str());
+    hrc = virtualbox->GetExtraData(bstrSourceIp6Key.raw(), bstrSourceIpX.asOutParam());
+    if (SUCCEEDED(hrc) && bstrSourceIpX.isNotEmpty())
+    {
+        RTNETADDRIPV6 addr;
+        char *pszZone = NULL;
+        rc = RTNetStrToIPv6Addr(com::Utf8Str(bstrSourceIpX).c_str(), &addr, &pszZone);
+        if (RT_SUCCESS(rc))
+        {
+            memcpy(&m_src6.sin6_addr, &addr, sizeof(addr));
+            m_ProxyOptions.src6 = &m_src6;
+
+            LogRel(("Will use %RTnaipv6 as IPv6 source address\n",
+                    &m_src6.sin6_addr));
+        }
+        else
+        {
+            LogRel(("Failed to parse \"%s\" IPv6 source address specification\n",
+                    com::Utf8Str(bstrSourceIpX).c_str()));
+        }
+
+        bstrSourceIpX.setNull();
+    }
+
+
+    if (!fDontLoadRulesOnStartup)
+    {
+        fetchNatPortForwardRules(m_net, false, m_vecPortForwardRule4);
+        fetchNatPortForwardRules(m_net, true, m_vecPortForwardRule6);
+    } /* if (!fDontLoadRulesOnStartup) */
+
+    AddressToOffsetMapping tmp;
+    rc = localMappings(m_net, tmp);
+    if (RT_SUCCESS(rc) && !tmp.empty())
+    {
+        /* Copy at most RT_ELEMENTS(m_lo2off) mappings; the rest is dropped. */
+        unsigned long i = 0;
+        for (AddressToOffsetMapping::iterator it = tmp.begin();
+             it != tmp.end() && i < RT_ELEMENTS(m_lo2off);
+             ++it, ++i)
+        {
+            ip4_addr_set_u32(&m_lo2off[i].loaddr, it->first.u);
+            m_lo2off[i].off = it->second;
+        }
+
+        m_loOptDescriptor.lomap = m_lo2off;
+        m_loOptDescriptor.num_lomap = i;
+        m_ProxyOptions.lomap_desc = &m_loOptDescriptor;
+    }
+    else
+    {
+        /* No mappings: make that explicit instead of leaving the pointer
+         * with whatever value it had. */
+        m_ProxyOptions.lomap_desc = NULL;
+    }
+
+    com::Bstr bstr;
+    hrc = virtualbox->COMGETTER(HomeFolder)(bstr.asOutParam());
+    AssertComRCReturn(hrc, VERR_NOT_FOUND);
+    if (!bstr.isEmpty())
+    {
+        com::Utf8Str strTftpRoot(com::Utf8StrFmt("%ls%c%s",
+                                     bstr.raw(), RTPATH_DELIMITER, "TFTP"));
+        char *pszStrTemp = NULL;       // avoid const char ** vs char **
+        rc = RTStrUtf8ToCurrentCP(&pszStrTemp, strTftpRoot.c_str());
+        AssertRC(rc);
+        /* Only take the string if the conversion produced one; the assertion
+         * above is a no-op in release builds. */
+        if (RT_SUCCESS(rc))
+            m_ProxyOptions.tftp_root = pszStrTemp;
+        else
+            LogRel(("Failed to convert TFTP root \"%s\" to the current code page: %Rrc\n",
+                    strTftpRoot.c_str(), rc));
+    }
+
+    m_ProxyOptions.nameservers = getHostNameservers();
+
+    /* end of COM initialization */
+
+    rc = g_pLwipNat->tryGoOnline();
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* this starts LWIP thread */
+    vboxLwipCoreInitialize(VBoxNetLwipNAT::onLwipTcpIpInit, this);
+
+    LogFlowFuncLeaveRC(rc);
+    return rc;
+}
+
+
+/**
+ * Queries the host's name servers from Main.
+ *
+ * @returns NULL terminated array of RTStrDup'ed strings allocated with
+ *          RTMemAllocZ, or NULL if there is no host object, the query fails,
+ *          there are no name servers or memory is short.
+ */
+const char **VBoxNetLwipNAT::getHostNameservers()
+{
+    if (m_host.isNull())
+        return NULL;
+
+    com::SafeArray<BSTR> aNameServers;
+    HRESULT hrc = m_host->COMGETTER(NameServers)(ComSafeArrayAsOutParam(aNameServers));
+    if (FAILED(hrc))
+        return NULL;
+
+    const size_t cServers = aNameServers.size();
+    if (cServers == 0)
+        return NULL;
+
+    /* One extra zeroed slot serves as the terminator. */
+    const char **ppcszList = (const char **)RTMemAllocZ(sizeof(char *) * (cServers + 1));
+    if (ppcszList == NULL)
+        return NULL;
+
+    /* Entries that cannot be duplicated are skipped. */
+    size_t cCopied = 0;
+    for (size_t idx = 0; idx < cServers; ++idx)
+    {
+        com::Utf8Str strServer(aNameServers[idx]);
+        const char *pszCopy = RTStrDup(strServer.c_str());
+        if (pszCopy != NULL)
+            ppcszList[cCopied++] = pszCopy;
+    }
+
+    if (cCopied != 0)
+        return ppcszList;
+
+    RTMemFree(ppcszList);
+    return NULL;
+}
+
+
+/**
+ * Handles the service specific command line options.
+ *
+ * 'p' (--port-forward4) and 'P' (--port-forward6) carry a port-forwarding
+ * rule string.  Seeing either disables loading the rules from Main in init().
+ * A rule that fails to parse is logged and skipped instead of being stored as
+ * an all-zero entry.
+ *
+ * @returns VINF_SUCCESS if the option was recognized, VERR_NOT_FOUND
+ *          otherwise.
+ * @param   rc      The option character returned by RTGetOpt.
+ * @param   Val     The option value.
+ */
+int VBoxNetLwipNAT::parseOpt(int rc, const RTGETOPTUNION& Val)
+{
+    switch (rc)
+    {
+        case 'p':
+        case 'P':
+        {
+            NATSEVICEPORTFORWARDRULE Rule;
+            VECNATSERVICEPF& rules = (rc == 'P'?
+                                        m_vecPortForwardRule6
+                                      : m_vecPortForwardRule4);
+
+            fDontLoadRulesOnStartup = true;
+
+            RT_ZERO(Rule);
+
+            int rc2 = netPfStrToPf(Val.psz, (rc == 'P'), &Rule.Pfr);
+            if (RT_SUCCESS(rc2))
+                rules.push_back(Rule);
+            else
+                LogRel(("Ignoring malformed %s port-forwarding rule \"%s\" (%Rrc)\n",
+                        rc == 'P' ? "IPv6" : "IPv4", Val.psz, rc2));
+            return VINF_SUCCESS;
+        }
+        default:;
+    }
+    return VERR_NOT_FOUND;
+}
+
+
+/**
+ * Feeds a frame received from the internal network to lwIP.
+ *
+ * The frame is copied into a freshly allocated pbuf (chain) and passed to
+ * the input function of our lwIP interface.
+ *
+ * @returns VINF_SUCCESS, VERR_INVALID_PARAMETER for a bad pointer, an empty
+ *          frame or a frame too large for a pbuf, VERR_NO_MEMORY if no pbuf
+ *          could be allocated.
+ * @param   pvFrame     The frame.
+ * @param   cbFrame     Size of the frame in bytes.
+ */
+int VBoxNetLwipNAT::processFrame(void *pvFrame, size_t cbFrame)
+{
+    AssertPtrReturn(pvFrame, VERR_INVALID_PARAMETER);
+    AssertReturn(cbFrame != 0, VERR_INVALID_PARAMETER);
+
+    /* pbuf lengths are 16 bit; refuse rather than silently truncate. */
+    if (RT_UNLIKELY(cbFrame > (size_t)UINT16_MAX - ETH_PAD_SIZE))
+        return VERR_INVALID_PARAMETER;
+
+    struct pbuf *p = pbuf_alloc(PBUF_RAW, (u16_t)cbFrame + ETH_PAD_SIZE, PBUF_POOL);
+    if (RT_UNLIKELY(p == NULL))
+        return VERR_NO_MEMORY;
+
+    /*
+     * The code below is inlined version of:
+     *
+     *   pbuf_header(p, -ETH_PAD_SIZE); // hide padding
+     *   pbuf_take(p, pvFrame, cbFrame);
+     *   pbuf_header(p, ETH_PAD_SIZE);  // reveal padding
+     */
+    struct pbuf *q = p;
+    uint8_t *pu8Chunk = (uint8_t *)pvFrame;
+    do {
+        uint8_t *payload = (uint8_t *)q->payload;
+        size_t len = q->len;
+
+#if ETH_PAD_SIZE
+        if (RT_LIKELY(q == p))  // single pbuf is large enough
+        {
+            payload += ETH_PAD_SIZE;
+            len -= ETH_PAD_SIZE;
+        }
+#endif
+        memcpy(payload, pu8Chunk, len);
+        pu8Chunk += len;
+        q = q->next;
+    } while (RT_UNLIKELY(q != NULL));
+
+    /*
+     * The input function takes ownership of the pbuf only when it succeeds.
+     * On failure the frame is dropped and the pbuf must be freed here or it
+     * leaks.  A dropped frame is still reported as success, as before.
+     */
+    err_t error = m_LwipNetIf.input(p, &m_LwipNetIf);
+    if (RT_UNLIKELY(error != ERR_OK))
+        pbuf_free(p);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Splits a GSO frame into its segments and passes each to processFrame().
+ *
+ * @returns VINF_SUCCESS, VERR_INVALID_PARAMETER if the GSO context is not
+ *          valid, or the first failure status of processFrame().
+ * @param   pGso        The GSO context, directly followed by the frame data.
+ * @param   cbFrame     Size of GSO context plus frame data.
+ */
+int VBoxNetLwipNAT::processGSO(PCPDMNETWORKGSO pGso, size_t cbFrame)
+{
+    if (!PDMNetGsoIsValid(pGso, cbFrame, cbFrame - sizeof(PDMNETWORKGSO)))
+        return VERR_INVALID_PARAMETER;
+
+    /* From here on cbFrame is the payload size only. */
+    cbFrame -= sizeof(PDMNETWORKGSO);
+
+    uint8_t abHdrScratch[256];
+    uint32_t const cSegs = PDMNetGsoCalcSegmentCount(pGso, cbFrame);
+
+    uint32_t iSeg = 0;
+    while (iSeg < cSegs)
+    {
+        uint32_t cbSegFrame;
+        void *pvSegFrame = PDMNetGsoCarveSegmentQD(pGso, (uint8_t *)(pGso + 1), cbFrame,
+                                                   abHdrScratch, iSeg, cSegs, &cbSegFrame);
+
+        int rc = processFrame(pvSegFrame, cbSegFrame);
+        if (RT_FAILURE(rc))
+            return rc;
+
+        iSeg++;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Runs the service until the base class loop returns, then tears down lwIP,
+ * the rule lists and the event listeners.
+ *
+ * @returns VINF_SUCCESS.
+ */
+int VBoxNetLwipNAT::run()
+{
+    /* Father starts receiving thread and enter event loop. */
+    this->VBoxNetBaseService::run();
+
+    /* Back from the loop: stop lwIP. */
+    vboxLwipCoreFinalize(VBoxNetLwipNAT::onLwipTcpIpFini, this);
+
+    /* Forget the port-forwarding rules. */
+    this->m_vecPortForwardRule4.clear();
+    this->m_vecPortForwardRule6.clear();
+
+    /* Unregister from Main events. */
+    destroyNatListener(this->m_NatListener, virtualbox);
+    destroyNatListener(this->m_VBoxListener, virtualbox);
+    destroyClientListener(this->m_VBoxClientListener, virtualboxClient);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Entry point.
+ *
+ * Opens the ICMP sockets (and installs their filters where supported),
+ * initializes COM and logging, then creates, initializes and runs the NAT
+ * service.
+ *
+ * @returns Process exit code: 0 normally (also when argument parsing stops
+ *          the service, as before), non-zero if Winsock or COM cannot be
+ *          initialized or if service initialization fails.
+ * @param   argc    Argument count.
+ * @param   argv    Argument vector.
+ * @param   envp    Environment, unused.
+ */
+extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv, char **envp)
+{
+    int rc;
+
+    LogFlowFuncEnter();
+
+    NOREF(envp);
+
+#ifdef RT_OS_WINDOWS
+    WSADATA wsaData;
+    int err;
+
+    err = WSAStartup(MAKEWORD(2,2), &wsaData);
+    if (err)
+    {
+        fprintf(stderr, "wsastartup: failed (%d)\n", err);
+        return 1;
+    }
+#endif
+
+    SOCKET icmpsock4 = INVALID_SOCKET;
+    SOCKET icmpsock6 = INVALID_SOCKET;
+#ifndef RT_OS_DARWIN
+    const int icmpstype = SOCK_RAW;
+#else
+    /* on OS X it's not privileged */
+    const int icmpstype = SOCK_DGRAM;
+#endif
+
+    icmpsock4 = socket(AF_INET, icmpstype, IPPROTO_ICMP);
+    if (icmpsock4 == INVALID_SOCKET)
+    {
+        perror("IPPROTO_ICMP");
+#ifdef VBOX_RAWSOCK_DEBUG_HELPER
+        icmpsock4 = getrawsock(AF_INET);
+#endif
+    }
+
+    if (icmpsock4 != INVALID_SOCKET)
+    {
+#ifdef ICMP_FILTER              //  Linux specific
+        struct icmp_filter flt = {
+            ~(uint32_t)(
+                  (1U << ICMP_ECHOREPLY)
+                | (1U << ICMP_DEST_UNREACH)
+                | (1U << ICMP_TIME_EXCEEDED)
+            )
+        };
+
+        int status = setsockopt(icmpsock4, SOL_RAW, ICMP_FILTER,
+                                &flt, sizeof(flt));
+        if (status < 0)
+        {
+            perror("ICMP_FILTER");
+        }
+#endif
+    }
+
+    icmpsock6 = socket(AF_INET6, icmpstype, IPPROTO_ICMPV6);
+    if (icmpsock6 == INVALID_SOCKET)
+    {
+        perror("IPPROTO_ICMPV6");
+#ifdef VBOX_RAWSOCK_DEBUG_HELPER
+        icmpsock6 = getrawsock(AF_INET6);
+#endif
+    }
+
+    if (icmpsock6 != INVALID_SOCKET)
+    {
+#ifdef ICMP6_FILTER             // Windows doesn't support RFC 3542 API
+        /*
+         * XXX: We do this here for now, not in pxping.c, to avoid
+         * name clashes between lwIP and system headers.
+         */
+        struct icmp6_filter flt;
+        ICMP6_FILTER_SETBLOCKALL(&flt);
+
+        ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &flt);
+
+        ICMP6_FILTER_SETPASS(ICMP6_DST_UNREACH, &flt);
+        ICMP6_FILTER_SETPASS(ICMP6_PACKET_TOO_BIG, &flt);
+        ICMP6_FILTER_SETPASS(ICMP6_TIME_EXCEEDED, &flt);
+        ICMP6_FILTER_SETPASS(ICMP6_PARAM_PROB, &flt);
+
+        int status = setsockopt(icmpsock6, IPPROTO_ICMPV6, ICMP6_FILTER,
+                                &flt, sizeof(flt));
+        if (status < 0)
+        {
+            perror("ICMP6_FILTER");
+        }
+#endif
+    }
+
+    HRESULT hrc = com::Initialize();
+    if (FAILED(hrc))
+    {
+        /* We are bailing out on every path below; close only the sockets
+         * that were actually opened. */
+        if (icmpsock4 != INVALID_SOCKET)
+            closesocket(icmpsock4);
+        if (icmpsock6 != INVALID_SOCKET)
+            closesocket(icmpsock6);
+
+#ifdef VBOX_WITH_XPCOM
+        if (hrc == NS_ERROR_FILE_ACCESS_DENIED)
+        {
+            char szHome[RTPATH_MAX] = "";
+            int vrc = com::GetVBoxUserHomeDirectory(szHome, sizeof(szHome), false);
+            if (RT_SUCCESS(vrc))
+            {
+                return RTMsgErrorExit(RTEXITCODE_FAILURE,
+                                      "Failed to initialize COM: %s: %Rhrf",
+                                      szHome, hrc);
+            }
+        }
+#endif  // VBOX_WITH_XPCOM
+        return RTMsgErrorExit(RTEXITCODE_FAILURE,
+                              "Failed to initialize COM: %Rhrf", hrc);
+    }
+
+    rc = vboxNetNATLogInit(argc, argv);
+    // shall we bail if we failed to init logging?
+
+    g_pLwipNat = new VBoxNetLwipNAT(icmpsock4, icmpsock6);
+
+    Log2(("NAT: initialization\n"));
+    rc = g_pLwipNat->parseArgs(argc - 1, argv + 1);
+    rc = (rc == 0) ? VINF_SUCCESS : VERR_GENERAL_FAILURE; /* XXX: FIXME */
+
+    /* Only a failed init() changes the exit code; a non-zero parseArgs()
+     * result keeps the old exit code 0. */
+    int rcExit = RTEXITCODE_SUCCESS;
+    if (RT_SUCCESS(rc))
+    {
+        rc = g_pLwipNat->init();
+        if (RT_SUCCESS(rc))
+            g_pLwipNat->run();
+        else
+            rcExit = RTEXITCODE_FAILURE;
+    }
+
+    delete g_pLwipNat;
+    g_pLwipNat = NULL; /* do not leave a dangling global behind */
+    return rcExit;
+}
+
+
+/**
+ * Creates the release logger "<VBox home>/<network>.log" for the --network
+ * argument; returns an IPRT status code (VERR_MISSING without --network).
+ */
+static int vboxNetNATLogInit(int argc, char **argv)
+{
+    char szHome[RTPATH_MAX];
+    int rc = com::GetVBoxUserHomeDirectory(szHome, sizeof(szHome), false);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    const char *pcszNetwork = NULL;
+
+    // XXX: This duplicates information from VBoxNetBaseService.cpp.
+    // Perhaps option definitions should be exported as public static
+    // member of VBoxNetBaseService?
+    static const RTGETOPTDEF s_aOptions[] = {
+        { "--network", 'n', RTGETOPT_REQ_STRING }
+    };
+
+    RTGETOPTSTATE GetState;
+    RTGetOptInit(&GetState, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1,
+                 RTGETOPTINIT_FLAGS_NO_STD_OPTS);
+
+    RTGETOPTUNION ValueUnion;
+    int ch;
+    while ((ch = RTGetOpt(&GetState, &ValueUnion)))
+    {
+        if (ch == 'n')
+        {
+            pcszNetwork = ValueUnion.psz;
+            break;
+        }
+    }
+
+    if (pcszNetwork == NULL)
+        return VERR_MISSING;
+
+    char szNetwork[RTPATH_MAX];
+    rc = RTStrCopy(szNetwork, sizeof(szNetwork), pcszNetwork);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    // sanitize network name to be usable as a path component
+    for (char *p = szNetwork; *p != '\0'; ++p)
+    {
+        if (RTPATH_IS_SEP(*p))
+            *p = '_';
+    }
+
+    // RTStrPrintf2 (unlike RTStrPrintf) reports truncation: negative result
+    char szLogFile[RTPATH_MAX];
+    ssize_t cch = RTStrPrintf2(szLogFile, sizeof(szLogFile),
+                               "%s%c%s.log", szHome, RTPATH_DELIMITER, szNetwork);
+    if (cch < 0)
+        return VERR_BUFFER_OVERFLOW;
+
+    // sanitize network name some more to be usable as environment variable
+    for (char *p = szNetwork; *p != '\0'; ++p)
+    {
+        if (   *p != '_'
+            && (*p < '0' || '9' < *p)
+            && (*p < 'a' || 'z' < *p)
+            && (*p < 'A' || 'Z' < *p))
+        {
+            *p = '_';
+        }
+    }
+
+    char szEnvVarBase[128];
+    cch = RTStrPrintf2(szEnvVarBase, sizeof(szEnvVarBase),
+                       "VBOXNET_%s_RELEASE_LOG", szNetwork);
+    if (cch < 0)
+        return VERR_BUFFER_OVERFLOW;
+
+    rc = com::VBoxLogRelCreate("NAT Network",
+                               szLogFile,
+                               RTLOGFLAGS_PREFIX_TIME_PROG,
+                               "all all.restrict -default.restrict",
+                               szEnvVarBase,
+                               RTLOGDEST_FILE,
+                               32768 /* cMaxEntriesPerGroup */,
+                               0 /* cHistory */,
+                               0 /* uHistoryFileTime */,
+                               0 /* uHistoryFileSize */,
+                               NULL /*pErrInfo*/);
+
+    /*
+     * Provide immediate feedback if corresponding LogRel level is
+     * enabled.  It's frustrating when you chase some rare event and
+     * discover you didn't actually have the corresponding log level
+     * enabled because of a typo in the environment variable name or
+     * its content.
+     */
+#define LOG_PING(_log) _log((#_log " enabled\n"))
+    LOG_PING(LogRel2);
+    LOG_PING(LogRel3);
+    LOG_PING(LogRel4);
+    LOG_PING(LogRel5);
+    LOG_PING(LogRel6);
+    LOG_PING(LogRel7);
+    LOG_PING(LogRel8);
+    LOG_PING(LogRel9);
+    LOG_PING(LogRel10);
+    LOG_PING(LogRel11);
+    LOG_PING(LogRel12);
+
+    return rc;
+}
+
+
+/** Appends the network's IPv4 or IPv6 port-forward rules to vec; rules that fail to parse are skipped. */
+static int fetchNatPortForwardRules(const ComNatPtr& nat, bool fIsIPv6, VECNATSERVICEPF& vec)
+{
+    com::SafeArray<BSTR> rules;
+    HRESULT hrc = fIsIPv6
+                ? nat->COMGETTER(PortForwardRules6)(ComSafeArrayAsOutParam(rules))
+                : nat->COMGETTER(PortForwardRules4)(ComSafeArrayAsOutParam(rules));
+    AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+    NATSEVICEPORTFORWARDRULE Rule;
+    for (size_t idxRules = 0; idxRules < rules.size(); ++idxRules)
+    {
+        /* idxRules is a size_t, so it must be formatted with %zu, not %d */
+        Log(("%zu-%s rule: %ls\n", idxRules, (fIsIPv6 ? "IPv6" : "IPv4"), rules[idxRules]));
+        RT_ZERO(Rule);
+
+        int rc = netPfStrToPf(com::Utf8Str(rules[idxRules]).c_str(), fIsIPv6,
+                              &Rule.Pfr);
+        if (RT_FAILURE(rc))
+            continue;
+
+        vec.push_back(Rule);
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+#ifndef VBOX_WITH_HARDENING
+
+int main(int argc, char **argv, char **envp)
+{
+    /* the IPRT runtime (with SUPLIB) must be up before TrustedMain runs */
+    const int rcInit = RTR3InitExe(argc, &argv, RTR3INIT_FLAGS_SUPLIB);
+    if (RT_SUCCESS(rcInit))
+        return TrustedMain(argc, argv, envp);
+    return RTMsgInitFailure(rcInit);
+}
+
+# if defined(RT_OS_WINDOWS)
+
+# if 0 /* Copied from the DHCP service; nobody recorded why it was disabled. */
+static LRESULT CALLBACK WindowProc(HWND hwnd,
+ UINT uMsg,
+ WPARAM wParam,
+ LPARAM lParam
+)
+{
+ if(uMsg == WM_DESTROY)
+ {
+ PostQuitMessage(0);
+ return 0;
+ }
+ return DefWindowProc (hwnd, uMsg, wParam, lParam);
+}
+
+static LPCWSTR g_WndClassName = L"VBoxNetNatLwipClass";
+
+static DWORD WINAPI MsgThreadProc(__in LPVOID lpParameter)
+{
+ HWND hwnd = 0;
+ HINSTANCE hInstance = (HINSTANCE)GetModuleHandle (NULL);
+ bool bExit = false;
+
+ /* Register the window class. */
+ WNDCLASS wc;
+ wc.style = 0;
+ wc.lpfnWndProc = WindowProc;
+ wc.cbClsExtra = 0;
+ wc.cbWndExtra = sizeof(void *);
+ wc.hInstance = hInstance;
+ wc.hIcon = NULL;
+ wc.hCursor = NULL;
+ wc.hbrBackground = (HBRUSH)(COLOR_BACKGROUND + 1);
+ wc.lpszMenuName = NULL;
+ wc.lpszClassName = g_WndClassName;
+
+ ATOM atomWindowClass = RegisterClass(&wc);
+
+ if (atomWindowClass != 0)
+ {
+ /* Create the window (positioned off-screen and hidden below). */
+ hwnd = CreateWindowEx(WS_EX_TOOLWINDOW | WS_EX_TRANSPARENT | WS_EX_TOPMOST,
+ g_WndClassName, g_WndClassName, WS_POPUPWINDOW,
+ -200, -200, 100, 100, NULL, NULL, hInstance, NULL);
+
+ if (hwnd)
+ {
+ SetWindowPos(hwnd, HWND_TOPMOST, -200, -200, 0, 0,
+ SWP_NOACTIVATE | SWP_HIDEWINDOW | SWP_NOCOPYBITS | SWP_NOREDRAW | SWP_NOSIZE);
+
+ MSG msg;
+ while (GetMessage(&msg, NULL, 0, 0))
+ {
+ TranslateMessage(&msg);
+ DispatchMessage(&msg);
+ }
+
+ DestroyWindow (hwnd);
+
+ bExit = true;
+ }
+
+ UnregisterClass (g_WndClassName, hInstance);
+ }
+
+ if(bExit)
+ {
+ /* No need for a careful shutdown here; the DHCP server usually gets terminated with TerminateProcess anyway. */
+ exit(0);
+ }
+
+ return 0;
+}
+# endif
+
+
+/** Windows GUI entry point (we usually don't want a console); forwards to main(). */
+int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow)
+{
+ RT_NOREF(hInstance, hPrevInstance, lpCmdLine, nCmdShow);
+# if 0 /* Copied from the DHCP service; nobody recorded why it was disabled. */
+ NOREF(hInstance); NOREF(hPrevInstance); NOREF(lpCmdLine); NOREF(nCmdShow);
+
+ HANDLE hThread = CreateThread(
+ NULL, /*__in_opt LPSECURITY_ATTRIBUTES lpThreadAttributes, */
+ 0, /*__in SIZE_T dwStackSize, */
+ MsgThreadProc, /*__in LPTHREAD_START_ROUTINE lpStartAddress,*/
+ NULL, /*__in_opt LPVOID lpParameter,*/
+ 0, /*__in DWORD dwCreationFlags,*/
+ NULL /*__out_opt LPDWORD lpThreadId*/
+ );
+
+ if(hThread != NULL)
+ CloseHandle(hThread);
+
+# endif
+ return main(__argc, __argv, environ);
+}
+# endif /* RT_OS_WINDOWS */
+
+#endif /* !VBOX_WITH_HARDENING */
diff --git a/src/VBox/NetworkServices/NAT/VBoxNetNAT.rc b/src/VBox/NetworkServices/NAT/VBoxNetNAT.rc
new file mode 100644
index 00000000..cdcd2bd7
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/VBoxNetNAT.rc
@@ -0,0 +1,55 @@
+/* $Id: VBoxNetNAT.rc $ */
+/** @file
+ * VBoxNetNAT - Resource file containing version info.
+ */
+
+/*
+ * Copyright (C) 2015-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <windows.h>
+#include <VBox/version.h>
+
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION VBOX_RC_FILE_VERSION
+ PRODUCTVERSION VBOX_RC_FILE_VERSION
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+ FILEFLAGS VBOX_RC_FILE_FLAGS
+ FILEOS VBOX_RC_FILE_OS
+ FILETYPE VBOX_RC_TYPE_DLL
+ FILESUBTYPE VFT2_UNKNOWN
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0" // Lang=US English, CharSet=Unicode
+ BEGIN
+ VALUE "FileDescription", "VirtualBox NAT Engine\0"
+ VALUE "InternalName", "VBoxNetNAT\0"
+ VALUE "OriginalFilename", "VBoxNetNAT.dll\0"
+ VALUE "CompanyName", VBOX_RC_COMPANY_NAME
+ VALUE "FileVersion", VBOX_RC_FILE_VERSION_STR
+ VALUE "LegalCopyright", VBOX_RC_LEGAL_COPYRIGHT
+ VALUE "ProductName", VBOX_RC_PRODUCT_NAME_STR
+ VALUE "ProductVersion", VBOX_RC_PRODUCT_VERSION_STR
+ VBOX_RC_MORE_STRINGS
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
+
+/* Creates the application icon. */
+#include "VBoxNetLwipNAT-icon.rc"
+
diff --git a/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp b/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp
new file mode 100644
index 00000000..a451c5a5
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp
@@ -0,0 +1,27 @@
+/* $Id: VBoxNetNATHardened.cpp $ */
+/** @file
+ * VBoxNetNAT - Hardened main().
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <VBox/sup.h>
+
+#ifndef SERVICE_NAME
+# error "Please define SERVICE_NAME"
+#endif
+/** Hardened main(): forwards to SUPR3HardenedMain() with SERVICE_NAME and the process arguments. */
+int main(int argc, char **argv, char **envp)
+{
+ return SUPR3HardenedMain(SERVICE_NAME, 0 /* fFlags */, argc, argv, envp);
+}
diff --git a/src/VBox/NetworkServices/NAT/dhcp6.h b/src/VBox/NetworkServices/NAT/dhcp6.h
new file mode 100644
index 00000000..8f7dc92e
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/dhcp6.h
@@ -0,0 +1,51 @@
+/* $Id: dhcp6.h $ */
+/** @file
+ * NAT Network - DHCPv6 protocol definitions.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_dhcp6_h
+#define VBOX_INCLUDED_SRC_NAT_dhcp6_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+/* UDP ports */
+#define DHCP6_CLIENT_PORT 546
+#define DHCP6_SERVER_PORT 547
+
+/* Message types */
+#define DHCP6_REPLY 7
+#define DHCP6_INFORMATION_REQUEST 11
+#define DHCP6_RELAY_FORW 12
+#define DHCP6_RELAY_REPLY 13
+
+/* DUID types */
+#define DHCP6_DUID_LLT 1
+#define DHCP6_DUID_EN 2
+#define DHCP6_DUID_LL 3
+
+/* Hardware type for DUID-LLT and DUID-LL */
+#define ARES_HRD_ETHERNET 1 /* RFC 826*/
+
+/* Options */
+#define DHCP6_OPTION_CLIENTID 1
+#define DHCP6_OPTION_SERVERID 2
+#define DHCP6_OPTION_ORO 6
+#define DHCP6_OPTION_ELAPSED_TIME 8
+#define DHCP6_OPTION_STATUS_CODE 13
+#define DHCP6_OPTION_DNS_SERVERS 23 /* RFC 3646 */
+#define DHCP6_OPTION_DOMAIN_LIST 24 /* RFC 3646 */
+
+#endif /* !VBOX_INCLUDED_SRC_NAT_dhcp6_h */
diff --git a/src/VBox/NetworkServices/NAT/fwtcp.c b/src/VBox/NetworkServices/NAT/fwtcp.c
new file mode 100644
index 00000000..373a7d42
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/fwtcp.c
@@ -0,0 +1,316 @@
+/* $Id: fwtcp.c $ */
+/** @file
+ * NAT Network - TCP port-forwarding.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+#include "proxy.h"
+#include "proxy_pollmgr.h"
+#include "portfwd.h"
+#include "pxtcp.h"
+
+#ifndef RT_OS_WINDOWS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+#include <poll.h>
+
+#include <err.h> /* BSD'ism */
+#else
+#include <stdio.h>
+#include "winpoll.h"
+#endif
+
+#include "lwip/opt.h"
+
+#include "lwip/sys.h"
+#include "lwip/tcpip.h"
+
+
+/** State of one TCP port-forwarding rule: the host listening socket and
+ * the mailbox/messages used to pass accepted connections to the lwIP thread. */
+struct fwtcp {
+ /**
+ * Our poll manager handler.
+ */
+ struct pollmgr_handler pmhdl;
+
+ /**
+ * Forwarding specification.
+ */
+ struct fwspec fwspec;
+
+ /**
+ * Listening socket.
+ */
+ SOCKET sock;
+
+ /**
+ * Mailbox for new inbound connections.
+ *
+ * XXX: since we have single producer and single consumer we can
+ * use lockless ringbuf like for pxtcp.
+ */
+ sys_mbox_t connmbox;
+
+ struct tcpip_msg msg_connect; /* runs fwtcp_pcb_connect() via proxy_lwip_post() */
+ struct tcpip_msg msg_delete; /* runs fwtcp_pcb_delete() via proxy_lwip_post() */
+
+ /**
+ * Linked list entry.
+ */
+ struct fwtcp *next;
+};
+
+
+static struct fwtcp *fwtcp_create(struct fwspec *);
+
+/* poll manager callback for fwtcp listening socket */
+static int fwtcp_pmgr_listen(struct pollmgr_handler *, SOCKET, int);
+
+/* lwip thread callbacks called via proxy_lwip_post() */
+static void fwtcp_pcb_connect(void *);
+static void fwtcp_pcb_delete(void *);
+
+
+/**
+ * Linked list of active fwtcp forwarders.
+ */
+struct fwtcp *fwtcp_list = NULL;
+
+
+void
+fwtcp_init(void)
+{
+    /* nothing to initialize: fwtcp_list starts out empty */
+}
+
+
+void
+fwtcp_add(struct fwspec *fwspec)
+{
+    /* Register a TCP forwarding rule.  fwtcp_create() itself links the new
+     * forwarder into fwtcp_list, so only success needs checking here. */
+    struct fwtcp *created = fwtcp_create(fwspec);
+
+    if (created != NULL) {
+        DPRINTF0(("%s\n", __func__));
+    }
+    else {
+        DPRINTF0(("%s: failed to add rule for TCP ...\n", __func__));
+    }
+}
+
+
+void
+fwtcp_del(struct fwspec *fwspec)
+{
+    struct fwtcp **link = &fwtcp_list;
+    struct fwtcp *victim;
+
+    /* find the forwarder with a matching spec, remembering the link to it */
+    while ((victim = *link) != NULL && !fwspec_equal(&victim->fwspec, fwspec))
+        link = &victim->next;
+
+    if (victim == NULL) {
+        DPRINTF0(("%s: not found\n", __func__));
+        return;
+    }
+
+    /* take it off fwtcp_list */
+    *link = victim->next;
+    victim->next = NULL;
+
+    DPRINTF0(("%s\n", __func__));
+
+    /* stop polling the listening socket and close it */
+    pollmgr_del_slot(victim->pmhdl.slot);
+    victim->pmhdl.slot = -1;
+    closesocket(victim->sock);
+    victim->sock = INVALID_SOCKET;
+
+    /* let pending msg_connect be processed before we delete fwtcp */
+    proxy_lwip_post(&victim->msg_delete);
+}
+
+
+struct fwtcp *
+fwtcp_create(struct fwspec *fwspec)
+{
+    struct fwtcp *fw;
+    SOCKET listener;
+
+    /* host-side socket bound to the source address of the rule */
+    listener = proxy_bound_socket(fwspec->sdom, fwspec->stype, &fwspec->src.sa);
+    if (listener == INVALID_SOCKET) {
+        return NULL;
+    }
+
+    fw = (struct fwtcp *)malloc(sizeof(*fw));
+    if (fw == NULL) {
+        goto fail_socket;
+    }
+
+    fw->pmhdl.callback = fwtcp_pmgr_listen;
+    fw->pmhdl.data = (void *)fw;
+    fw->pmhdl.slot = -1;
+
+    fw->sock = listener;
+    fw->fwspec = *fwspec; /* struct copy */
+
+    if (sys_mbox_new(&fw->connmbox, 16) != ERR_OK) {
+        goto fail_alloc;
+    }
+
+    /* static callback messages, later posted with proxy_lwip_post() */
+    fw->msg_connect.type = TCPIP_MSG_CALLBACK_STATIC;
+    fw->msg_connect.sem = NULL;
+    fw->msg_connect.msg.cb.function = fwtcp_pcb_connect;
+    fw->msg_connect.msg.cb.ctx = (void *)fw;
+
+    fw->msg_delete.type = TCPIP_MSG_CALLBACK_STATIC;
+    fw->msg_delete.sem = NULL;
+    fw->msg_delete.msg.cb.function = fwtcp_pcb_delete;
+    fw->msg_delete.msg.cb.ctx = (void *)fw;
+
+    if (pollmgr_add(&fw->pmhdl, fw->sock, POLLIN) < 0) {
+        sys_mbox_free(&fw->connmbox);
+        goto fail_alloc;
+    }
+
+    /* success: push onto the head of the forwarder list */
+    fw->next = fwtcp_list;
+    fwtcp_list = fw;
+
+    return fw;
+
+  fail_alloc:
+    closesocket(listener);
+    free(fw);
+    return NULL;
+
+  fail_socket:
+    closesocket(listener);
+    return NULL;
+}
+
+
+/*
+ * Poll manager callback: accepts a connection on the listening socket.
+ */
+int
+fwtcp_pmgr_listen(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct fwtcp *fwtcp = (struct fwtcp *)handler->data;
+    struct sockaddr_storage peer;
+    socklen_t peerlen = sizeof(peer);
+    struct pxtcp *pxtcp;
+    SOCKET conn;
+
+    LWIP_ASSERT1(fwtcp != NULL);
+    LWIP_ASSERT1(fd == fwtcp->sock);
+    LWIP_ASSERT1(revents == POLLIN);
+    LWIP_UNUSED_ARG(fd);
+    LWIP_UNUSED_ARG(revents);
+
+    LWIP_ASSERT1(sys_mbox_valid(&fwtcp->connmbox));
+
+    /*
+     * Accept one inbound connection.  Every exit path returns POLLIN,
+     * the same event mask the socket was registered with.
+     */
+    conn = accept(fwtcp->sock, (struct sockaddr *)&peer, &peerlen);
+    if (conn == INVALID_SOCKET) {
+        return POLLIN;
+    }
+
+    if (peer.ss_family != PF_INET) { /* PF_INET6 */
+        struct sockaddr_in6 *peer6 = (struct sockaddr_in6 *)&peer;
+        RT_NOREF(peer6);
+        DPRINTF(("<--- TCP %RTnaipv6:%d\n",
+                 &peer6->sin6_addr, ntohs(peer6->sin6_port)));
+    }
+    else {
+        struct sockaddr_in *peer4 = (struct sockaddr_in *)&peer;
+        RT_NOREF(peer4);
+        DPRINTF(("<--- TCP %RTnaipv4:%d\n",
+                 peer4->sin_addr.s_addr, ntohs(peer4->sin_port)));
+    }
+
+    /* hand the accepted socket to pxtcp */
+    pxtcp = pxtcp_create_forwarded(conn);
+    if (pxtcp == NULL) {
+        proxy_reset_socket(conn);
+        return POLLIN;
+    }
+
+    if (pxtcp_pmgr_add(pxtcp) < 0) {
+        pxtcp_cancel_forwarded(pxtcp);
+        return POLLIN;
+    }
+
+    /* queue it for the lwIP thread; back out if the post fails */
+    if (sys_mbox_trypost(&fwtcp->connmbox, (void *)pxtcp) != ERR_OK) {
+        pxtcp_pmgr_del(pxtcp);
+        pxtcp_cancel_forwarded(pxtcp);
+        return POLLIN;
+    }
+
+    proxy_lwip_post(&fwtcp->msg_connect);
+    return POLLIN;
+}
+
+
+void
+fwtcp_pcb_connect(void *arg)
+{
+    /* lwIP thread callback for msg_connect: take one queued connection */
+    struct fwtcp *fwtcp = (struct fwtcp *)arg;
+    struct pxtcp *pxtcp = NULL;
+
+    if (!sys_mbox_valid(&fwtcp->connmbox)) {
+        return;
+    }
+
+    if (sys_mbox_tryfetch(&fwtcp->connmbox, (void **)&pxtcp)
+        == SYS_MBOX_EMPTY)
+    {
+        return;
+    }
+
+    LWIP_ASSERT1(pxtcp != NULL);
+
+    /* hand off to pxtcp */
+    pxtcp_pcb_connect(pxtcp, &fwtcp->fwspec);
+}
+
+
+static void
+fwtcp_pcb_delete(void *arg)
+{
+    /* lwIP thread callback for msg_delete, which fwtcp_del() posts */
+    struct fwtcp *fwtcp = (struct fwtcp *)arg;
+    void *leftover;
+    u32_t timo = sys_mbox_tryfetch(&fwtcp->connmbox, &leftover);
+
+    LWIP_ASSERT1(timo == SYS_MBOX_EMPTY);
+    LWIP_UNUSED_ARG(timo); /* only in assert */
+
+    sys_mbox_free(&fwtcp->connmbox);
+    free(fwtcp);
+}
diff --git a/src/VBox/NetworkServices/NAT/fwudp.c b/src/VBox/NetworkServices/NAT/fwudp.c
new file mode 100644
index 00000000..c0e56927
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/fwudp.c
@@ -0,0 +1,545 @@
+/* $Id: fwudp.c $ */
+/** @file
+ * NAT Network - UDP port-forwarding.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+#include "proxy.h"
+#include "proxy_pollmgr.h"
+#include "portfwd.h"
+#include "pxremap.h"
+
+#ifndef RT_OS_WINDOWS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <stdio.h>
+#include <string.h>
+#include <poll.h>
+
+#include <err.h> /* BSD'ism */
+#else
+#include <stdio.h>
+#include <string.h>
+#include "winpoll.h"
+#endif
+
+#include "lwip/opt.h"
+#include "lwip/memp.h" /* XXX: for bulk delete of pcbs */
+
+#include "lwip/sys.h"
+#include "lwip/tcpip.h"
+#include "lwip/udp.h"
+
+struct fwudp_dgram { /* one inbound datagram queued in fwudp::inbuf */
+ struct pbuf *p; /* payload copied from the socket; freed by fwudp_pcb_send() */
+ ipX_addr_t src_addr; /* filled by fwany_ipX_addr_set_src() from the recvfrom() peer */
+ u16_t src_port; /* peer port taken from the recvfrom() address */
+};
+
+/**
+ * UDP port-forwarding.
+ *
+ * Unlike pxudp that uses 1:1 mapping between pcb and socket, for
+ * port-forwarded UDP the setup is a bit more elaborate.
+ *
+ * For fwtcp things are simple since each incoming TCP connection gets
+ * a new socket that we just hand off to pxtcp. Thus fwtcp only handles
+ * connection initiation.
+ *
+ * For fwudp all proxied UDP conversations share the same socket, so
+ * single fwudp multiplexes to several UDP pcbs.
+ *
+ * XXX: TODO: Currently pcbs point back directly to fwudp. It might
+ * make sense to introduce a per-pcb structure that points to fwudp
+ * and carries additional information, like pre-mapped peer address.
+ */
+struct fwudp {
+ /**
+ * Our poll manager handler.
+ */
+ struct pollmgr_handler pmhdl;
+
+ /**
+ * Forwarding specification.
+ */
+ struct fwspec fwspec;
+
+ /**
+ * XXX: lwip-format copy of destination
+ */
+ ipX_addr_t dst_addr;
+ u16_t dst_port;
+
+ /**
+ * Listening socket.
+ */
+ SOCKET sock;
+
+ /**
+ * Ring-buffer for inbound datagrams.
+ */
+ struct {
+ struct fwudp_dgram *buf;
+ size_t bufsize; /* number of elements in buf */
+ volatile size_t vacant; /* next slot to fill; advanced by fwudp_pmgr_pump() */
+ volatile size_t unsent; /* next slot to send; advanced by fwudp_pcb_send() */
+ } inbuf;
+
+ struct tcpip_msg msg_send; /* runs fwudp_pcb_send() via proxy_lwip_post() */
+ struct tcpip_msg msg_delete; /* runs fwudp_pcb_delete() via proxy_lwip_post() */
+
+ struct fwudp *next; /* next forwarder on fwudp_list */
+};
+
+
+struct fwudp *fwudp_create(struct fwspec *);
+
+/* poll manager callback for fwudp socket */
+static int fwudp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
+
+/* lwip thread callbacks called via proxy_lwip_post() */
+static void fwudp_pcb_send(void *);
+static void fwudp_pcb_delete(void *);
+
+static void fwudp_pcb_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t);
+static void fwudp_pcb_forward_outbound(struct fwudp *, struct udp_pcb *, struct pbuf *);
+
+
+/**
+ * Linked list of active fwudp forwarders.
+ */
+struct fwudp *fwudp_list = NULL;
+
+
+void
+fwudp_init(void)
+{
+    /* nothing to initialize: fwudp_list starts out empty */
+}
+
+
+void
+fwudp_add(struct fwspec *fwspec)
+{
+    /* Register a UDP forwarding rule.  fwudp_create() itself links the new
+     * forwarder into fwudp_list, so only success needs checking here. */
+    struct fwudp *created = fwudp_create(fwspec);
+
+    if (created != NULL) {
+        DPRINTF0(("%s\n", __func__));
+    }
+    else {
+        DPRINTF0(("%s: failed to add rule for UDP ...\n", __func__));
+    }
+}
+
+
+void
+fwudp_del(struct fwspec *fwspec)
+{
+    struct fwudp **link = &fwudp_list;
+    struct fwudp *victim;
+
+    /* find the forwarder with a matching spec, remembering the link to it */
+    while ((victim = *link) != NULL && !fwspec_equal(&victim->fwspec, fwspec))
+        link = &victim->next;
+
+    if (victim == NULL) {
+        DPRINTF0(("%s: not found\n", __func__));
+        return;
+    }
+
+    *link = victim->next;
+    victim->next = NULL;
+
+    DPRINTF0(("%s\n", __func__));
+
+    /* only stop polling here; fwudp_pcb_delete() closes the socket */
+    pollmgr_del_slot(victim->pmhdl.slot);
+    victim->pmhdl.slot = -1;
+
+    /* let pending msg_send be processed before we delete fwudp */
+    proxy_lwip_post(&victim->msg_delete);
+}
+
+
+/* Create a UDP forwarder for fwspec and link it into fwudp_list; returns
+ * NULL if the socket, memory or poll manager slot cannot be obtained. */
+struct fwudp *
+fwudp_create(struct fwspec *fwspec)
+{
+    struct fwudp *fwudp;
+    SOCKET sock;
+
+    sock = proxy_bound_socket(fwspec->sdom, fwspec->stype, &fwspec->src.sa);
+    if (sock == INVALID_SOCKET) {
+        return NULL;
+    }
+
+    fwudp = (struct fwudp *)malloc(sizeof(*fwudp));
+    if (fwudp == NULL) {
+        closesocket(sock);
+        return NULL;
+    }
+
+    fwudp->pmhdl.callback = fwudp_pmgr_pump;
+    fwudp->pmhdl.data = (void *)fwudp;
+    fwudp->pmhdl.slot = -1;
+
+    fwudp->sock = sock;
+    fwudp->fwspec = *fwspec; /* struct copy */
+
+    /* XXX: lwip-format copy of the destination.  The sockaddr port is in
+     * network byte order, hence ntohs() to get the host-order pcb port. */
+    if (fwspec->sdom == PF_INET) {
+        struct sockaddr_in *dst4 = &fwspec->dst.sin;
+        memcpy(&fwudp->dst_addr.ip4, &dst4->sin_addr, sizeof(ip_addr_t));
+        fwudp->dst_port = ntohs(dst4->sin_port);
+    }
+    else { /* PF_INET6 */
+        struct sockaddr_in6 *dst6 = &fwspec->dst.sin6;
+        memcpy(&fwudp->dst_addr.ip6, &dst6->sin6_addr, sizeof(ip6_addr_t));
+        fwudp->dst_port = ntohs(dst6->sin6_port);
+    }
+
+    fwudp->inbuf.bufsize = 256; /* elements */
+    fwudp->inbuf.buf = (struct fwudp_dgram *)calloc(fwudp->inbuf.bufsize,
+                                                    sizeof(struct fwudp_dgram));
+    if (fwudp->inbuf.buf == NULL) {
+        closesocket(sock);
+        free(fwudp);
+        return (NULL);
+    }
+    fwudp->inbuf.vacant = 0;
+    fwudp->inbuf.unsent = 0;
+
+#define CALLBACK_MSG(MSG, FUNC)                         \
+    do {                                                \
+        fwudp->MSG.type = TCPIP_MSG_CALLBACK_STATIC;    \
+        fwudp->MSG.sem = NULL;                          \
+        fwudp->MSG.msg.cb.function = FUNC;              \
+        fwudp->MSG.msg.cb.ctx = (void *)fwudp;          \
+    } while (0)
+
+    CALLBACK_MSG(msg_send, fwudp_pcb_send);
+    CALLBACK_MSG(msg_delete, fwudp_pcb_delete);
+
+#undef CALLBACK_MSG
+
+    if (pollmgr_add(&fwudp->pmhdl, fwudp->sock, POLLIN) < 0) {
+        closesocket(sock);
+        free(fwudp->inbuf.buf);
+        free(fwudp);
+        return NULL;
+    }
+
+    fwudp->next = fwudp_list;
+    fwudp_list = fwudp;
+
+    return fwudp;
+}
+
+
+/**
+ * Poll manager callback for fwudp::sock: reads one datagram and queues it.
+ */
+int
+fwudp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct fwudp *fwudp;
+    struct sockaddr_storage ss;
+    socklen_t sslen = sizeof(ss);
+    size_t beg, lim;
+    struct fwudp_dgram *dgram;
+    struct pbuf *p;
+    ssize_t nread;
+    int status;
+    err_t error;
+
+    fwudp = (struct fwudp *)handler->data;
+
+    LWIP_ASSERT1(fwudp != NULL);
+    LWIP_ASSERT1(fd == fwudp->sock);
+    LWIP_ASSERT1(revents == POLLIN);
+    LWIP_UNUSED_ARG(fd);
+    LWIP_UNUSED_ARG(revents);
+
+#ifdef RT_OS_WINDOWS
+    nread = recvfrom(fwudp->sock, (char *)pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0,
+                     (struct sockaddr *)&ss, &sslen);
+#else
+    nread = recvfrom(fwudp->sock, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0,
+                     (struct sockaddr *)&ss, &sslen);
+#endif
+    if (nread < 0) {
+        DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
+        return POLLIN;
+    }
+
+    /* Check that ring buffer is not full */
+    lim = fwudp->inbuf.unsent;
+    if (lim == 0) {
+        lim = fwudp->inbuf.bufsize - 1; /* guard slot at the end */
+    }
+    else {
+        --lim;
+    }
+
+    beg = fwudp->inbuf.vacant;
+    if (beg == lim) { /* no vacant slot: the datagram just read is dropped */
+        return POLLIN;
+    }
+
+    /* fill the vacant slot; it is published only when inbuf.vacant advances */
+    dgram = &fwudp->inbuf.buf[beg];
+
+    /* record the peer address in lwIP format (PXREMAP_FAILED: drop) */
+    status = fwany_ipX_addr_set_src(&dgram->src_addr, (struct sockaddr *)&ss);
+    if (status == PXREMAP_FAILED) {
+        return POLLIN;
+    }
+
+    if (ss.ss_family == AF_INET) { /* sockaddr ports are in network order */
+        const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
+        dgram->src_port = ntohs(peer4->sin_port);
+    }
+    else { /* PF_INET6 */
+        const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
+        dgram->src_port = ntohs(peer6->sin6_port);
+    }
+
+    p = pbuf_alloc(PBUF_RAW, nread, PBUF_RAM);
+    if (p == NULL) {
+        DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)nread));
+        return POLLIN;
+    }
+
+    error = pbuf_take(p, pollmgr_udpbuf, nread);
+    if (error != ERR_OK) {
+        DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)nread));
+        pbuf_free(p);
+        return POLLIN;
+    }
+
+    dgram->p = p;
+
+    ++beg;
+    if (beg == fwudp->inbuf.bufsize) {
+        beg = 0;
+    }
+    fwudp->inbuf.vacant = beg;
+
+    proxy_lwip_post(&fwudp->msg_send);
+
+    return POLLIN;
+}
+
+
+/**
+ * Lwip thread callback invoked via fwudp::msg_send: passes the oldest queued datagram to udp_send().
+ */
+void
+fwudp_pcb_send(void *arg)
+{
+ struct fwudp *fwudp = (struct fwudp *)arg;
+ struct fwudp_dgram dgram;
+ struct udp_pcb *pcb;
+ struct udp_pcb **pprev;
+ int isv6;
+ size_t idx;
+
+ idx = fwudp->inbuf.unsent;
+
+ if (idx == fwudp->inbuf.vacant) {
+ /* empty buffer - shouldn't happen! */
+ DPRINTF(("%s: ring buffer empty!\n", __func__));
+ return;
+ }
+
+ dgram = fwudp->inbuf.buf[idx]; /* struct copy */
+#if 1 /* valgrind hint */
+ fwudp->inbuf.buf[idx].p = NULL;
+#endif
+ if (++idx == fwudp->inbuf.bufsize) {
+ idx = 0;
+ }
+ fwudp->inbuf.unsent = idx;
+
+ /* XXX: this is *STUPID*: linear search of udp_proxy_pcbs for every datagram */
+ isv6 = (fwudp->fwspec.sdom == PF_INET6);
+ pprev = &udp_proxy_pcbs;
+ for (pcb = udp_proxy_pcbs; pcb != NULL; pcb = pcb->next) {
+ if (PCB_ISIPV6(pcb) == isv6
+ && pcb->remote_port == fwudp->dst_port
+ && ipX_addr_cmp(isv6, &fwudp->dst_addr, &pcb->remote_ip)
+ && pcb->local_port == dgram.src_port
+ && ipX_addr_cmp(isv6, &dgram.src_addr, &pcb->local_ip))
+ {
+ break;
+ }
+ else {
+ pprev = &pcb->next;
+ }
+ }
+
+ if (pcb != NULL) {
+ *pprev = pcb->next; /* move the matching pcb to the front of udp_proxy_pcbs */
+ pcb->next = udp_proxy_pcbs;
+ udp_proxy_pcbs = pcb;
+
+ /*
+ * XXX: check that it's ours and not accidentally created by
+ * outbound traffic.
+ *
+ * ???: Otherwise? Expire it and set pcb = NULL; to create a
+ * new one below?
+ */
+ }
+
+ if (pcb == NULL) {
+ pcb = udp_new();
+ if (pcb == NULL) {
+ goto out;
+ }
+
+ ip_set_v6(pcb, isv6);
+
+ /* equivalent of udp_bind */
+ ipX_addr_set(isv6, &pcb->local_ip, &dgram.src_addr);
+ pcb->local_port = dgram.src_port;
+
+ /* equivalent to udp_connect */
+ ipX_addr_set(isv6, &pcb->remote_ip, &fwudp->dst_addr);
+ pcb->remote_port = fwudp->dst_port;
+ pcb->flags |= UDP_FLAGS_CONNECTED;
+
+ udp_recv(pcb, fwudp_pcb_recv, fwudp);
+
+ pcb->next = udp_proxy_pcbs;
+ udp_proxy_pcbs = pcb;
+ udp_proxy_timer_needed();
+ }
+
+ udp_send(pcb, dgram.p);
+
+ out:
+ pbuf_free(dgram.p); /* also reached via 'out' when udp_new() fails */
+}
+
+
+/**
+ * udp_recv() callback; a NULL pbuf means the pcb has expired.
+ */
+void
+fwudp_pcb_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+               ip_addr_t *addr, u16_t port)
+{
+    struct fwudp *fwudp = (struct fwudp *)arg;
+
+    LWIP_UNUSED_ARG(addr);
+    LWIP_UNUSED_ARG(port);
+
+    LWIP_ASSERT1(fwudp != NULL);
+
+    if (p != NULL) {
+        fwudp_pcb_forward_outbound(fwudp, pcb, p);
+        return;
+    }
+
+    DPRINTF(("%s: pcb %p (fwudp %p); sock %d: expired\n",
+             __func__, (void *)pcb, (void *)fwudp, fwudp->sock));
+
+    /* NB: fwudp is "global" and not deleted */
+    /* XXX: TODO: delete local reference when we will keep one */
+    udp_remove(pcb);
+}
+
+
+/*
+ * XXX: This is pxudp_pcb_forward_outbound modulo:
+ * - s/pxudp/fwudp/g
+ * - addr/port (unused in either) dropped
+ * - destination is specified since host socket is not connected
+ */
+static void
+fwudp_pcb_forward_outbound(struct fwudp *fwudp, struct udp_pcb *pcb,
+ struct pbuf *p)
+{
+ union {
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ } peer;
+ socklen_t namelen;
+
+ memset(&peer, 0, sizeof(peer)); /* XXX: shut up valgrind */
+
+ if (fwudp->fwspec.sdom == PF_INET) {
+ peer.sin.sin_family = AF_INET;
+#if HAVE_SA_LEN
+ peer.sin.sin_len =
+#endif
+ namelen = sizeof(peer.sin); /* with HAVE_SA_LEN: sin_len = namelen = sizeof(...) */
+ pxremap_outbound_ip4((ip_addr_t *)&peer.sin.sin_addr, &pcb->local_ip.ip4);
+ peer.sin.sin_port = htons(pcb->local_port);
+ }
+ else {
+ peer.sin6.sin6_family = AF_INET6;
+#if HAVE_SA_LEN
+ peer.sin6.sin6_len =
+#endif
+ namelen = sizeof(peer.sin6); /* with HAVE_SA_LEN: sin6_len = namelen = sizeof(...) */
+
+ pxremap_outbound_ip6((ip6_addr_t *)&peer.sin6.sin6_addr, &pcb->local_ip.ip6);
+ peer.sin6.sin6_port = htons(pcb->local_port);
+ }
+
+ proxy_sendto(fwudp->sock, p, &peer, namelen);
+ pbuf_free(p); /* freed here whatever proxy_sendto() did; its result is not checked */
+}
+
+
+/**
+ * Lwip thread callback invoked via fwudp::msg_delete (posted by
+ * fwudp_del()): releases the forwarder and everything it still owns.
+ */
+static void
+fwudp_pcb_delete(void *arg)
+{
+    struct fwudp *fwudp = (struct fwudp *)arg;
+    struct udp_pcb *pcb;
+    struct udp_pcb **pprev = &udp_proxy_pcbs;
+
+    LWIP_ASSERT1(fwudp->inbuf.unsent == fwudp->inbuf.vacant);
+
+    /*
+     * Unlink and free every proxy pcb whose recv_arg still points at us.
+     */
+    while ((pcb = *pprev) != NULL) {
+        if (pcb->recv_arg == fwudp) {
+            *pprev = pcb->next;
+            memp_free(MEMP_UDP_PCB, pcb);
+        }
+        else {
+            pprev = &pcb->next;
+        }
+    }
+
+    /* fwudp_del() left the socket open; release it with the buffers */
+    closesocket(fwudp->sock);
+    free(fwudp->inbuf.buf);
+    free(fwudp);
+}
diff --git a/src/VBox/NetworkServices/NAT/getrawsock.c b/src/VBox/NetworkServices/NAT/getrawsock.c
new file mode 100644
index 00000000..a882f67a
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/getrawsock.c
@@ -0,0 +1,155 @@
+/* $Id: getrawsock.c $ */
+/** @file
+ * Obtain raw-sockets from a server when debugging unprivileged.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include <pwd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+/* XXX: this should be in a header, but isn't. naughty me. :( */
+int getrawsock(int type);
+
+
+/**
+ * Ask the auxiliary mkrawsock server for a raw ICMP socket.
+ *
+ * Connects to the unix socket /tmp/.vbox-<user>-aux/mkrawsock (where
+ * <user> is the name of the real uid), sends a one byte request ('4'
+ * or '6') and expects the same byte back together with a file
+ * descriptor passed as SCM_RIGHTS ancillary data.
+ *
+ * @param type  AF_INET for an ICMPv4 socket, AF_INET6 for ICMPv6.
+ *
+ * @returns the received socket descriptor, or -1 on any failure.
+ *          Also returns -1 without contacting the server when the
+ *          effective uid is 0 or when type is neither AF_INET nor
+ *          AF_INET6.
+ */
+int
+getrawsock(int type)
+{
+    struct sockaddr_un sux;     /* because solaris */
+    struct passwd *pw;
+    int pathlen;
+    int rawsock, server;
+    struct msghdr mh;
+    struct iovec iov[1];
+    char buf[1];
+    struct cmsghdr *cmh;
+    /* the union guarantees that the control buffer is aligned for cmsghdr */
+    union {
+        struct cmsghdr align;
+        char buf[CMSG_SPACE(sizeof(int))];
+    } cmsg;
+    ssize_t nread, nsent;
+    int status;
+
+    server = -1;
+    rawsock = -1;
+
+    memset(&sux, 0, sizeof(sux));
+    sux.sun_family = AF_UNIX;
+
+    if (geteuid() == 0) {
+        return -1;
+    }
+
+    if (type == AF_INET) {
+        buf[0] = '4';
+    }
+    else if (type == AF_INET6) {
+        buf[0] = '6';
+    }
+    else {
+        return -1;
+    }
+
+    errno = 0;
+    pw = getpwuid(getuid());
+    if (pw == NULL) {
+        perror("getpwuid");
+        return -1;
+    }
+
+    /*
+     * snprintf() returns the length the full string would have had,
+     * not counting the terminating NUL, so a result equal to the
+     * buffer size already means truncation.  It may also return a
+     * negative value on error.
+     */
+    pathlen = snprintf(sux.sun_path, sizeof(sux.sun_path),
+                       "/tmp/.vbox-%s-aux/mkrawsock", pw->pw_name);
+    if (pathlen < 0 || (size_t)pathlen >= sizeof(sux.sun_path)) {
+        fprintf(stderr, "socket pathname truncated\n");
+        return -1;
+    }
+
+    server = socket(PF_UNIX, SOCK_STREAM, 0);
+    if (server < 0) {
+        perror("socket");
+        return -1;
+    }
+
+    status = connect(server, (struct sockaddr *)&sux,
+                     (sizeof(sux) - sizeof(sux.sun_path)
+                      + strlen(sux.sun_path) + 1));
+    if (status < 0) {
+        perror(sux.sun_path);
+        goto out;
+    }
+
+    /* request: a single byte telling which address family we want */
+    nsent = send(server, buf, 1, 0);
+    if (nsent != 1) {
+        if (nsent < 0) {
+            perror("send");
+        }
+        else {
+            fprintf(stderr, "failed to contact mkrawsock\n");
+        }
+        goto out;
+    }
+
+    /* reply: the request byte echoed back plus the descriptor */
+    buf[0] = '\0';
+
+    iov[0].iov_base = buf;
+    iov[0].iov_len = 1;
+
+    memset(&mh, 0, sizeof(mh));
+    mh.msg_iov = iov;
+    mh.msg_iovlen = 1;
+    mh.msg_control = cmsg.buf;
+    mh.msg_controllen = sizeof(cmsg.buf);
+
+    nread = recvmsg(server, &mh, 0);
+    if (nread != 1) {
+        if (nread < 0) {
+            perror("recvmsg");
+        }
+        else {
+            fprintf(stderr, "EOF from mkrawsock\n");
+        }
+        goto out;
+    }
+
+    /* anything but an echo of our request (e.g. a NUL byte) is a refusal */
+    if ((type == AF_INET && buf[0] != '4')
+        || (type == AF_INET6 && buf[0] != '6')
+        || mh.msg_controllen == 0)
+    {
+        goto out;
+    }
+
+    for (cmh = CMSG_FIRSTHDR(&mh); cmh != NULL; cmh = CMSG_NXTHDR(&mh, cmh)) {
+        if ((cmh->cmsg_level == SOL_SOCKET)
+            && (cmh->cmsg_type == SCM_RIGHTS)
+            && (cmh->cmsg_len == CMSG_LEN(sizeof(rawsock))))
+        {
+            /* CMSG_DATA() is not guaranteed to be suitably aligned for int */
+            memcpy(&rawsock, CMSG_DATA(cmh), sizeof(rawsock));
+            break;
+        }
+    }
+
+  out:
+    if (server != -1) {
+        close(server);
+    }
+    if (rawsock != -1) {
+        printf("%s: got ICMPv%c socket %d\n",
+               __func__, type == AF_INET ? '4' : '6', rawsock);
+    }
+    return rawsock;
+}
diff --git a/src/VBox/NetworkServices/NAT/lwipopts.h b/src/VBox/NetworkServices/NAT/lwipopts.h
new file mode 100644
index 00000000..51c8b916
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/lwipopts.h
@@ -0,0 +1,205 @@
+/* $Id: lwipopts.h $ */
+/** @file
+ * NAT Network - lwIP configuration options.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_lwipopts_h
+#define VBOX_INCLUDED_SRC_NAT_lwipopts_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <VBox/cdefs.h> /* For VBOX_STRICT. */
+#include <iprt/mem.h>
+#include <iprt/alloca.h> /* This may include malloc.h (msc), which is something that has
+ * to be done before redefining any of the functions therein. */
+#include <iprt/rand.h> /* see LWIP_RAND() definition */
+
+/* lwip/sockets.h assumes that if FD_SET is defined (in case of Innotek GCC
+ * its definition is dragged through iprt/types.h) then struct timeval is
+ * defined as well, but it's not the case. So include it manually. */
+#ifdef RT_OS_OS2
+# include <sys/time.h>
+#endif
+
+/** Make lwIP use the libc malloc, or more precisely (see below) the IPRT
+ * memory allocation functions. */
+#define MEM_LIBC_MALLOC 1
+
+/** Set proper memory alignment. */
+#if HC_ARCH_BITS == 64
+# define MEM_ALIGNMENT 8
+#else
+#define MEM_ALIGNMENT 4
+#endif
+
+/* Padding before Ethernet header to make IP header aligned */
+#define ETH_PAD_SIZE 2
+
+/* IP */
+#define IP_REASSEMBLY 1
+#define IP_REASS_MAX_PBUFS 128
+
+
+
+/** Increase maximum TCP window size. */
+#define TCP_WND 32768
+
+/** Increase TCP maximum segment size. */
+#define TCP_MSS 1460
+
+/** Enable queueing of out-of-order segments. */
+#define TCP_QUEUE_OOSEQ 1
+
+/** TCP sender buffer space (bytes). */
+#define TCP_SND_BUF (32 * TCP_MSS)
+
+/* TCP sender buffer space (pbufs). This must be at least = 2 *
+ TCP_SND_BUF/TCP_MSS for things to work. */
+#define TCP_SND_QUEUELEN 128
+
+/* MEMP_NUM_PBUF: the number of memp struct pbufs. If the application
+ sends a lot of data out of ROM (or other static memory), this
+ should be set high.
+
+ NB: This is for PBUF_ROM and PBUF_REF pbufs only!
+
+ Number of PBUF_POOL pbufs is controlled by PBUF_POOL_SIZE that,
+ somewhat confusingly, breaks MEMP_NUM_* pattern.
+
+ PBUF_RAM pbufs are allocated with mem_malloc (with MEM_LIBC_MALLOC
+ set to 1 this is just system malloc), not memp_malloc. */
+#define MEMP_NUM_PBUF (1024 * 4)
+
+
+/* MEMP_NUM_MLD6_GROUP: Maximum number of IPv6 multicast groups that
+ can be joined.
+
+ We need to be able to join solicited node multicast for each
+ address (potentially different) and two groups for DHCP6. All
+ routers multicast is hardcoded in ip6.c and does not require
+ explicit joining. Provide also for a few extra groups just in
+ case. */
+#define MEMP_NUM_MLD6_GROUP (LWIP_IPV6_NUM_ADDRESSES + /* dhcp6 */ 2 + /* extra */ 8)
+
+
+/* MEMP_NUM_TCP_SEG: the number of simultaneously queued TCP
+ segments. It is expressed in terms of MEMP_NUM_TCP_PCB, which is
+ only defined a few lines further down; that is fine because the
+ preprocessor expands a macro where it is used, not where it is
+ defined. */
+#define MEMP_NUM_TCP_SEG (MEMP_NUM_TCP_PCB * TCP_SND_QUEUELEN / 2)
+
+/* MEMP_NUM_TCP_PCB: the number of simultaneously active TCP
+ connections. */
+#define MEMP_NUM_TCP_PCB 128
+
+/* MEMP_NUM_TCPIP_MSG_*: the number of struct tcpip_msg, which is used
+ for sequential API communication and incoming packets. Used in
+ src/api/tcpip.c. */
+#define MEMP_NUM_TCPIP_MSG_API 128
+#define MEMP_NUM_TCPIP_MSG_INPKT 1024
+
+/* MEMP_NUM_UDP_PCB: the number of UDP protocol control blocks. One
+ per active UDP "connection". */
+#define MEMP_NUM_UDP_PCB 32
+
+/* Pbuf options */
+/* PBUF_POOL_SIZE: the number of buffers in the pbuf pool.
+ This is only for PBUF_POOL pbufs, primarily used by netif drivers.
+
+ This should have been named with the MEMP_NUM_ prefix (cf.
+ MEMP_NUM_PBUF for PBUF_ROM and PBUF_REF) as it controls the size of
+ yet another memp_malloc() pool. */
+#define PBUF_POOL_SIZE (1024 * 4)
+
+/* PBUF_POOL_BUFSIZE: the size of each pbuf in the pbuf pool.
+ Use default that is based on TCP_MSS and PBUF_LINK_HLEN. */
+#undef PBUF_POOL_BUFSIZE
+
+/** Turn on support for lightweight critical region protection. Leaving this
+ * off uses synchronization code in pbuf.c which is totally polluted with
+ * races. All the other lwip source files would fall back to semaphore-based
+ * synchronization, but pbuf.c is just broken, leading to incorrect allocation
+ * and as a result to assertions due to buffers being double freed. */
+#define SYS_LIGHTWEIGHT_PROT 1
+
+/** Attempt to get rid of htons etc. macro issues. */
+#undef LWIP_PREFIX_BYTEORDER_FUNCS
+
+#define LWIP_TCPIP_CORE_LOCKING_INPUT 0
+#define LWIP_TCPIP_CORE_LOCKING 0
+#define LWIP_TCP 1
+#define LWIP_SOCKET 0
+#define LWIP_ARP 1
+#define ARP_PROXY 1
+#define LWIP_ETHERNET 1
+#define LWIP_COMPAT_SOCKETS 0
+#define LWIP_COMPAT_MUTEX 1
+
+#define LWIP_IPV6 1
+#define LWIP_IPV6_FORWARD 1
+#define LWIP_ND6_PROXY 1
+
+#define LWIP_ND6_ALLOW_RA_UPDATES (!LWIP_IPV6_FORWARD)
+#define LWIP_IPV6_SEND_ROUTER_SOLICIT (!LWIP_IPV6_FORWARD)
+/* The proxy itself does not need IPv6 autoconfiguration, but it is required
+ * in some rare cases, e.g. iSCSI over intnet with IPv6.
+ */
+#define LWIP_IPV6_AUTOCONFIG 1
+#if LWIP_IPV6_FORWARD /* otherwise use the default from lwip/opt.h */
+#define LWIP_IPV6_DUP_DETECT_ATTEMPTS 0
+#endif
+
+#define LWIP_IPV6_FRAG 1
+
+/**
+ * aka Slirp mode.
+ */
+#define LWIP_CONNECTION_PROXY 1
+#define IP_FORWARD 1
+
+/* MEMP_NUM_SYS_TIMEOUT: the number of simultaneously active
+ timeouts. */
+#define MEMP_NUM_SYS_TIMEOUT 256
+
+
+/* this is required for IPv6 and IGMP needs */
+#define LWIP_RAND() RTRandU32()
+
+/* Debugging stuff. */
+#ifdef DEBUG
+# define LWIP_DEBUG
+# include "lwip-log.h"
+
+# define LWIP_PROXY_DEBUG LWIP_DBG_OFF
+#endif /* DEBUG */
+
+/* printf formatter definitions */
+#define U16_F "hu"
+#define S16_F "hd"
+#define X16_F "hx"
+#define U32_F "u"
+#define S32_F "d"
+#define X32_F "x"
+
+/* Redirect libc memory alloc functions to IPRT. */
+#define malloc(x) RTMemAlloc(x)
+#define realloc(x,y) RTMemRealloc((x), (y))
+#define free(x) RTMemFree(x)
+
+/* Align VBOX_STRICT and LWIP_NOASSERT. */
+#ifndef VBOX_STRICT
+# define LWIP_NOASSERT 1
+#endif
+
+#endif /* !VBOX_INCLUDED_SRC_NAT_lwipopts_h */
diff --git a/src/VBox/NetworkServices/NAT/mkrawsock.c b/src/VBox/NetworkServices/NAT/mkrawsock.c
new file mode 100644
index 00000000..ced265c2
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/mkrawsock.c
@@ -0,0 +1,339 @@
+/* $Id: mkrawsock.c $ */
+/** @file
+ * Auxiliary server to create raw-sockets when debugging unprivileged.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifdef __linux__
+#define _GNU_SOURCE
+#endif
+
+#ifdef __sun__
+#if __STDC_VERSION__ - 0 >= 199901L
+#define _XOPEN_SOURCE 600
+#else
+#define _XOPEN_SOURCE 500
+#endif
+#define __EXTENSIONS__ 1
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#ifdef __linux__
+#include <linux/icmp.h> /* for ICMP_FILTER */
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+
+static void handler(int sig);
+static void serve(int s);
+static int mkrawsock(int family);
+
+volatile sig_atomic_t signaled = 0;
+
+/**
+ * Auxiliary server entry point.
+ *
+ * When run as root the name of the (non-superuser) user to serve must
+ * be given as the only argument; otherwise the invoking user is
+ * served.  Creates (or validates) the directory /tmp/.vbox-<user>-aux,
+ * binds a unix stream socket "mkrawsock" inside it, hands both over to
+ * the user and then serves one client at a time until SIGINT or
+ * SIGTERM is received.
+ *
+ * @returns EXIT_SUCCESS after a signalled shutdown, EXIT_FAILURE if
+ *          setup fails.
+ */
+int
+main(int argc, char **argv)
+{
+    struct sigaction sa;
+    struct sockaddr_un sux;     /* because solaris */
+    struct passwd *pw;
+    int pathlen;
+    char *slash;
+    int s, client;
+    int status;
+
+    memset(&sux, 0, sizeof(sux));
+    sux.sun_family = AF_UNIX;
+
+    if (getuid() == 0) {
+        if (argc != 2) {
+            fprintf(stderr, "username required when run as root\n");
+            return EXIT_FAILURE;
+        }
+
+        errno = 0;
+        pw = getpwnam(argv[1]);
+        if (pw == NULL) {
+            perror("getpwnam");
+            return EXIT_FAILURE;
+        }
+        if (pw->pw_uid == 0) {
+            fprintf(stderr, "%s is superuser\n", pw->pw_name);
+            return EXIT_FAILURE;
+        }
+    }
+    else {
+        errno = 0;
+        pw = getpwuid(getuid());
+        if (pw == NULL) {
+            perror("getpwuid");
+            return EXIT_FAILURE;
+        }
+    }
+
+    /*
+     * snprintf() returns the length the full string would have had,
+     * not counting the terminating NUL, so a result equal to the
+     * buffer size already means truncation.  It may also return a
+     * negative value on error.
+     */
+    pathlen = snprintf(sux.sun_path, sizeof(sux.sun_path),
+                       "/tmp/.vbox-%s-aux/mkrawsock", pw->pw_name);
+    if (pathlen < 0 || (size_t)pathlen >= sizeof(sux.sun_path)) {
+        fprintf(stderr, "socket pathname truncated\n");
+        return EXIT_FAILURE;
+    }
+
+    slash = strrchr(sux.sun_path, '/');
+    if (slash == NULL) {
+        fprintf(stderr, "%s: no directory separator\n", sux.sun_path);
+        return EXIT_FAILURE;
+    }
+
+    /* temporarily cut the path down to the directory part */
+    *slash = '\0';
+
+    status = mkdir(sux.sun_path, 0700);
+    if (status == 0) {
+        status = chown(sux.sun_path, pw->pw_uid, pw->pw_gid);
+        if (status < 0) {
+            perror("chown");
+            return EXIT_FAILURE;
+        }
+    }
+    else if (errno != EEXIST) {
+        perror("mkdir");
+        return EXIT_FAILURE;
+    }
+    else {
+        /* pre-existing path: insist on a directory owned by the user, mode 0700 */
+        int dirfd;
+        struct stat st;
+
+        /*
+         * O_DIRECTORY is an open flag and must be or-ed into the
+         * second argument; as a third argument it would be taken as
+         * the (unused) creation mode and have no effect.
+         */
+        dirfd = open(sux.sun_path, O_RDONLY | O_DIRECTORY);
+        if (dirfd < 0) {
+            perror(sux.sun_path);
+            return EXIT_FAILURE;
+        }
+
+        status = fstat(dirfd, &st);
+        close(dirfd);
+
+        if (status < 0) {
+            perror(sux.sun_path);
+            return EXIT_FAILURE;
+        }
+
+        if (st.st_uid != pw->pw_uid) {
+            fprintf(stderr, "%s: exists but not owned by %s\n",
+                    sux.sun_path, pw->pw_name);
+            return EXIT_FAILURE;
+        }
+
+        if ((st.st_mode & 0777) != 0700) {
+            fprintf(stderr, "%s: bad mode %04o\n",
+                    sux.sun_path, (unsigned int)(st.st_mode & 0777));
+            return EXIT_FAILURE;
+        }
+    }
+
+    /* restore the full socket path */
+    *slash = '/';
+
+#if 0
+    status = unlink(sux.sun_path);
+    if (status < 0 && errno != ENOENT) {
+        perror("unlink");
+    }
+#endif
+
+    s = socket(PF_UNIX, SOCK_STREAM, 0);
+    if (s < 0) {
+        perror("socket");
+        return EXIT_FAILURE;
+    }
+
+    status = bind(s, (struct sockaddr *)&sux,
+                  (sizeof(sux) - sizeof(sux.sun_path)
+                   + strlen(sux.sun_path) + 1));
+    if (status < 0) {
+        perror(sux.sun_path);
+        close(s);
+        return EXIT_FAILURE;
+    }
+
+    status = chown(sux.sun_path, pw->pw_uid, pw->pw_gid);
+    if (status < 0) {
+        perror("chown");
+        close(s);
+        return EXIT_FAILURE;
+    }
+
+    status = chmod(sux.sun_path, 0600);
+    if (status < 0) {
+        perror("chmod");
+        close(s);
+        return EXIT_FAILURE;
+    }
+
+    status = listen(s, 1);
+    if (status < 0) {
+        perror("listen");
+        close(s);
+        return EXIT_FAILURE;
+    }
+
+    /* no SA_RESTART: a signal makes accept() fail so the loop can re-check the flag */
+    memset(&sa, 0, sizeof(sa));
+    sa.sa_handler = handler;
+    sigemptyset(&sa.sa_mask);
+
+    sigaction(SIGINT, &sa, NULL);
+    sigaction(SIGTERM, &sa, NULL);
+
+    while (!signaled) {
+        client = accept(s, NULL, NULL);
+        if (client < 0) {
+            perror("accept");
+            continue;
+        }
+
+        serve(client);
+        close(client);
+    }
+
+    close(s);
+    status = unlink(sux.sun_path);
+    if (status < 0) {
+        perror("unlink");
+    }
+
+    return EXIT_SUCCESS;
+}
+
+
+/**
+ * Signal handler installed for SIGINT and SIGTERM.  It only raises
+ * the "signaled" flag that the accept loop in main() tests.
+ */
+static void
+handler(int sig)
+{
+    (void)sig;          /* part of the handler signature, not needed here */
+    signaled = 1;
+}
+
+
+/**
+ * Handle a single request on an accepted connection.
+ *
+ * Reads one request byte: '4' asks for an ICMPv4 raw socket, '6' for
+ * an ICMPv6 one.  On success the request byte is echoed back together
+ * with the new descriptor passed as SCM_RIGHTS ancillary data.  If the
+ * socket cannot be created a single NUL byte is sent instead.  A
+ * malformed request, a read error or a client that closes the
+ * connection without sending anything gets no reply.
+ *
+ * The caller keeps ownership of the client descriptor.
+ */
+static void
+serve(int client)
+{
+#ifdef SO_PEERCRED
+    struct ucred cr;
+    socklen_t crlen;
+#endif
+    ssize_t nread, nsent;
+    struct msghdr mh;
+    struct iovec iov[1];
+    char buf[1];
+    struct cmsghdr *cmh;
+    /* the union guarantees that the control buffer is aligned for cmsghdr */
+    union {
+        struct cmsghdr align;
+        char buf[CMSG_SPACE(sizeof(int))];
+    } cmsg;
+    int fd;
+    int status;
+
+#ifdef SO_PEERCRED
+    crlen = sizeof(cr);
+    status = getsockopt(client, SOL_SOCKET, SO_PEERCRED, &cr, &crlen);
+    if (status < 0) {
+        perror("SO_PEERCRED");
+        return;
+    }
+
+    /* no newline here: the line is completed by the messages below */
+    fprintf(stderr, "request from pid %lu uid %lu ",
+            (unsigned long)cr.pid, (unsigned long)cr.uid);
+#endif
+
+    nread = read(client, buf, 1);
+    if (nread < 0) {
+        perror("recv");
+        return;
+    }
+    if (nread == 0) {
+        /* peer closed the connection; buf[0] was never written */
+        fprintf(stderr, "EOF from client\n");
+        return;
+    }
+
+    fd = -1;
+    switch (buf[0]) {
+
+    case '4':
+        fprintf(stderr, "for ICMPv4 socket\n");
+        fd = mkrawsock(PF_INET);
+        break;
+
+    case '6':
+        fprintf(stderr, "for ICMPv6 socket\n");
+        fd = mkrawsock(PF_INET6);
+        break;
+
+    default:
+        /* go through unsigned char so that bytes >= 0x80 are not sign-extended */
+        fprintf(stderr, "bad request 0x%02x\n",
+                (unsigned int)(unsigned char)buf[0]);
+        return;
+    }
+
+    if (fd < 0) {
+        buf[0] = '\0';          /* NAK */
+        nsent = write(client, buf, 1);
+        (void)nsent;
+        return;
+    }
+
+    memset(&mh, 0, sizeof(mh));
+    memset(&cmsg, 0, sizeof(cmsg));
+
+    /* payload: buf[0] still holds the request byte, which serves as the ACK */
+    iov[0].iov_base = buf;
+    iov[0].iov_len = 1;
+
+    mh.msg_iov = iov;
+    mh.msg_iovlen = 1;
+    mh.msg_control = cmsg.buf;
+    mh.msg_controllen = sizeof(cmsg.buf);
+
+    cmh = CMSG_FIRSTHDR(&mh);
+    cmh->cmsg_level = SOL_SOCKET;
+    cmh->cmsg_type = SCM_RIGHTS;
+    cmh->cmsg_len = CMSG_LEN(sizeof(fd));
+    /* CMSG_DATA() is not guaranteed to be suitably aligned for int */
+    memcpy(CMSG_DATA(cmh), &fd, sizeof(fd));
+
+    nsent = sendmsg(client, &mh, 0);
+    if (nsent < 0) {
+        perror("sendmsg");
+    }
+
+    /* our copy of the descriptor is not needed any more */
+    close(fd);
+}
+
+
+/**
+ * Create a raw ICMP socket.
+ *
+ * @param family  PF_INET selects ICMPv4; any other value selects
+ *                ICMPv6.
+ *
+ * @returns the new descriptor, or -1 after reporting the error with
+ *          perror().
+ */
+static int
+mkrawsock(int family)
+{
+    const int want_v4 = (family == PF_INET);
+    const int fd = want_v4
+        ? socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)
+        : socket(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
+
+    if (fd >= 0) {
+        return fd;
+    }
+
+    perror(want_v4 ? "IPPROTO_ICMP" : "IPPROTO_ICMPV6");
+    return -1;
+}
diff --git a/src/VBox/NetworkServices/NAT/portfwd.c b/src/VBox/NetworkServices/NAT/portfwd.c
new file mode 100644
index 00000000..ab0d7374
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/portfwd.c
@@ -0,0 +1,263 @@
+/* $Id: portfwd.c $ */
+/** @file
+ * NAT Network - port-forwarding rules.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+#include "portfwd.h"
+
+#ifndef RT_OS_WINDOWS
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <poll.h>
+#else
+# include "winpoll.h"
+#endif
+#include <stdio.h>
+#include <string.h>
+
+#include "proxy.h"
+#include "proxy_pollmgr.h"
+#include "pxremap.h"
+
+#include "lwip/netif.h"
+
+
+struct portfwd_msg { /* request handed by pointer over the POLLMGR_CHAN_PORTFWD channel */
+ struct fwspec *fwspec; /* rule to apply; freed by portfwd_pmgr_chan() once handled */
+ int add; /* non-zero: add the rule, zero: delete it */
+};
+
+
+static int portfwd_chan_send(struct portfwd_msg *);
+static int portfwd_rule_add_del(struct fwspec *, int);
+static int portfwd_pmgr_chan(struct pollmgr_handler *, SOCKET, int);
+
+
+static struct pollmgr_handler portfwd_pmgr_chan_hdl;
+
+
+/**
+ * Register the handler for the POLLMGR_CHAN_PORTFWD channel and
+ * initialize the TCP and UDP forwarders.
+ */
+void
+portfwd_init(void)
+{
+    struct pollmgr_handler *hdl = &portfwd_pmgr_chan_hdl;
+
+    hdl->slot = -1;
+    hdl->data = NULL;
+    hdl->callback = portfwd_pmgr_chan;
+    pollmgr_add_chan(POLLMGR_CHAN_PORTFWD, hdl);
+
+    /* add preconfigured forwarders */
+    fwtcp_init();
+    fwudp_init();
+}
+
+
+/**
+ * Post a request on the POLLMGR_CHAN_PORTFWD channel.
+ *
+ * What travels over the channel is the pointer itself, not the
+ * structure it points to.  If sending fails the message is freed
+ * here.
+ *
+ * @returns 0 on success, -1 on failure.
+ */
+static int
+portfwd_chan_send(struct portfwd_msg *msg)
+{
+    if (pollmgr_chan_send(POLLMGR_CHAN_PORTFWD, &msg, sizeof(msg)) >= 0) {
+        return 0;
+    }
+
+    free(msg);
+    return -1;
+}
+
+
+/**
+ * Allocate a request for the given rule and post it on the
+ * port-forwarding channel.
+ *
+ * @param fwspec  the rule
+ * @param add     non-zero to add the rule, zero to delete it
+ *
+ * @returns 0 on success, -1 if the message could not be allocated or
+ *          sent.
+ */
+static int
+portfwd_rule_add_del(struct fwspec *fwspec, int add)
+{
+    struct portfwd_msg *req;
+
+    req = (struct portfwd_msg *)malloc(sizeof(struct portfwd_msg));
+    if (req != NULL) {
+        req->add = add;
+        req->fwspec = fwspec;
+        return portfwd_chan_send(req);
+    }
+
+    DPRINTF0(("%s: failed to allocate message\n", __func__));
+    return -1;
+}
+
+
+int
+portfwd_rule_add(struct fwspec *fwspec)
+{ /* Queue a request to add the rule; the channel handler frees fwspec after processing it. */
+ return portfwd_rule_add_del(fwspec, 1); /* 0 if queued, -1 on failure */
+}
+
+
+int
+portfwd_rule_del(struct fwspec *fwspec)
+{ /* Queue a request to delete the rule; the channel handler frees fwspec after processing it. */
+ return portfwd_rule_add_del(fwspec, 0); /* 0 if queued, -1 on failure */
+}
+
+
+/**
+ * POLLMGR_CHAN_PORTFWD handler.
+ *
+ * Receives a struct portfwd_msg pointer from the channel, dispatches
+ * it to the TCP or UDP forwarder according to the socket type of the
+ * rule, and then frees both the rule and the message.
+ *
+ * @returns POLLIN.
+ */
+static int
+portfwd_pmgr_chan(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct portfwd_msg *req =
+        (struct portfwd_msg *)pollmgr_chan_recv_ptr(handler, fd, revents);
+    struct fwspec *rule = req->fwspec;
+    const int is_tcp = (rule->stype == SOCK_STREAM);
+
+    if (req->add) {
+        if (is_tcp) {
+            fwtcp_add(rule);
+        }
+        else { /* SOCK_DGRAM */
+            fwudp_add(rule);
+        }
+    }
+    else {
+        if (is_tcp) {
+            fwtcp_del(rule);
+        }
+        else { /* SOCK_DGRAM */
+            fwudp_del(rule);
+        }
+    }
+
+    free(rule);
+    free(req);
+
+    return POLLIN;
+}
+
+
+/**
+ * Fill in a forwarding rule from textual addresses and port numbers.
+ *
+ * Both addresses must be numeric (AI_NUMERICHOST) and of the family
+ * that corresponds to sdom.  Ports are given in host byte order and
+ * stored in network byte order.
+ *
+ * @param fwspec        rule to fill in
+ * @param sdom          PF_INET or PF_INET6
+ * @param stype         SOCK_STREAM or SOCK_DGRAM
+ * @param src_addr_str  source address of the rule
+ * @param src_port      source port of the rule
+ * @param dst_addr_str  destination address of the rule
+ * @param dst_port      destination port of the rule
+ *
+ * @returns 0 on success, -1 if either address cannot be parsed (the
+ *          failure is logged).
+ */
+int
+fwspec_set(struct fwspec *fwspec, int sdom, int stype,
+           const char *src_addr_str, uint16_t src_port,
+           const char *dst_addr_str, uint16_t dst_port)
+{
+    struct addrinfo hints;
+    struct addrinfo *ai;
+    int status;
+
+    LWIP_ASSERT1(sdom == PF_INET || sdom == PF_INET6);
+    LWIP_ASSERT1(stype == SOCK_STREAM || stype == SOCK_DGRAM);
+
+    fwspec->stype = stype;
+    fwspec->sdom = sdom;
+
+    memset(&hints, 0, sizeof(hints));
+    hints.ai_flags = AI_NUMERICHOST;
+    hints.ai_socktype = stype;
+    hints.ai_family = (sdom != PF_INET) ? AF_INET6 : AF_INET;
+
+    /* source address */
+    status = getaddrinfo(src_addr_str, NULL, &hints, &ai);
+    if (status != 0) {
+        LogRel(("\"%s\": %s\n", src_addr_str, gai_strerror(status)));
+        return -1;
+    }
+    LWIP_ASSERT1(ai != NULL);
+    LWIP_ASSERT1(ai->ai_addrlen <= sizeof(fwspec->src));
+    memcpy(&fwspec->src, ai->ai_addr, ai->ai_addrlen);
+    freeaddrinfo(ai);
+    ai = NULL;
+
+    /* destination address */
+    status = getaddrinfo(dst_addr_str, NULL, &hints, &ai);
+    if (status != 0) {
+        LogRel(("\"%s\": %s\n", dst_addr_str, gai_strerror(status)));
+        return -1;
+    }
+    LWIP_ASSERT1(ai != NULL);
+    LWIP_ASSERT1(ai->ai_addrlen <= sizeof(fwspec->dst));
+    memcpy(&fwspec->dst, ai->ai_addr, ai->ai_addrlen);
+    freeaddrinfo(ai);
+    ai = NULL;
+
+    /* the lookups were done without a service, so set the ports by hand */
+    if (sdom != PF_INET) { /* PF_INET6 */
+        fwspec->src.sin6.sin6_port = htons(src_port);
+        fwspec->dst.sin6.sin6_port = htons(dst_port);
+    }
+    else {
+        fwspec->src.sin.sin_port = htons(src_port);
+        fwspec->dst.sin.sin_port = htons(dst_port);
+    }
+
+    return 0;
+}
+
+
+/**
+ * Compare two forwarding rules.
+ *
+ * Rules are equal when socket domain, socket type, both ports and
+ * both addresses match.
+ *
+ * @returns 1 if the rules are equal, 0 otherwise.
+ */
+int
+fwspec_equal(struct fwspec *a, struct fwspec *b)
+{
+    LWIP_ASSERT1(a != NULL);
+    LWIP_ASSERT1(b != NULL);
+
+    if (a->sdom != b->sdom) {
+        return 0;
+    }
+    if (a->stype != b->stype) {
+        return 0;
+    }
+
+    if (a->sdom != PF_INET) { /* PF_INET6 */
+        if (a->src.sin6.sin6_port != b->src.sin6.sin6_port) {
+            return 0;
+        }
+        if (a->dst.sin6.sin6_port != b->dst.sin6.sin6_port) {
+            return 0;
+        }
+        if (!IN6_ARE_ADDR_EQUAL(&a->src.sin6.sin6_addr, &b->src.sin6.sin6_addr)) {
+            return 0;
+        }
+        if (!IN6_ARE_ADDR_EQUAL(&a->dst.sin6.sin6_addr, &b->dst.sin6.sin6_addr)) {
+            return 0;
+        }
+        return 1;
+    }
+
+    /* PF_INET */
+    if (a->src.sin.sin_port != b->src.sin.sin_port) {
+        return 0;
+    }
+    if (a->dst.sin.sin_port != b->dst.sin.sin_port) {
+        return 0;
+    }
+    if (a->src.sin.sin_addr.s_addr != b->src.sin.sin_addr.s_addr) {
+        return 0;
+    }
+    if (a->dst.sin.sin_addr.s_addr != b->dst.sin.sin_addr.s_addr) {
+        return 0;
+    }
+    return 1;
+}
+
+
+/**
+ * Set fwdsrc to the IP address of the peer.
+ *
+ * For port-forwarded connections originating from the host's
+ * loopback the source address is set to the address of one of the
+ * lwIP interfaces.
+ *
+ * Currently we only have one interface so there's not much logic
+ * here.  In the future we might need to additionally consult fwspec
+ * and routing table to determine which netif is used for connections
+ * to the specified guest.
+ *
+ * @returns the result of pxremap_inbound_ip4()/pxremap_inbound_ip6(),
+ *          or PXREMAP_FAILED if the peer's address family is neither
+ *          AF_INET nor AF_INET6.
+ */
+int
+fwany_ipX_addr_set_src(ipX_addr_t *fwdsrc, const struct sockaddr *peer)
+{
+    switch (peer->sa_family) {
+
+    case AF_INET: {
+        ip_addr_t ip4;
+
+        ip4.addr = ((const struct sockaddr_in *)peer)->sin_addr.s_addr;
+        return pxremap_inbound_ip4(&fwdsrc->ip4, &ip4);
+    }
+
+    case AF_INET6: {
+        ip6_addr_t ip6;
+
+        memcpy(&ip6, &((const struct sockaddr_in6 *)peer)->sin6_addr,
+               sizeof(ip6_addr_t));
+        return pxremap_inbound_ip6(&fwdsrc->ip6, &ip6);
+    }
+
+    default:
+        return PXREMAP_FAILED;
+    }
+}
diff --git a/src/VBox/NetworkServices/NAT/portfwd.h b/src/VBox/NetworkServices/NAT/portfwd.h
new file mode 100644
index 00000000..ce8d25d7
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/portfwd.h
@@ -0,0 +1,74 @@
+/* $Id: portfwd.h $ */
+/** @file
+ * NAT Network - port-forwarding rules, definitions and declarations.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_portfwd_h
+#define VBOX_INCLUDED_SRC_NAT_portfwd_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#ifndef RT_OS_WINDOWS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#endif
+
+#include "lwip/ip_addr.h"
+
+
+struct fwspec { /* one port-forwarding rule; filled in by fwspec_set() */
+ int sdom; /* socket domain: PF_INET or PF_INET6; selects sin vs. sin6 below */
+ int stype; /* socket type: SOCK_STREAM or SOCK_DGRAM */
+
+ /* address and port to listen on (fwspec_set() stores the port in network byte order) */
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in sin; /* sdom == PF_INET */
+ struct sockaddr_in6 sin6; /* sdom == PF_INET6 */
+ } src;
+
+ /* address and port to forward to (fwspec_set() stores the port in network byte order) */
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in sin; /* sdom == PF_INET */
+ struct sockaddr_in6 sin6; /* sdom == PF_INET6 */
+ } dst;
+};
+
+
+void portfwd_init(void); /* registers the port-forwarding channel handler, then calls fwtcp_init() and fwudp_init() */
+int portfwd_rule_add(struct fwspec *); /* 0 if the request was queued, -1 on failure */
+int portfwd_rule_del(struct fwspec *); /* 0 if the request was queued, -1 on failure */
+
+
+int fwspec_set(struct fwspec *, int, int, /* rule to fill in, sdom, stype */
+ const char *, uint16_t, /* numeric source ("listen on") address, port in host byte order */
+ const char *, uint16_t); /* numeric destination ("forward to") address, port in host byte order; returns 0 or -1 */
+
+int fwspec_equal(struct fwspec *, struct fwspec *); /* non-zero if both rules are the same */
+
+void fwtcp_init(void); /* called from portfwd_init() */
+void fwudp_init(void); /* called from portfwd_init() */
+
+void fwtcp_add(struct fwspec *); /* called by the channel handler for SOCK_STREAM rules */
+void fwtcp_del(struct fwspec *);
+void fwudp_add(struct fwspec *); /* called by the channel handler for all other (SOCK_DGRAM) rules */
+void fwudp_del(struct fwspec *);
+
+int fwany_ipX_addr_set_src(ipX_addr_t *, const struct sockaddr *); /* returns the pxremap result, PXREMAP_FAILED for an unsupported family */
+
+#endif /* !VBOX_INCLUDED_SRC_NAT_portfwd_h */
diff --git a/src/VBox/NetworkServices/NAT/proxy.c b/src/VBox/NetworkServices/NAT/proxy.c
new file mode 100644
index 00000000..1fd24ef5
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/proxy.c
@@ -0,0 +1,705 @@
+/* $Id: proxy.c $ */
+/** @file
+ * NAT Network - proxy setup and utilities.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+
+#include "proxy.h"
+#include "proxy_pollmgr.h"
+#include "portfwd.h"
+
+#include "lwip/opt.h"
+
+#include "lwip/sys.h"
+#include "lwip/tcpip.h"
+
+#ifndef RT_OS_WINDOWS
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <iprt/string.h>
+#include <unistd.h>
+#include <err.h>
+#else
+# include <iprt/string.h>
+#endif
+
+#if defined(SOCK_NONBLOCK) && defined(RT_OS_NETBSD) /* XXX: PR kern/47569 */
+# undef SOCK_NONBLOCK
+#endif
+
+#ifndef __arraycount
+# define __arraycount(a) (sizeof(a)/sizeof(a[0]))
+#endif
+
+static FNRTSTRFORMATTYPE proxy_sockerr_rtstrfmt;
+
+static SOCKET proxy_create_socket(int, int);
+
+volatile struct proxy_options *g_proxy_options;
+static sys_thread_t pollmgr_tid;
+
+/* XXX: for mapping loopbacks to addresses in our network (ip4) */
+struct netif *g_proxy_netif;
+
+
+/*
+ * Set up the proxy: register the %R[sockerr] formatter, publish the
+ * options and the netif in globals, start the individual services
+ * and finally create the poll manager thread.
+ *
+ * Called on the lwip thread (aka tcpip thread) from tcpip_init() via
+ * its "tcpip_init_done" callback. Raw API is ok to use here
+ * (e.g. rtadvd), but netconn API is not.
+ *
+ * Failure to initialize the poll manager or to create its thread is
+ * reported with errx(EXIT_FAILURE, ...).
+ */
+void
+proxy_init(struct netif *proxy_netif, struct proxy_options *opts)
+{
+ int status;
+
+ LWIP_ASSERT1(opts != NULL);
+ LWIP_UNUSED_ARG(proxy_netif);
+
+ status = RTStrFormatTypeRegister("sockerr", proxy_sockerr_rtstrfmt, NULL); /* enables %R[sockerr] in format strings */
+ AssertRC(status);
+
+ g_proxy_options = opts;
+ g_proxy_netif = proxy_netif;
+
+#if 1
+ proxy_rtadvd_start(proxy_netif);
+#endif
+
+ /*
+ * XXX: We use stateless DHCPv6 only to report IPv6 address(es) of
+ * nameserver(s). Since we don't yet support IPv6 addresses in
+ * HostDnsService, there's no point in running DHCPv6.
+ */
+#if 0
+ dhcp6ds_init(proxy_netif);
+#endif
+
+ if (opts->tftp_root != NULL) { /* TFTP server is started only when a root directory is configured */
+ tftpd_init(proxy_netif, opts->tftp_root);
+ }
+
+ status = pollmgr_init();
+ if (status < 0) {
+ errx(EXIT_FAILURE, "failed to initialize poll manager");
+ /* NOTREACHED */
+ }
+
+ pxtcp_init();
+ pxudp_init();
+
+ portfwd_init(); /* registers a poll manager channel, hence after pollmgr_init() */
+
+ pxdns_init(proxy_netif);
+
+ pxping_init(proxy_netif, opts->icmpsock4, opts->icmpsock6);
+
+ pollmgr_tid = sys_thread_new("pollmgr_thread",
+ pollmgr_thread, NULL,
+ DEFAULT_THREAD_STACKSIZE,
+ DEFAULT_THREAD_PRIO);
+ if (!pollmgr_tid) {
+ errx(EXIT_FAILURE, "failed to create poll manager thread");
+ /* NOTREACHED */
+ }
+}
+
+
+#if !defined(RT_OS_WINDOWS)
+/**
+ * Formatter for %R[sockerr] - unix strerror_r() version.
+ *
+ * The error number is passed in pvValue.  Its strerror_r() text is
+ * written through pfnOutput.  Width, precision, flags and user data
+ * are ignored.
+ *
+ * @returns whatever RTStrFormat() returns for the emitted text; 0 if
+ *          pszType is not "sockerr".
+ */
+static DECLCALLBACK(size_t)
+proxy_sockerr_rtstrfmt(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
+                       const char *pszType, const void *pvValue,
+                       int cchWidth, int cchPrecision, unsigned int fFlags,
+                       void *pvUser)
+{
+    const int errnum = (int)(intptr_t)pvValue;
+    char scratch[128];
+    const char *text;
+
+    NOREF(pvUser);
+    NOREF(fFlags);
+    NOREF(cchPrecision);
+    NOREF(cchWidth);
+
+    AssertReturn(strcmp(pszType, "sockerr") == 0, 0);
+
+    scratch[0] = '\0';
+
+    /*
+     * The GNU strerror_r() returns the message pointer, the XSI one
+     * returns int and fills the buffer.  Assigning the result in the
+     * GNU branch makes sure a return type mismatch is caught.
+     */
+#if defined(RT_OS_LINUX) && defined(_GNU_SOURCE)
+    text = strerror_r(errnum, scratch, sizeof(scratch));
+#else
+    strerror_r(errnum, scratch, sizeof(scratch));
+    text = scratch;
+#endif
+
+    return RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, "%s", text);
+}
+
+#else /* RT_OS_WINDOWS */
+
+/**
+ * Formatter for %R[sockerr] - windows FormatMessage() version.
+ *
+ * The error number is passed in pvValue. Values below _sys_nerr are
+ * formatted with strerror_s(), all other values with
+ * FormatMessageA(). If no message can be obtained the output is
+ * "Unknown error: <number>". Width, precision, flags and user data
+ * are ignored.
+ *
+ * Returns the accumulated return values of the RTStrFormat() calls,
+ * or 0 if pszType is not "sockerr".
+ */
+static DECLCALLBACK(size_t)
+proxy_sockerr_rtstrfmt(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
+ const char *pszType, const void *pvValue,
+ int cchWidth, int cchPrecision, unsigned int fFlags,
+ void *pvUser)
+{
+ const int error = (int)(intptr_t)pvValue;
+ size_t cb = 0;
+
+ NOREF(cchWidth);
+ NOREF(cchPrecision);
+ NOREF(fFlags);
+ NOREF(pvUser);
+
+ AssertReturn(strcmp(pszType, "sockerr") == 0, 0);
+
+ /*
+ * XXX: Windows strerror() doesn't handle posix error codes, but
+ * since winsock uses its own, it shouldn't be much of a problem.
+ * If you see a strange error message, it's probably from
+ * FormatMessage() for an error from <WinError.h> that has the
+ * same numeric value.
+ */
+ if (error < _sys_nerr) { /* within the range of the C runtime's error table */
+ char buf[128] = "";
+ int status;
+
+ status = strerror_s(buf, sizeof(buf), error);
+ if (status == 0) {
+ if (strcmp(buf, "Unknown error") == 0) {
+ /* windows strerror() doesn't add the numeric value */
+ cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+ "Unknown error: %d", error);
+ }
+ else {
+ cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+ "%s", buf);
+ }
+ }
+ else { /* strerror_s() itself failed */
+ cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+ "Unknown error: %d", error);
+ }
+ }
+ else { /* not a C runtime error number: ask the system message table */
+ DWORD nchars;
+ char *msg = NULL;
+
+ nchars = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM
+ | FORMAT_MESSAGE_ALLOCATE_BUFFER,
+ NULL, error, LANG_NEUTRAL,
+ (LPSTR)&msg, 0,
+ NULL); /* with ALLOCATE_BUFFER the function stores the buffer address in msg */
+ if (nchars == 0 || msg == NULL) {
+ cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+ "Unknown error: %d", error);
+ }
+ else {
+ /* FormatMessage() "helpfully" adds newline; get rid of it */
+ char *crpos = strchr(msg, '\r');
+ if (crpos != NULL) {
+ *crpos = '\0';
+ }
+
+ cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+ "%s", msg);
+ }
+
+ if (msg != NULL) {
+ LocalFree(msg); /* release the buffer FormatMessageA() allocated */
+ }
+ }
+
+ return cb;
+}
+#endif /* RT_OS_WINDOWS */
+
+
+/**
+ * Send static callback message from poll manager thread to lwip
+ * thread, scheduling a function call in lwip thread context.
+ *
+ * Failures are only caught by assertions: ERR_VAL is asserted
+ * separately from the general "result is ERR_OK" check.
+ *
+ * XXX: Existing lwip api only provides non-blocking version for this.
+ * It may fail when lwip thread is not running (mbox invalid) or if
+ * post failed (mbox full).  How to handle these?
+ */
+void
+proxy_lwip_post(struct tcpip_msg *msg)
+{
+    err_t error;
+
+    LWIP_ASSERT1(msg != NULL);
+
+    /*
+     * lwip plays games with fake incomplete struct tag to enforce
+     * API, hence the cast.
+     */
+    error = tcpip_callbackmsg((struct tcpip_callback_msg *)msg);
+
+    if (error == ERR_VAL) {
+        /* XXX: lwip thread is not running (mbox invalid) */
+        LWIP_ASSERT1(error != ERR_VAL);
+    }
+
+    LWIP_ASSERT1(error == ERR_OK);
+}
+
+
+/**
+ * Create a non-blocking socket. Disable SIGPIPE for TCP sockets if
+ * possible. On Linux it's not possible and should be disabled for
+ * each send(2) individually.
+ *
+ * For SOCK_STREAM sockets the Nagle algorithm is switched off and, on
+ * Windows, the send buffer is raised to at least 64K.
+ *
+ * @param sdom Socket domain, passed to socket() unchanged.
+ * @param stype Socket type; SOCK_STREAM gets the TCP-specific setup.
+ * @return The new socket, or INVALID_SOCKET if creating it, making it
+ * non-blocking or setting SO_NOSIGPIPE failed. Failures to set
+ * TCP_NODELAY or SO_SNDBUF are only logged.
+ */
+static SOCKET
+proxy_create_socket(int sdom, int stype)
+{
+ SOCKET s;
+ int stype_and_flags;
+ int status;
+
+ LWIP_UNUSED_ARG(status); /* depends on ifdefs */
+
+
+ stype_and_flags = stype;
+
+#if defined(SOCK_NONBLOCK)
+ stype_and_flags |= SOCK_NONBLOCK;
+#endif
+
+ /*
+ * Disable SIGPIPE on disconnected socket. It might be easier to
+ * forgo it and just use MSG_NOSIGNAL on each send*(2), since we
+ * have to do it for Linux anyway, but Darwin does NOT have that
+ * flag (but has SO_NOSIGPIPE socket option).
+ */
+#if !defined(SOCK_NOSIGPIPE) && !defined(SO_NOSIGPIPE) && !defined(MSG_NOSIGNAL)
+#if 0 /* XXX: Solaris has neither, the program should ignore SIGPIPE globally */
+#error Need a way to disable SIGPIPE on connection oriented sockets!
+#endif
+#endif
+
+#if defined(SOCK_NOSIGPIPE)
+ if (stype == SOCK_STREAM) {
+ stype_and_flags |= SOCK_NOSIGPIPE;
+ }
+#endif
+
+ s = socket(sdom, stype_and_flags, 0);
+ if (s == INVALID_SOCKET) {
+ DPRINTF(("socket: %R[sockerr]\n", SOCKERRNO()));
+ return INVALID_SOCKET;
+ }
+
+#if defined(RT_OS_WINDOWS)
+ {
+ u_long mode = 1; /* non-zero enables non-blocking mode */
+ status = ioctlsocket(s, FIONBIO, &mode);
+ if (status == SOCKET_ERROR) {
+ DPRINTF(("FIONBIO: %R[sockerr]\n", SOCKERRNO()));
+ closesocket(s);
+ return INVALID_SOCKET;
+ }
+ }
+#elif !defined(SOCK_NONBLOCK)
+ { /* no SOCK_NONBLOCK flag for socket(), so use fcntl() instead */
+ int sflags;
+
+ sflags = fcntl(s, F_GETFL, 0);
+ if (sflags < 0) {
+ DPRINTF(("F_GETFL: %R[sockerr]\n", SOCKERRNO()));
+ closesocket(s);
+ return INVALID_SOCKET;
+ }
+
+ status = fcntl(s, F_SETFL, sflags | O_NONBLOCK);
+ if (status < 0) {
+ DPRINTF(("O_NONBLOCK: %R[sockerr]\n", SOCKERRNO()));
+ closesocket(s);
+ return INVALID_SOCKET;
+ }
+ }
+#endif
+
+#if !defined(SOCK_NOSIGPIPE) && defined(SO_NOSIGPIPE)
+ if (stype == SOCK_STREAM) {
+ int on = 1;
+ const socklen_t onlen = sizeof(on);
+
+ status = setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &on, onlen);
+ if (status < 0) {
+ DPRINTF(("SO_NOSIGPIPE: %R[sockerr]\n", SOCKERRNO()));
+ closesocket(s);
+ return INVALID_SOCKET;
+ }
+ }
+#endif
+
+ /*
+ * Disable the Nagle algorithm. Otherwise the host may hold back
+ * packets that the guest wants to go out, causing potentially
+ * horrible performance. The guest is already applying the Nagle
+ * algorithm (or not) the way it wants.
+ */
+ if (stype == SOCK_STREAM) {
+ int on = 1;
+ const socklen_t onlen = sizeof(on);
+
+ status = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *)&on, onlen);
+ if (status < 0) { /* logged only, the socket is still returned */
+ DPRINTF(("TCP_NODELAY: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+
+#if defined(RT_OS_WINDOWS)
+ /*
+ * lwIP only holds one packet of "refused data" for us. Proxy
+ * relies on OS socket send buffer and doesn't do its own
+ * buffering. Unfortunately on Windows send buffer is very small
+ * (8K by default) and is not dynamically adapted by the OS it
+ * seems. So a single large write will fill it up and that will
+ * make lwIP drop segments, causing guest TCP into pathologic
+ * resend patterns. As a quick and dirty fix just bump it up.
+ */
+ if (stype == SOCK_STREAM) {
+ int sndbuf;
+ socklen_t optlen = sizeof(sndbuf);
+
+ status = getsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &optlen);
+ if (status == 0) {
+ if (sndbuf < 64 * 1024) {
+ sndbuf = 64 * 1024;
+ status = setsockopt(s, SOL_SOCKET, SO_SNDBUF,
+ (char *)&sndbuf, optlen);
+ if (status != 0) {
+ DPRINTF(("SO_SNDBUF: setsockopt: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+ }
+ else {
+ DPRINTF(("SO_SNDBUF: getsockopt: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+#endif
+
+ return s;
+}
+
+
+/**
+ * Create a socket for outbound connection to dst_addr:dst_port.
+ *
+ * The socket is non-blocking and TCP sockets have SIGPIPE disabled if
+ * possible. On Linux it's not possible and should be disabled for
+ * each send(2) individually.
+ *
+ * If g_proxy_options carries a source address for the address family
+ * (src4 / src6), the socket is bound to it before connecting.
+ *
+ * @param sdom PF_INET or PF_INET6 (asserted).
+ * @param stype SOCK_STREAM or SOCK_DGRAM (asserted).
+ * @param dst_addr Destination address; the ip4 or ip6 member is used
+ * according to sdom.
+ * @param dst_port Destination port in host byte order (htons() is
+ * applied here).
+ * @return The socket, possibly with the connect still in progress,
+ * or INVALID_SOCKET on failure. When bind() or connect()
+ * fails the socket error is restored after the socket has
+ * been closed.
+ */
+SOCKET
+proxy_connected_socket(int sdom, int stype,
+ ipX_addr_t *dst_addr, u16_t dst_port)
+{
+ struct sockaddr_in6 dst_sin6;
+ struct sockaddr_in dst_sin;
+ struct sockaddr *pdst_sa;
+ socklen_t dst_sa_len;
+ void *pdst_addr; /* NOTE(review): assigned below but never read in this function */
+ const struct sockaddr *psrc_sa;
+ socklen_t src_sa_len;
+ int status;
+ int sockerr;
+ SOCKET s;
+
+ LWIP_ASSERT1(sdom == PF_INET || sdom == PF_INET6);
+ LWIP_ASSERT1(stype == SOCK_STREAM || stype == SOCK_DGRAM);
+
+ DPRINTF(("---> %s ", stype == SOCK_STREAM ? "TCP" : "UDP"));
+ if (sdom == PF_INET6) {
+ pdst_sa = (struct sockaddr *)&dst_sin6;
+ pdst_addr = (void *)&dst_sin6.sin6_addr;
+
+ memset(&dst_sin6, 0, sizeof(dst_sin6));
+#if HAVE_SA_LEN
+ dst_sin6.sin6_len = /* chained with the dst_sa_len assignment below */
+#endif
+ dst_sa_len = sizeof(dst_sin6);
+ dst_sin6.sin6_family = AF_INET6;
+ memcpy(&dst_sin6.sin6_addr, &dst_addr->ip6, sizeof(ip6_addr_t));
+ dst_sin6.sin6_port = htons(dst_port);
+
+ DPRINTF(("[%RTnaipv6]:%d ", &dst_sin6.sin6_addr, dst_port));
+ }
+ else { /* sdom = PF_INET */
+ pdst_sa = (struct sockaddr *)&dst_sin;
+ pdst_addr = (void *)&dst_sin.sin_addr;
+
+ memset(&dst_sin, 0, sizeof(dst_sin));
+#if HAVE_SA_LEN
+ dst_sin.sin_len = /* chained with the dst_sa_len assignment below */
+#endif
+ dst_sa_len = sizeof(dst_sin);
+ dst_sin.sin_family = AF_INET;
+ dst_sin.sin_addr.s_addr = dst_addr->ip4.addr; /* byte-order? */
+ dst_sin.sin_port = htons(dst_port);
+
+ DPRINTF(("%RTnaipv4:%d ", dst_sin.sin_addr.s_addr, dst_port));
+ }
+
+ s = proxy_create_socket(sdom, stype);
+ if (s == INVALID_SOCKET) {
+ return INVALID_SOCKET;
+ }
+ DPRINTF(("socket %d\n", s));
+
+ /** @todo needs locking if dynamic modifyvm is allowed */
+ if (sdom == PF_INET6) {
+ psrc_sa = (const struct sockaddr *)g_proxy_options->src6;
+ src_sa_len = sizeof(struct sockaddr_in6);
+ }
+ else {
+ psrc_sa = (const struct sockaddr *)g_proxy_options->src4;
+ src_sa_len = sizeof(struct sockaddr_in);
+ }
+ if (psrc_sa != NULL) { /* a source address is configured: bind to it */
+ status = bind(s, psrc_sa, src_sa_len);
+ if (status == SOCKET_ERROR) {
+ sockerr = SOCKERRNO(); /* save, closesocket() may clobber it */
+ DPRINTF(("socket %d: bind: %R[sockerr]\n", s, sockerr));
+ closesocket(s);
+ SET_SOCKERRNO(sockerr);
+ return INVALID_SOCKET;
+ }
+ }
+
+ /* the socket is non-blocking, so "in progress" is not a failure */
+ status = connect(s, pdst_sa, dst_sa_len);
+ if (status == SOCKET_ERROR
+#if !defined(RT_OS_WINDOWS)
+ && SOCKERRNO() != EINPROGRESS
+#else
+ && SOCKERRNO() != EWOULDBLOCK
+#endif
+ )
+ {
+ sockerr = SOCKERRNO();
+ DPRINTF(("socket %d: connect: %R[sockerr]\n", s, sockerr));
+ closesocket(s);
+ SET_SOCKERRNO(sockerr);
+ return INVALID_SOCKET;
+ }
+
+ return s;
+}
+
+
+/**
+ * Create a socket for inbound (port-forwarded) traffic and bind it
+ * to src_addr (the port is part of the sockaddr, hence no separate
+ * argument).  TCP sockets are additionally put into listening state.
+ *
+ * The socket is non-blocking and TCP sockets have SIGPIPE disabled
+ * if possible.  On Linux that is not possible and it has to be
+ * disabled for each send(2) individually.
+ *
+ * TODO?: Support v6-mapped v4 so that user can specify she wants
+ * "udp" and get both versions?
+ *
+ * @param sdom     PF_INET selects sizeof(struct sockaddr_in) as the
+ *                 bind length, anything else sockaddr_in6.
+ * @param stype    Socket type; SOCK_STREAM sockets are listen()ed on.
+ * @param src_addr Local address to bind to.
+ * @return The bound socket, or INVALID_SOCKET with the socket error
+ *         of the failed bind()/listen() restored.
+ */
+SOCKET
+proxy_bound_socket(int sdom, int stype, struct sockaddr *src_addr)
+{
+    const socklen_t src_len = (sdom == PF_INET)
+        ? sizeof(struct sockaddr_in)
+        : sizeof(struct sockaddr_in6);
+    int reuse = 1;
+    const socklen_t reuse_len = sizeof(reuse);
+    int saved_err;
+    SOCKET sock;
+
+    sock = proxy_create_socket(sdom, stype);
+    if (sock == INVALID_SOCKET) {
+        return INVALID_SOCKET;
+    }
+    DPRINTF(("socket %d\n", sock));
+
+    /* not good, but not fatal */
+    if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+                   (char *)&reuse, reuse_len) < 0)
+    {
+        DPRINTF(("SO_REUSEADDR: %R[sockerr]\n", SOCKERRNO()));
+    }
+
+    if (bind(sock, src_addr, src_len) == SOCKET_ERROR) {
+        saved_err = SOCKERRNO();
+        DPRINTF(("bind: %R[sockerr]\n", saved_err));
+        goto fail;
+    }
+
+    if (stype == SOCK_STREAM && listen(sock, 5) == SOCKET_ERROR) {
+        saved_err = SOCKERRNO();
+        DPRINTF(("listen: %R[sockerr]\n", saved_err));
+        goto fail;
+    }
+
+    return sock;
+
+  fail:
+    /* report the original failure, not whatever closesocket() leaves */
+    closesocket(sock);
+    SET_SOCKERRNO(saved_err);
+    return INVALID_SOCKET;
+}
+
+
+/**
+ * Close socket s after enabling SO_LINGER with a zero timeout on it.
+ * The result of setsockopt() is not checked.
+ */
+void
+proxy_reset_socket(SOCKET s)
+{
+    struct linger zero_linger;
+
+    zero_linger.l_linger = 0;
+    zero_linger.l_onoff = 1;
+
+    /*
+     * On Windows we can run into issue here, perhaps SO_LINGER isn't
+     * enough, and we should use WSA{Send,Recv}Disconnect instead.
+     *
+     * Links for the reference:
+     * http://msdn.microsoft.com/en-us/library/windows/desktop/ms738547%28v=vs.85%29.aspx
+     * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4468997
+     */
+    setsockopt(s, SOL_SOCKET, SO_LINGER,
+               (char *)&zero_linger, sizeof(zero_linger));
+
+    closesocket(s);
+}
+
+
+/**
+ * Send the contents of pbuf chain p as one datagram on sock, using
+ * one I/O vector entry per pbuf of the chain.
+ *
+ * @param sock Socket to send on.
+ * @param p Pbuf chain holding the payload.
+ * @param name Destination address handed to sendmsg() / WSASendTo().
+ * @param namelen Size of the destination address.
+ * @return 0 on success, negated errno if the I/O vector cannot be
+ * allocated, negated socket error if the send fails.
+ */
+int
+proxy_sendto(SOCKET sock, struct pbuf *p, void *name, size_t namelen)
+{
+ struct pbuf *q;
+ size_t i, clen;
+#ifndef RT_OS_WINDOWS
+ struct msghdr mh;
+ ssize_t nsent;
+#else
+ DWORD nsent;
+#endif
+ int rc;
+ IOVEC fixiov[8]; /* fixed size (typical case) */
+ const size_t fixiovsize = sizeof(fixiov)/sizeof(fixiov[0]);
+ IOVEC *dyniov; /* dynamically sized */
+ IOVEC *iov; /* whichever of the two is actually used */
+ int error = 0;
+
+ /*
+ * Static iov[] is usually enough since UDP protocols use small
+ * datagrams to avoid fragmentation, but be prepared.
+ */
+ clen = pbuf_clen(p);
+ if (clen > fixiovsize) {
+ /*
+ * XXX: TODO: check that clen is shorter than IOV_MAX
+ */
+ dyniov = (IOVEC *)malloc(clen * sizeof(*dyniov));
+ if (dyniov == NULL) {
+ error = -errno; /* sic: not a socket error */
+ goto out;
+ }
+ iov = dyniov;
+ }
+ else {
+ dyniov = NULL;
+ iov = fixiov;
+ }
+
+
+ /* point each iovec at the payload of the corresponding pbuf */
+ for (q = p, i = 0; i < clen; q = q->next, ++i) {
+ LWIP_ASSERT1(q != NULL);
+
+ IOVEC_SET_BASE(iov[i], q->payload);
+ IOVEC_SET_LEN(iov[i], q->len);
+ }
+
+#ifndef RT_OS_WINDOWS
+ memset(&mh, 0, sizeof(mh));
+ mh.msg_name = name;
+ mh.msg_namelen = namelen;
+ mh.msg_iov = iov;
+ mh.msg_iovlen = clen;
+
+ nsent = sendmsg(sock, &mh, 0);
+ rc = (nsent >= 0) ? 0 : SOCKET_ERROR; /* same convention as WSASendTo() */
+#else
+ rc = WSASendTo(sock, iov, (DWORD)clen, &nsent, 0,
+ name, (int)namelen, NULL, NULL);
+#endif
+ if (rc == SOCKET_ERROR) {
+ error = SOCKERRNO();
+ DPRINTF(("%s: socket %d: sendmsg: %R[sockerr]\n",
+ __func__, sock, error));
+ error = -error;
+ }
+
+ out:
+ if (dyniov != NULL) {
+ free(dyniov);
+ }
+ return error;
+}
+
+
+/*
+ * Symbolic names of lwIP error codes, indexed by the negated err_t
+ * value (this is how proxy_lwip_strerr() looks them up).
+ */
+static const char *lwiperr[] = {
+ "ERR_OK",
+ "ERR_MEM",
+ "ERR_BUF",
+ "ERR_TIMEOUT",
+ "ERR_RTE",
+ "ERR_INPROGRESS",
+ "ERR_VAL",
+ "ERR_WOULDBLOCK",
+ "ERR_USE",
+ "ERR_ISCONN",
+ "ERR_ABRT",
+ "ERR_RST",
+ "ERR_CLSD",
+ "ERR_CONN",
+ "ERR_ARG",
+ "ERR_IF"
+};
+
+
+/**
+ * Map an lwIP error code to its symbolic name.
+ *
+ * @param error lwIP error code.
+ * @return The matching entry of lwiperr[]; for codes outside the
+ *         table a pointer to a static buffer holding
+ *         "unknown error N", which the next such call overwrites.
+ */
+const char *
+proxy_lwip_strerr(err_t error)
+{
+    static char unknown[32];
+    const int idx = -error;
+
+    if (idx < 0 || idx >= (int)__arraycount(lwiperr)) {
+        RTStrPrintf(unknown, sizeof(unknown), "unknown error %d", error);
+        return unknown;
+    }
+
+    return lwiperr[idx];
+}
diff --git a/src/VBox/NetworkServices/NAT/proxy.h b/src/VBox/NetworkServices/NAT/proxy.h
new file mode 100644
index 00000000..48d9dd66
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/proxy.h
@@ -0,0 +1,121 @@
+/* $Id: proxy.h $ */
+/** @file
+ * NAT Network - common definitions and declarations.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_proxy_h
+#define VBOX_INCLUDED_SRC_NAT_proxy_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#if !defined(VBOX)
+#include "vbox-compat.h"
+#endif
+
+#include "lwip/err.h"
+#include "lwip/ip_addr.h"
+#include "winutils.h"
+
+/* forward */
+struct netif;
+struct tcpip_msg;
+struct pbuf;
+struct sockaddr;
+struct sockaddr_in;
+struct sockaddr_in6;
+
+/* NOTE(review): presumably one loopback-address mapping entry; the meaning of the fields is not visible in this header - confirm against the users. */
+struct ip4_lomap
+{
+ ip_addr_t loaddr;
+ uint32_t off;
+};
+
+/* NOTE(review): presumably an array of ip4_lomap entries together with its length - confirm against the users. */
+struct ip4_lomap_desc
+{
+ const struct ip4_lomap *lomap;
+ unsigned int num_lomap;
+};
+
+/* Proxy configuration, passed to proxy_init() and reachable through g_proxy_options. */
+struct proxy_options {
+ int ipv6_enabled;
+ int ipv6_defroute;
+ SOCKET icmpsock4; /* NOTE(review): presumably the sockets given to pxping_init() - confirm */
+ SOCKET icmpsock6;
+ const char *tftp_root; /* NOTE(review): presumably the root given to tftpd_init() - confirm */
+ const struct sockaddr_in *src4; /* if not NULL, outbound IPv4 sockets are bound to it */
+ const struct sockaddr_in6 *src6; /* if not NULL, outbound IPv6 sockets are bound to it */
+ const struct ip4_lomap_desc *lomap_desc;
+ const char **nameservers;
+};
+
+extern volatile struct proxy_options *g_proxy_options;
+extern struct netif *g_proxy_netif;
+
+void proxy_init(struct netif *, struct proxy_options *);
+SOCKET proxy_connected_socket(int, int, ipX_addr_t *, u16_t);
+SOCKET proxy_bound_socket(int, int, struct sockaddr *);
+void proxy_reset_socket(SOCKET);
+int proxy_sendto(SOCKET, struct pbuf *, void *, size_t);
+void proxy_lwip_post(struct tcpip_msg *);
+const char *proxy_lwip_strerr(err_t);
+
+/* proxy_rtadvd.c */
+void proxy_rtadvd_start(struct netif *);
+void proxy_rtadvd_do_quick(void *);
+
+/* rtmon_*.c */
+int rtmon_get_defaults(void);
+
+/* proxy_dhcp6ds.c */
+err_t dhcp6ds_init(struct netif *);
+
+/* proxy_tftpd.c */
+err_t tftpd_init(struct netif *, const char *);
+
+/* pxtcp.c */
+void pxtcp_init(void);
+
+/* pxudp.c */
+void pxudp_init(void);
+
+/* pxdns.c */
+err_t pxdns_init(struct netif *);
+void pxdns_set_nameservers(void *);
+
+/* pxping.c */
+err_t pxping_init(struct netif *, SOCKET, SOCKET);
+
+
+/*
+ * HAVE_SA_LEN is tested where socket addresses are filled in to
+ * decide whether the sin_len / sin6_len member has to be set.
+ */
+#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS)
+# define HAVE_SA_LEN 0
+#else
+# define HAVE_SA_LEN 1
+#endif
+
+/* Assert that uses the text of the condition itself as the message. */
+#define LWIP_ASSERT1(condition) LWIP_ASSERT(#condition, condition)
+
+/*
+ * TODO: DPRINTF0 should probably become LogRel but its usage needs to
+ * be cleaned up a bit before.
+ *
+ * All DPRINTF* macros take their whole argument list in an extra pair
+ * of parentheses, e.g. DPRINTF(("fmt %d\n", x)).
+ */
+#define DPRINTF0(a) Log(a)
+
+#define DPRINTF(a) DPRINTF1(a)
+#define DPRINTF1(a) Log2(a)
+#define DPRINTF2(a) Log3(a)
+
+#endif /* !VBOX_INCLUDED_SRC_NAT_proxy_h */
+
diff --git a/src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c b/src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c
new file mode 100644
index 00000000..ac831346
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c
@@ -0,0 +1,317 @@
+/* $Id: proxy_dhcp6ds.c $ */
+/** @file
+ * NAT Network - Simple stateless DHCPv6 (RFC 3736) server.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+#include "dhcp6.h"
+#include "proxy.h"
+
+#include <string.h>
+
+#include "lwip/opt.h"
+#include "lwip/mld6.h"
+#include "lwip/udp.h"
+
+
+static void dhcp6ds_recv(void *, struct udp_pcb *, struct pbuf *, ip6_addr_t *, u16_t);
+
+
+/* ff02::1:2 - "All_DHCP_Relay_Agents_and_Servers" link-scoped multicast */
+static /* const */ ip6_addr_t all_dhcp_relays_and_servers = {
+ { PP_HTONL(0xff020000UL), 0, 0, PP_HTONL(0x00010002UL) }
+};
+
+/* ff05::1:3 - "All_DHCP_Servers" site-scoped multicast */
+static /* const */ ip6_addr_t all_dhcp_servers = {
+ { PP_HTONL(0xff050000UL), 0, 0, PP_HTONL(0x00010003UL) }
+};
+
+
+/* server PCB, created and bound to DHCP6_SERVER_PORT in dhcp6ds_init() */
+static struct udp_pcb *dhcp6ds_pcb;
+
+/* prebuilt Server ID option (filled in by dhcp6ds_init(), appended to every reply) */
+#define DUID_LL_LEN (/* duid type */ 2 + /* hw type */ 2 + /* ether addr */ 6)
+static u8_t dhcp6ds_serverid[/* opt */ 2 + /* optlen */ 2 + DUID_LL_LEN];
+
+/* prebuilt DNS Servers option (filled in by dhcp6ds_init(), appended to every reply) */
+static u8_t dhcp6ds_dns[/* opt */ 2 + /* optlen */ 2 + /* IPv6 addr */ 16];
+
+
+/**
+ * Initialize DHCP6 server.
+ *
+ * Join DHCP6 multicast groups.
+ * Create and bind server pcb.
+ * Prebuild fixed parts of reply.
+ *
+ * @param proxy_netif Interface to serve on; must not be NULL and must
+ * have a 6-byte hardware address (both asserted).
+ * @return ERR_OK on success, otherwise the lwIP error of the step
+ * that failed (ERR_MEM if the PCB cannot be allocated). On
+ * failure the steps already done are undone again.
+ */
+err_t
+dhcp6ds_init(struct netif *proxy_netif)
+{
+ ip6_addr_t *pxaddr, *pxaddr_nonlocal;
+ int i;
+ err_t error;
+
+ LWIP_ASSERT1(proxy_netif != NULL);
+ LWIP_ASSERT1(proxy_netif->hwaddr_len == 6); /* ethernet */
+
+ pxaddr = netif_ip6_addr(proxy_netif, 0); /* link local */
+
+ /*
+ * XXX: TODO: This is a leftover from testing with IPv6 mapped
+ * loopback with a special IPv6->IPv4 mapping hack in pxudp.c
+ */
+ /* advertise ourself as DNS resolver - will be proxied to host */
+ pxaddr_nonlocal = NULL;
+ for (i = 1; i < LWIP_IPV6_NUM_ADDRESSES; ++i) {
+ if (ip6_addr_ispreferred(netif_ip6_addr_state(proxy_netif, i))
+ && !ip6_addr_islinklocal(netif_ip6_addr(proxy_netif, i)))
+ {
+ pxaddr_nonlocal = netif_ip6_addr(proxy_netif, i);
+ break;
+ }
+ }
+ /* NOTE(review): only an assertion guards the memcpy() from pxaddr_nonlocal below - confirm LWIP_ASSERT is fatal in all builds */
+ LWIP_ASSERT1(pxaddr_nonlocal != NULL); /* must be configured on the netif */
+
+
+ error = mld6_joingroup(pxaddr, &all_dhcp_relays_and_servers);
+ if (error != ERR_OK) {
+ DPRINTF0(("%s: failed to join All_DHCP_Relay_Agents_and_Servers: %s\n",
+ __func__, proxy_lwip_strerr(error)));
+ goto err;
+ }
+
+ error = mld6_joingroup(pxaddr, &all_dhcp_servers);
+ if (error != ERR_OK) {
+ DPRINTF0(("%s: failed to join All_DHCP_Servers: %s\n",
+ __func__, proxy_lwip_strerr(error)));
+ goto err1;
+ }
+
+
+ dhcp6ds_pcb = udp_new_ip6();
+ if (dhcp6ds_pcb == NULL) {
+ DPRINTF0(("%s: failed to allocate PCB\n", __func__));
+ error = ERR_MEM;
+ goto err2;
+ }
+
+ udp_recv_ip6(dhcp6ds_pcb, dhcp6ds_recv, NULL);
+
+ error = udp_bind_ip6(dhcp6ds_pcb, pxaddr, DHCP6_SERVER_PORT);
+ if (error != ERR_OK) {
+ DPRINTF0(("%s: failed to bind PCB\n", __func__));
+ goto err3;
+ }
+
+
+ /* store 16-bit value c in network byte order at byte offset off of buf */
+#define OPT_SET(buf, off, c) do { \
+ u16_t _s = PP_HTONS(c); \
+ memcpy(&(buf)[off], &_s, sizeof(u16_t)); \
+ } while (0)
+
+#define SERVERID_SET(off, c) OPT_SET(dhcp6ds_serverid, (off), (c))
+#define DNSSRV_SET(off, c) OPT_SET(dhcp6ds_dns, (off), (c))
+
+ /* Server ID: option code, length, then a link-layer DUID built from the netif's hardware address */
+ SERVERID_SET(0, DHCP6_OPTION_SERVERID);
+ SERVERID_SET(2, DUID_LL_LEN);
+ SERVERID_SET(4, DHCP6_DUID_LL);
+ SERVERID_SET(6, ARES_HRD_ETHERNET);
+ memcpy(&dhcp6ds_serverid[8], proxy_netif->hwaddr, 6);
+
+ DNSSRV_SET(0, DHCP6_OPTION_DNS_SERVERS);
+ DNSSRV_SET(2, 16); /* one IPv6 address */
+ /*
+ * XXX: TODO: This is a leftover from testing with IPv6 mapped
+ * loopback with a special IPv6->IPv4 mapping hack in pxudp.c
+ */
+ memcpy(&dhcp6ds_dns[4], pxaddr_nonlocal, sizeof(ip6_addr_t));
+
+#undef SERVERID_SET
+#undef DNSSRV_SET
+
+ return ERR_OK;
+
+
+ /* unwind in reverse order of setup */
+ err3:
+ udp_remove(dhcp6ds_pcb);
+ dhcp6ds_pcb = NULL;
+ err2:
+ mld6_leavegroup(pxaddr, &all_dhcp_servers);
+ err1:
+ mld6_leavegroup(pxaddr, &all_dhcp_relays_and_servers);
+ err:
+ return error;
+}
+
+
+/* Scratch buffer the reply is assembled in before it is copied to a pbuf. */
+static u8_t dhcp6ds_reply_buf[1024];
+
+/**
+ * Receive callback of the DHCPv6 server PCB.
+ *
+ * Only Information-Request messages are answered, everything else is
+ * dropped.  The reply repeats the 4-byte message header (with the
+ * type changed to Reply), echoes Client Identifier options found in
+ * the request and ends with the prebuilt Server Identifier and DNS
+ * Servers options.
+ *
+ * The request is parsed with an explicit offset into the pbuf chain
+ * rather than by stripping bytes off the front with pbuf_header(), so
+ * the cursor cannot get out of step with the data when an option
+ * does not fit the first pbuf or is longer than an s16_t can express.
+ * Everything copied from the request is checked against the size of
+ * the reply buffer first, since the same option may be repeated.
+ *
+ * The function owns p and frees it on every path.
+ *
+ * @param arg  Unused.
+ * @param pcb  PCB the request arrived on; the reply is sent from it.
+ * @param p    The received DHCPv6 message.
+ * @param addr Source address of the request, destination of the reply.
+ * @param port Source port of the request, destination of the reply.
+ */
+static void
+dhcp6ds_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+             ip6_addr_t *addr, u16_t port)
+{
+    /* room that must stay free for the options appended after the loop */
+    const size_t trailer_len = sizeof(dhcp6ds_serverid) + sizeof(dhcp6ds_dns);
+    u8_t msg_header[4];
+    unsigned int msg_type, msg_tid;
+    unsigned int off;           /* read position in the request */
+    int copied;
+    size_t roff;                /* write position in the reply buffer */
+    struct pbuf *q;
+    err_t error;
+
+    LWIP_UNUSED_ARG(arg);
+    LWIP_ASSERT1(p != NULL);
+
+    copied = pbuf_copy_partial(p, msg_header, sizeof(msg_header), 0);
+    if (copied != sizeof(msg_header)) {
+        DPRINTF(("%s: message header truncated\n", __func__));
+        pbuf_free(p);
+        return;
+    }
+    off = sizeof(msg_header);
+
+    msg_type = msg_header[0];
+    msg_tid = (msg_header[1] << 16) | (msg_header[2] << 8) | msg_header[3];
+    DPRINTF(("%s: type %u, tid 0x%6x\n", __func__, msg_type, msg_tid));
+    if (msg_type != DHCP6_INFORMATION_REQUEST) { /** @todo ? RELAY_FORW */
+        pbuf_free(p);
+        return;
+    }
+
+    roff = 0;
+
+    /* reply header: same transaction id, type changed to Reply */
+    msg_header[0] = DHCP6_REPLY;
+    memcpy(dhcp6ds_reply_buf + roff, msg_header, sizeof(msg_header));
+    roff += sizeof(msg_header);
+
+
+    /* loop over options; off never exceeds p->tot_len, so it fits u16_t */
+    while (off < p->tot_len) {
+        u16_t opt, optlen;
+        unsigned int remaining;
+
+        /* fetch option code */
+        copied = pbuf_copy_partial(p, &opt, sizeof(opt), (u16_t)off);
+        if (copied != sizeof(opt)) {
+            DPRINTF(("%s: option header truncated\n", __func__));
+            pbuf_free(p);
+            return;
+        }
+        off += sizeof(opt);
+        opt = ntohs(opt);
+
+        /* fetch option length */
+        copied = pbuf_copy_partial(p, &optlen, sizeof(optlen), (u16_t)off);
+        if (copied != sizeof(optlen)) {
+            DPRINTF(("%s: option %u length truncated\n", __func__, opt));
+            pbuf_free(p);
+            return;
+        }
+        off += sizeof(optlen);
+        optlen = ntohs(optlen);
+
+        /* enough data? */
+        remaining = p->tot_len - off;
+        if (optlen > remaining) {
+            DPRINTF(("%s: option %u truncated: expect %u, got %u\n",
+                     __func__, opt, optlen, remaining));
+            pbuf_free(p);
+            return;
+        }
+
+        DPRINTF2(("%s: option %u length %u\n", __func__, opt, optlen));
+
+        if (opt == DHCP6_OPTION_CLIENTID) {
+            u16_t s;
+
+            /* "A DUID can be no more than 128 octets long (not
+               including the type code)." */
+            if (optlen > 130) {
+                DPRINTF(("%s: client DUID too long: %u\n", __func__, optlen));
+                pbuf_free(p);
+                return;
+            }
+
+            /*
+             * The option may occur more than once in a (malformed)
+             * request, so the per-option limit above does not bound
+             * the total.  Drop the request rather than write past the
+             * end of the reply buffer.
+             */
+            if (roff + 2 * sizeof(s) + optlen + trailer_len
+                > sizeof(dhcp6ds_reply_buf))
+            {
+                DPRINTF(("%s: reply does not fit the buffer\n", __func__));
+                pbuf_free(p);
+                return;
+            }
+
+            s = PP_HTONS(DHCP6_OPTION_CLIENTID);
+            memcpy(dhcp6ds_reply_buf + roff, &s, sizeof(s));
+            roff += sizeof(s);
+
+            s = htons(optlen);  /* back to network byte order */
+            memcpy(dhcp6ds_reply_buf + roff, &s, sizeof(s));
+            roff += sizeof(s);
+
+            pbuf_copy_partial(p, dhcp6ds_reply_buf + roff, optlen, (u16_t)off);
+            roff += optlen;
+        }
+        else if (opt == DHCP6_OPTION_ORO) {
+            /* the requested options are only logged, not acted upon */
+            u16_t *opts;
+            int i, nopts;
+
+            if (optlen % 2 != 0) {
+                DPRINTF2(("%s: Option Request of odd length\n", __func__));
+                goto bad_oro;
+            }
+            nopts = optlen / 2;
+
+            opts = (u16_t *)malloc(optlen);
+            if (opts == NULL) {
+                DPRINTF2(("%s: failed to allocate space for Option Request\n",
+                          __func__));
+                goto bad_oro;
+            }
+
+            pbuf_copy_partial(p, opts, optlen, (u16_t)off);
+            for (i = 0; i < nopts; ++i) {
+                opt = ntohs(opts[i]);
+                DPRINTF2(("> request option %u\n", opt));
+            }
+            free(opts);
+
+          bad_oro: /* empty */;
+        }
+
+        off += optlen;          /* go to next option */
+    }
+    pbuf_free(p);               /* done */
+
+
+    /* room for these was reserved by the check in the loop */
+    memcpy(dhcp6ds_reply_buf + roff, dhcp6ds_serverid, sizeof(dhcp6ds_serverid));
+    roff += sizeof(dhcp6ds_serverid);
+
+    memcpy(dhcp6ds_reply_buf + roff, dhcp6ds_dns, sizeof(dhcp6ds_dns));
+    roff += sizeof(dhcp6ds_dns);
+
+    Assert(roff == (u16_t)roff);
+    q = pbuf_alloc(PBUF_RAW, (u16_t)roff, PBUF_RAM);
+    if (q == NULL) {
+        DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)roff));
+        return;
+    }
+
+    error = pbuf_take(q, dhcp6ds_reply_buf, (u16_t)roff);
+    if (error != ERR_OK) {
+        DPRINTF(("%s: pbuf_take(%d) failed: %s\n",
+                 __func__, (int)roff, proxy_lwip_strerr(error)));
+        pbuf_free(q);
+        return;
+    }
+
+    error = udp_sendto_ip6(pcb, q, addr, port);
+    if (error != ERR_OK) {
+        DPRINTF(("%s: udp_sendto failed: %s\n",
+                 __func__, proxy_lwip_strerr(error)));
+    }
+
+    pbuf_free(q);
+}
diff --git a/src/VBox/NetworkServices/NAT/proxy_pollmgr.c b/src/VBox/NetworkServices/NAT/proxy_pollmgr.c
new file mode 100644
index 00000000..1686fb8f
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/proxy_pollmgr.c
@@ -0,0 +1,828 @@
+/* $Id: proxy_pollmgr.c $ */
+/** @file
+ * NAT Network - poll manager.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+
+#include "proxy_pollmgr.h"
+#include "proxy.h"
+
+#ifndef RT_OS_WINDOWS
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#else
+#include <iprt/errcore.h>
+#include <stdlib.h>
+#include <string.h>
+#include "winpoll.h"
+#endif
+
+#include <iprt/req.h>
+#include <iprt/errcore.h>
+
+
+#define POLLMGR_GARBAGE (-1)
+
+
+/* Slots with a fixed position at the start of the pollfd array. */
+enum {
+ POLLMGR_QUEUE = 0, /* notification socket of the request queue */
+
+ POLLMGR_SLOT_STATIC_COUNT,
+ POLLMGR_SLOT_FIRST_DYNAMIC = POLLMGR_SLOT_STATIC_COUNT
+};
+
+
+/* State of one emulated channel. */
+struct pollmgr_chan {
+ struct pollmgr_handler *handler; /* registered by pollmgr_add_chan() */
+ void *arg; /* pointer waiting to be picked up by pollmgr_chan_recv_ptr() */
+ bool arg_valid; /* set when arg is stored, cleared when it is picked up */
+};
+
+/* The poll manager state (a single global instance). */
+struct pollmgr {
+ struct pollfd *fds;
+ struct pollmgr_handler **handlers; /* handlers[i] belongs to fds[i] */
+ nfds_t capacity; /* allocated size of the arrays */
+ nfds_t nfds; /* part of the arrays in use */
+
+ /* channels (socketpair) for static slots */
+ SOCKET chan[POLLMGR_SLOT_STATIC_COUNT][2];
+#define POLLMGR_CHFD_RD 0 /* - pollmgr side */
+#define POLLMGR_CHFD_WR 1 /* - client side */
+
+
+ /* emulate channels with request queue */
+ RTREQQUEUE queue;
+ struct pollmgr_handler queue_handler; /* handler of the POLLMGR_QUEUE slot */
+ struct pollmgr_chan chan_handlers[POLLMGR_CHAN_COUNT];
+} pollmgr;
+
+
+static int pollmgr_queue_callback(struct pollmgr_handler *, SOCKET, int);
+static void pollmgr_chan_call_handler(int, void *);
+
+static void pollmgr_loop(void);
+
+static void pollmgr_add_at(int, struct pollmgr_handler *, SOCKET, int);
+static void pollmgr_refptr_delete(struct pollmgr_refptr *);
+
+
+/*
+ * We cannot portably peek at the length of the incoming datagram and
+ * pre-allocate pbuf chain to recvmsg() directly to it. On Linux it's
+ * possible to recv with MSG_PEEK|MSG_TRUNC, but extra syscall is
+ * probably more expensive (haven't measured) than doing an extra copy
+ * of data, since typical UDP datagrams are small enough to avoid
+ * fragmentation.
+ *
+ * We can use shared buffer here since we read from sockets
+ * sequentially in a loop over pollfd.
+ */
+u8_t pollmgr_udpbuf[64 * 1024];
+
+
+/**
+ * Initialize the poll manager: create the request queue and the
+ * socket pair of every static slot, allocate the initial pollfd and
+ * handler arrays and register the handler of the request queue.
+ *
+ * @return 0 on success, -1 on failure.  On failure the channel
+ *         sockets created so far are closed and the request queue is
+ *         destroyed again.
+ */
+int
+pollmgr_init(void)
+{
+    struct pollfd *newfds;
+    struct pollmgr_handler **newhdls;
+    nfds_t newcap;
+    int rc, status;
+    nfds_t i;
+
+    rc = RTReqQueueCreate(&pollmgr.queue);
+    if (RT_FAILURE(rc))
+        return -1;
+
+    pollmgr.fds = NULL;
+    pollmgr.handlers = NULL;
+    pollmgr.capacity = 0;
+    pollmgr.nfds = 0;
+
+    /* mark all channel sockets invalid first so that cleanup is safe */
+    for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) {
+        pollmgr.chan[i][POLLMGR_CHFD_RD] = INVALID_SOCKET;
+        pollmgr.chan[i][POLLMGR_CHFD_WR] = INVALID_SOCKET;
+    }
+
+    for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) {
+#ifndef RT_OS_WINDOWS
+        int j;
+
+        status = socketpair(PF_LOCAL, SOCK_DGRAM, 0, pollmgr.chan[i]);
+        if (status < 0) {
+            DPRINTF(("socketpair: %R[sockerr]\n", SOCKERRNO()));
+            goto cleanup_close;
+        }
+
+        /* now manually make them O_NONBLOCK */
+        for (j = 0; j < 2; ++j) {
+            int s = pollmgr.chan[i][j];
+            int sflags;
+
+            sflags = fcntl(s, F_GETFL, 0);
+            if (sflags < 0) {
+                DPRINTF0(("F_GETFL: %R[sockerr]\n", errno));
+                goto cleanup_close;
+            }
+
+            status = fcntl(s, F_SETFL, sflags | O_NONBLOCK);
+            if (status < 0) {
+                DPRINTF0(("O_NONBLOCK: %R[sockerr]\n", errno));
+                goto cleanup_close;
+            }
+        }
+#else
+        status = RTWinSocketPair(PF_INET, SOCK_DGRAM, 0, pollmgr.chan[i]);
+        if (RT_FAILURE(status)) {
+            goto cleanup_close;
+        }
+#endif
+    }
+
+
+    newcap = 16;                /* XXX: magic */
+    LWIP_ASSERT1(newcap >= POLLMGR_SLOT_STATIC_COUNT);
+
+    newfds = (struct pollfd *)
+        malloc(newcap * sizeof(*pollmgr.fds));
+    if (newfds == NULL) {
+        DPRINTF(("%s: Failed to allocate fds array\n", __func__));
+        goto cleanup_close;
+    }
+
+    newhdls = (struct pollmgr_handler **)
+        malloc(newcap * sizeof(*pollmgr.handlers));
+    if (newhdls == NULL) {
+        DPRINTF(("%s: Failed to allocate handlers array\n", __func__));
+        free(newfds);
+        goto cleanup_close;
+    }
+
+    pollmgr.capacity = newcap;
+    pollmgr.fds = newfds;
+    pollmgr.handlers = newhdls;
+
+    pollmgr.nfds = POLLMGR_SLOT_STATIC_COUNT;
+
+    for (i = 0; i < pollmgr.capacity; ++i) {
+        pollmgr.fds[i].fd = INVALID_SOCKET;
+        pollmgr.fds[i].events = 0;
+        pollmgr.fds[i].revents = 0;
+        /* malloc() does not clear; pollmgr_add() does the same on growth */
+        pollmgr.handlers[i] = NULL;
+    }
+
+    /* add request queue notification */
+    pollmgr.queue_handler.callback = pollmgr_queue_callback;
+    pollmgr.queue_handler.data = NULL;
+    pollmgr.queue_handler.slot = -1;
+
+    pollmgr_add_at(POLLMGR_QUEUE, &pollmgr.queue_handler,
+                   pollmgr.chan[POLLMGR_QUEUE][POLLMGR_CHFD_RD],
+                   POLLIN);
+
+    return 0;
+
+  cleanup_close:
+    for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) {
+        SOCKET *chan = pollmgr.chan[i];
+        if (chan[POLLMGR_CHFD_RD] != INVALID_SOCKET) {
+            closesocket(chan[POLLMGR_CHFD_RD]);
+            closesocket(chan[POLLMGR_CHFD_WR]);
+            /* don't leave stale descriptors behind */
+            chan[POLLMGR_CHFD_RD] = INVALID_SOCKET;
+            chan[POLLMGR_CHFD_WR] = INVALID_SOCKET;
+        }
+    }
+
+    /* the queue was created first, so every failure path owns it */
+    RTReqQueueDestroy(pollmgr.queue);
+    pollmgr.queue = NIL_RTREQQUEUE;
+
+    return -1;
+}
+
+
+/*
+ * Add new channel. We now implement channels with request queue, so
+ * all channels get the same socket that triggers queue processing.
+ *
+ * Must be called before pollmgr loop is started, so no locking.
+ *
+ * Records the handler for the slot and stores the slot number in the
+ * handler. Returns the write end of the POLLMGR_QUEUE socket pair,
+ * or INVALID_SOCKET if the slot is out of range or the handler (or
+ * its callback) is NULL.
+ */
+SOCKET
+pollmgr_add_chan(int slot, struct pollmgr_handler *handler)
+{
+ AssertReturn(0 <= slot && slot < POLLMGR_CHAN_COUNT, INVALID_SOCKET);
+ AssertReturn(handler != NULL && handler->callback != NULL, INVALID_SOCKET);
+
+ handler->slot = slot;
+ pollmgr.chan_handlers[slot].handler = handler;
+ return pollmgr.chan[POLLMGR_QUEUE][POLLMGR_CHFD_WR];
+}
+
+
+/*
+ * This used to actually send data over the channel's socket.  Now we
+ * queue a request and send single byte notification over shared
+ * POLLMGR_QUEUE socket.
+ *
+ * buf must hold exactly one pointer (nbytes == sizeof(void *)); that
+ * pointer becomes the argument the channel handler "receives".
+ *
+ * Returns sizeof(void *) on success and -1 on failure: bad slot,
+ * wrong nbytes, the request could not be queued, or the notification
+ * could not be sent.
+ */
+ssize_t
+pollmgr_chan_send(int slot, void *buf, size_t nbytes)
+{
+    static const char notification = 0x5a;
+
+    void *ptr;
+    SOCKET fd;
+    ssize_t nsent;
+    int rc;
+
+    AssertReturn(0 <= slot && slot < POLLMGR_CHAN_COUNT, -1);
+
+    /*
+     * XXX: Hack alert.  We only ever "sent" single pointer which was
+     * simultaneously both the wakeup event for the poll and the
+     * argument for the channel handler that it read from the channel.
+     * So now we pass this pointer to the request and arrange for the
+     * handler to "read" it when it asks for it.
+     */
+    if (nbytes != sizeof(void *)) {
+        return -1;
+    }
+
+    ptr = *(void **)buf;
+
+    rc = RTReqQueueCallEx(pollmgr.queue, NULL, 0,
+                          RTREQFLAGS_VOID | RTREQFLAGS_NO_WAIT,
+                          (PFNRT)pollmgr_chan_call_handler, 2,
+                          slot, ptr);
+    if (RT_FAILURE(rc)) {
+        /*
+         * Nothing was queued, so the pointer would never reach the
+         * handler; don't wake up the poll loop and don't claim success.
+         */
+        DPRINTF0(("%s: RTReqQueueCallEx: %Rrc\n", __func__, rc));
+        return -1;
+    }
+
+    fd = pollmgr.chan[POLLMGR_QUEUE][POLLMGR_CHFD_WR];
+    nsent = send(fd, &notification, 1, 0);
+    if (nsent == SOCKET_ERROR) {
+        DPRINTF(("send on chan %d: %R[sockerr]\n", slot, SOCKERRNO()));
+        return -1;
+    }
+    else if ((size_t)nsent != 1) {
+        DPRINTF(("send on chan %d: datagram truncated to %u bytes",
+                 slot, (unsigned int)nsent));
+        return -1;
+    }
+
+    /* caller thinks it's sending the pointer */
+    return sizeof(void *);
+}
+
+
+/*
+ * Handler of the POLLMGR_QUEUE slot: pollmgr_chan_send() has sent us
+ * a notification, so read it off the socket and process the request
+ * queue.  Always asks to keep polling for POLLIN.
+ */
+static int
+pollmgr_queue_callback(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    ssize_t ngot;
+    int saved_err;
+
+    RT_NOREF(handler, revents);
+    Assert(pollmgr.queue != NIL_RTREQQUEUE);
+
+    ngot = recv(fd, (char *)pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
+    saved_err = SOCKERRNO();    /* save now, may be clobbered */
+
+    if (ngot == SOCKET_ERROR) {
+        DPRINTF0(("%s: recv: %R[sockerr]\n", __func__, saved_err));
+    }
+    else {
+        DPRINTF2(("%s: read %zd\n", __func__, ngot));
+
+        /* an empty read does not trigger queue processing */
+        if (ngot != 0) {
+            int rc = RTReqQueueProcess(pollmgr.queue, 0);
+            if (RT_UNLIKELY(rc != VERR_TIMEOUT && RT_FAILURE_NP(rc))) {
+                DPRINTF0(("%s: RTReqQueueProcess: %Rrc\n", __func__, rc));
+            }
+        }
+    }
+
+    return POLLIN;
+}
+
+
+/*
+ * Body of the requests queued by pollmgr_chan_send(): stash the
+ * pointer where pollmgr_chan_recv_ptr() will find it and invoke the
+ * channel handler's callback as if POLLIN had been signalled.
+ */
+static void
+pollmgr_chan_call_handler(int slot, void *arg)
+{
+    struct pollmgr_chan *chan;
+    struct pollmgr_handler *handler;
+    int nevents;
+
+    AssertReturnVoid(0 <= slot && slot < POLLMGR_CHAN_COUNT);
+    chan = &pollmgr.chan_handlers[slot];
+
+    handler = chan->handler;
+    AssertReturnVoid(handler != NULL && handler->callback != NULL);
+
+    /* arrange for pollmgr_chan_recv_ptr() to "receive" the arg */
+    chan->arg = arg;
+    chan->arg_valid = true;
+
+    /* there is no real socket behind the channel any more */
+    nevents = handler->callback(handler, INVALID_SOCKET, POLLIN);
+    if (nevents != POLLIN) {
+        DPRINTF2(("%s: nevents=0x%x!\n", __func__, nevents));
+    }
+}
+
+
+/*
+ * "Receive" a pointer "sent" over poll manager channel: return the
+ * pointer stored for the handler's slot and mark it consumed.
+ *
+ * Terminates the process if revents reports an invalid descriptor,
+ * an error or a hangup, or if no pointer is pending for the slot.
+ *
+ * NOTE(review): the "no pointer pending" case uses err(), which also
+ * prints errno although no system call failed here - confirm whether
+ * errx() was intended.
+ */
+void *
+pollmgr_chan_recv_ptr(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pollmgr_chan *chan;
+    int slot;
+
+    RT_NOREF(fd);
+
+    slot = handler->slot;
+    Assert(0 <= slot && slot < POLLMGR_CHAN_COUNT);
+
+    if (revents & POLLNVAL) {
+        errx(EXIT_FAILURE, "chan %d: fd invalid", slot);
+        /* NOTREACHED */
+    }
+
+    if (revents & (POLLERR | POLLHUP)) {
+        errx(EXIT_FAILURE, "chan %d: fd error", slot);
+        /* NOTREACHED */
+    }
+
+    LWIP_ASSERT1(revents & POLLIN);
+
+    chan = &pollmgr.chan_handlers[slot];
+    if (!chan->arg_valid) {
+        err(EXIT_FAILURE, "chan %d: recv", slot);
+        /* NOTREACHED */
+    }
+
+    /* each stored pointer is handed out only once */
+    chan->arg_valid = false;
+    return chan->arg;
+}
+
+
+/*
+ * Register "handler" for "fd" in a new dynamic slot, polling for
+ * "events".  The slot arrays are doubled when they are full.
+ *
+ * Returns the new slot index (also stored in handler->slot), or -1
+ * with handler->slot set to -1 if the arrays cannot be grown.
+ *
+ * Must be called from pollmgr loop (via callbacks), so no locking.
+ */
+int
+pollmgr_add(struct pollmgr_handler *handler, SOCKET fd, int events)
+{
+    int slot;
+
+    DPRINTF2(("%s: new fd %d\n", __func__, fd));
+
+    if (pollmgr.nfds == pollmgr.capacity) {
+        struct pollfd *grown_fds;
+        struct pollmgr_handler **grown_handlers;
+        nfds_t grown_cap;
+        nfds_t idx;
+
+        grown_cap = pollmgr.capacity * 2;
+
+        grown_fds = (struct pollfd *)
+            realloc(pollmgr.fds, grown_cap * sizeof(*pollmgr.fds));
+        if (grown_fds == NULL) {
+            DPRINTF(("%s: Failed to reallocate fds array\n", __func__));
+            handler->slot = -1;
+            return -1;
+        }
+
+        /*
+         * Adopt the grown fds array right away so that we neither
+         * crash nor leak if growing the handlers array fails below,
+         * but don't update the capacity yet!
+         */
+        pollmgr.fds = grown_fds;
+
+        grown_handlers = (struct pollmgr_handler **)
+            realloc(pollmgr.handlers, grown_cap * sizeof(*pollmgr.handlers));
+        if (grown_handlers == NULL) {
+            DPRINTF(("%s: Failed to reallocate handlers array\n", __func__));
+            /* fds points to the new, larger array here, but we
+             * pretend we still have the old capacity */
+            handler->slot = -1;
+            return -1;
+        }
+
+        pollmgr.handlers = grown_handlers;
+        pollmgr.capacity = grown_cap;
+
+        /* mark the unused tail of both arrays as empty */
+        for (idx = pollmgr.nfds; idx < grown_cap; ++idx) {
+            grown_fds[idx].fd = INVALID_SOCKET;
+            grown_fds[idx].events = 0;
+            grown_fds[idx].revents = 0;
+            grown_handlers[idx] = NULL;
+        }
+    }
+
+    /* take the first free slot at the end of the arrays */
+    slot = pollmgr.nfds++;
+
+    pollmgr_add_at(slot, handler, fd, events);
+    return slot;
+}
+
+
+/*
+ * Fill in slot "slot" with the given fd, event mask and handler, and
+ * record the slot index in the handler.
+ */
+static void
+pollmgr_add_at(int slot, struct pollmgr_handler *handler, SOCKET fd, int events)
+{
+    struct pollfd *pfd = &pollmgr.fds[slot];
+
+    pfd->fd = fd;
+    pfd->events = events;
+    pfd->revents = 0;
+
+    pollmgr.handlers[slot] = handler;
+    handler->slot = slot;
+}
+
+
+/*
+ * Replace the set of events polled for on a dynamic slot.
+ */
+void
+pollmgr_update_events(int slot, int events)
+{
+    struct pollfd *pfd;
+
+    LWIP_ASSERT1(slot >= POLLMGR_SLOT_FIRST_DYNAMIC);
+    LWIP_ASSERT1((nfds_t)slot < pollmgr.nfds);
+
+    pfd = &pollmgr.fds[slot];
+    pfd->events = events;
+}
+
+
+/*
+ * Request deletion of a dynamic slot.  Only the slot's fd is
+ * clobbered here; the poll loop notices that and deletes the slot.
+ */
+void
+pollmgr_del_slot(int slot)
+{
+    struct pollfd *pfd;
+
+    LWIP_ASSERT1(slot >= POLLMGR_SLOT_FIRST_DYNAMIC);
+
+    pfd = &pollmgr.fds[slot];
+
+    DPRINTF2(("%s(%d): fd %d ! DELETED\n",
+              __func__, slot, pfd->fd));
+
+    pfd->fd = INVALID_SOCKET; /* see poll loop */
+}
+
+
+/*
+ * Thread entry point: runs the poll manager loop.  The argument is
+ * not used.
+ */
+void
+pollmgr_thread(void *ignored)
+{
+    LWIP_UNUSED_ARG(ignored);
+
+    pollmgr_loop();
+}
+
+
+/*
+ * Poll manager main loop; it has no exit other than err().
+ *
+ * Each iteration waits for events on all registered slots, calls the
+ * handler callback of every slot that has events pending and uses the
+ * callback's return value as the slot's new event mask.  A negative
+ * return value requests deletion of the slot.  Slots scheduled for
+ * deletion are then garbage-collected and the arrays are compacted.
+ */
+static void
+pollmgr_loop(void)
+{
+ int nready;
+ SOCKET delfirst;
+ SOCKET *pdelprev;
+ int i;
+
+ for (;;) {
+#ifndef RT_OS_WINDOWS
+ nready = poll(pollmgr.fds, pollmgr.nfds, -1);
+#else
+ int rc = RTWinPoll(pollmgr.fds, pollmgr.nfds,RT_INDEFINITE_WAIT, &nready);
+ if (RT_FAILURE(rc)) {
+ err(EXIT_FAILURE, "poll"); /* XXX: what to do on error? */
+ /* NOTREACHED*/
+ }
+#endif
+
+ DPRINTF2(("%s: ready %d fd%s\n",
+ __func__, nready, (nready == 1 ? "" : "s")));
+
+ /*
+ * NOTE(review): errno is tested only after the DPRINTF2 above,
+ * which might clobber it when debug output is enabled - confirm.
+ */
+ if (nready < 0) {
+ if (errno == EINTR) {
+ continue;
+ }
+
+ err(EXIT_FAILURE, "poll"); /* XXX: what to do on error? */
+ /* NOTREACHED*/
+ }
+ else if (nready == 0) { /* cannot happen, we wait forever (-1) */
+ continue; /* - but be defensive */
+ }
+
+
+ /* start with an empty list of slots to garbage-collect */
+ delfirst = INVALID_SOCKET;
+ pdelprev = &delfirst;
+
+ /* stop early once all ready slots have been accounted for */
+ for (i = 0; (nfds_t)i < pollmgr.nfds && nready > 0; ++i) {
+ struct pollmgr_handler *handler;
+ SOCKET fd;
+ int revents, nevents;
+
+ fd = pollmgr.fds[i].fd;
+ revents = pollmgr.fds[i].revents;
+
+ /*
+ * Channel handlers can request deletion of dynamic slots
+ * by calling pollmgr_del_slot() that clobbers slot's fd.
+ */
+ if (fd == INVALID_SOCKET && i >= POLLMGR_SLOT_FIRST_DYNAMIC) {
+ /* adjust count if events were pending for that slot */
+ if (revents != 0) {
+ --nready;
+ }
+
+ /* pretend that slot handler requested deletion */
+ nevents = -1;
+ goto update_events;
+ }
+
+ if (revents == 0) {
+ continue; /* next fd */
+ }
+ --nready;
+
+ handler = pollmgr.handlers[i];
+
+ if (handler != NULL && handler->callback != NULL) {
+#ifdef LWIP_PROXY_DEBUG
+# if LWIP_PROXY_DEBUG /* DEBUG */
+ if (i < POLLMGR_SLOT_FIRST_DYNAMIC) {
+ if (revents == POLLIN) {
+ DPRINTF2(("%s: ch %d\n", __func__, i));
+ }
+ else {
+ DPRINTF2(("%s: ch %d @ revents 0x%x!\n",
+ __func__, i, revents));
+ }
+ }
+ else {
+ DPRINTF2(("%s: fd %d @ revents 0x%x\n",
+ __func__, fd, revents));
+ }
+# endif /* LWIP_PROXY_DEBUG / DEBUG */
+#endif
+ /* the return value is the new event mask, or < 0 to delete */
+ nevents = (*handler->callback)(handler, fd, revents);
+ }
+ else {
+ DPRINTF0(("%s: invalid handler for fd %d: ", __func__, fd));
+ if (handler == NULL) {
+ DPRINTF0(("NULL\n"));
+ }
+ else {
+ DPRINTF0(("%p (callback = NULL)\n", (void *)handler));
+ }
+ nevents = -1; /* delete it */
+ }
+
+ update_events:
+ if (nevents >= 0) {
+ if (nevents != pollmgr.fds[i].events) {
+ DPRINTF2(("%s: fd %d ! nevents 0x%x\n",
+ __func__, fd, nevents));
+ }
+ pollmgr.fds[i].events = nevents;
+ }
+ else if (i < POLLMGR_SLOT_FIRST_DYNAMIC) {
+ /* Don't garbage-collect channels. */
+ DPRINTF2(("%s: fd %d ! DELETED (channel %d)\n",
+ __func__, fd, i));
+ pollmgr.fds[i].fd = INVALID_SOCKET;
+ pollmgr.fds[i].events = 0;
+ pollmgr.fds[i].revents = 0;
+ pollmgr.handlers[i] = NULL;
+ }
+ else {
+ DPRINTF2(("%s: fd %d ! DELETED\n", __func__, fd));
+
+ /* schedule for deletion (see g/c loop for details) */
+ *pdelprev = i; /* make previous entry point to us */
+ pdelprev = &pollmgr.fds[i].fd;
+
+ pollmgr.fds[i].fd = INVALID_SOCKET; /* end of list (for now) */
+ pollmgr.fds[i].events = POLLMGR_GARBAGE;
+ pollmgr.fds[i].revents = 0;
+ pollmgr.handlers[i] = NULL;
+ }
+ } /* processing loop */
+
+
+ /*
+ * Garbage collect and compact the array.
+ *
+ * We overload pollfd::fd of garbage entries to store the
+ * index of the next garbage entry. The garbage list is
+ * co-directional with the fds array. The index of the first
+ * entry is in "delfirst", the last entry "points to"
+ * INVALID_SOCKET.
+ *
+ * See update_events code for nevents < 0 at the end of the
+ * processing loop above.
+ */
+ while (delfirst != INVALID_SOCKET) {
+ const int last = pollmgr.nfds - 1;
+
+ /*
+ * We want a live entry in the last slot to swap into the
+ * freed slot, so make sure we have one.
+ */
+ if (pollmgr.fds[last].events == POLLMGR_GARBAGE /* garbage */
+ || pollmgr.fds[last].fd == INVALID_SOCKET) /* or killed */
+ {
+ /* drop garbage entry at the end of the array */
+ --pollmgr.nfds;
+
+ if (delfirst == (SOCKET)last) {
+ /* congruent to delnext >= pollmgr.nfds test below */
+ delfirst = INVALID_SOCKET; /* done */
+ }
+ }
+ else {
+ /* read the link before the struct copy below overwrites it */
+ const SOCKET delnext = pollmgr.fds[delfirst].fd;
+
+ /* copy live entry at the end to the first slot being freed */
+ pollmgr.fds[delfirst] = pollmgr.fds[last]; /* struct copy */
+ pollmgr.handlers[delfirst] = pollmgr.handlers[last];
+ pollmgr.handlers[delfirst]->slot = (int)delfirst;
+ --pollmgr.nfds;
+
+ if ((nfds_t)delnext >= pollmgr.nfds) {
+ delfirst = INVALID_SOCKET; /* done */
+ }
+ else {
+ delfirst = delnext;
+ }
+ }
+
+ /* either way the old last slot is now unused: reset it */
+ pollmgr.fds[last].fd = INVALID_SOCKET;
+ pollmgr.fds[last].events = 0;
+ pollmgr.fds[last].revents = 0;
+ pollmgr.handlers[last] = NULL;
+ }
+ } /* poll loop */
+}
+
+
+/**
+ * Create strongly held refptr.
+ *
+ * @param ptr  Handler to reference; must not be NULL.
+ *
+ * @returns New refptr with one strong and no weak references, or
+ *          NULL if the refptr or its lock cannot be allocated.
+ */
+struct pollmgr_refptr *
+pollmgr_refptr_create(struct pollmgr_handler *ptr)
+{
+    struct pollmgr_refptr *rp;
+
+    LWIP_ASSERT1(ptr != NULL);
+
+    rp = (struct pollmgr_refptr *)malloc(sizeof (*rp));
+    if (rp == NULL) {
+        return NULL;
+    }
+
+    /* a refptr without a working lock is unusable, so fail creation */
+    if (sys_mutex_new(&rp->lock) != ERR_OK) {
+        free(rp);
+        return NULL;
+    }
+
+    rp->ptr = ptr;
+    rp->strong = 1;
+    rp->weak = 0;
+
+    return rp;
+}
+
+
+/*
+ * Destroy a refptr that has no strong and no weak references left.
+ * NULL is accepted and ignored.
+ */
+static void
+pollmgr_refptr_delete(struct pollmgr_refptr *rp)
+{
+    if (rp != NULL) {
+        LWIP_ASSERT1(rp->strong == 0);
+        LWIP_ASSERT1(rp->weak == 0);
+
+        sys_mutex_free(&rp->lock);
+        free(rp);
+    }
+}
+
+
+/**
+ * Add weak reference before "rp" is sent over a poll manager channel.
+ *
+ * The referenced object must still be strongly held.
+ */
+void
+pollmgr_refptr_weak_ref(struct pollmgr_refptr *rp)
+{
+    sys_mutex_lock(&rp->lock);
+
+    LWIP_ASSERT1(rp->ptr != NULL);
+    LWIP_ASSERT1(rp->strong > 0);
+
+    rp->weak += 1;
+
+    sys_mutex_unlock(&rp->lock);
+}
+
+
+/**
+ * Try to get the pointer from the implicitly weak reference we've
+ * got from a channel.  The weak reference is consumed by this call.
+ *
+ * If we detect that the object is still strongly referenced, but no
+ * longer registered with the poll manager we abort strengthening
+ * conversion here b/c lwip thread callback is already scheduled to
+ * destruct the object.
+ *
+ * @returns The handler, or NULL if the object is gone or is no longer
+ *          polled.  When the object is gone and this was the last
+ *          weak reference, "rp" itself is deleted.
+ */
+struct pollmgr_handler *
+pollmgr_refptr_get(struct pollmgr_refptr *rp)
+{
+    struct pollmgr_handler *handler;
+    size_t weak_left;
+
+    sys_mutex_lock(&rp->lock);
+
+    LWIP_ASSERT1(rp->weak > 0);
+    --rp->weak;
+    weak_left = rp->weak;
+
+    handler = rp->ptr;
+    if (handler == NULL) {
+        /* the strong reference is gone; last weak one cleans up */
+        LWIP_ASSERT1(rp->strong == 0);
+        sys_mutex_unlock(&rp->lock);
+        if (weak_left == 0) {
+            pollmgr_refptr_delete(rp);
+        }
+        return NULL;
+    }
+
+    LWIP_ASSERT1(rp->strong == 1);
+
+    /*
+     * Here we would do:
+     *
+     *   ++rp->strong;
+     *
+     * and then, after channel handler is done, we would decrement it
+     * back.
+     *
+     * Instead we check that the object is still registered with poll
+     * manager.  If it is, there's no race with lwip thread trying to
+     * drop its strong reference, as lwip thread callback to destruct
+     * the object is always scheduled by its poll manager callback.
+     *
+     * Conversely, if we detect that the object is no longer registered
+     * with poll manager, we immediately abort.  Since channel handler
+     * can't do anything useful anyway and would have to return
+     * immediately.
+     *
+     * Since channel handler would always find rp->strong as it had
+     * left it, just elide extra strong reference creation to avoid
+     * the whole back-and-forth.
+     */
+    if (handler->slot < 0) { /* no longer polling */
+        handler = NULL;
+    }
+
+    sys_mutex_unlock(&rp->lock);
+    return handler;
+}
+
+
+/**
+ * Remove (the only) strong reference.
+ *
+ * If it were real strong/weak pointers, we should also call
+ * destructor for the referenced object, but here the pointer is only
+ * cleared and the object itself is not destroyed by this function.
+ * The refptr is deleted if no weak references remain.
+ */
+void
+pollmgr_refptr_unref(struct pollmgr_refptr *rp)
+{
+    size_t weak_left;
+
+    sys_mutex_lock(&rp->lock);
+
+    LWIP_ASSERT1(rp->strong == 1);
+    --rp->strong;
+
+    if (rp->strong > 0) {
+        sys_mutex_unlock(&rp->lock);
+        return;
+    }
+
+    /* void *ptr = rp->ptr; */
+    rp->ptr = NULL;
+
+    /* delete ptr; // see doc comment */
+
+    /* sample the count under the lock, delete outside of it */
+    weak_left = rp->weak;
+    sys_mutex_unlock(&rp->lock);
+
+    if (weak_left == 0) {
+        pollmgr_refptr_delete(rp);
+    }
+}
diff --git a/src/VBox/NetworkServices/NAT/proxy_pollmgr.h b/src/VBox/NetworkServices/NAT/proxy_pollmgr.h
new file mode 100644
index 00000000..0b44da53
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/proxy_pollmgr.h
@@ -0,0 +1,85 @@
+/* $Id: proxy_pollmgr.h $ */
+/** @file
+ * NAT Network - poll manager, definitions and declarations.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_proxy_pollmgr_h
+#define VBOX_INCLUDED_SRC_NAT_proxy_pollmgr_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#ifndef RT_OS_WINDOWS
+# include <unistd.h> /* for ssize_t */
+#endif
+#include "lwip/sys.h"
+
+/*
+ * Static named slots (aka "channels") of the poll manager.  The
+ * enumerator value is the channel's slot number.
+ */
+enum pollmgr_slot_t {
+ POLLMGR_CHAN_PXTCP_ADD, /* new proxy tcp connection from guest */
+ POLLMGR_CHAN_PXTCP_POLLIN, /* free space in ringbuf, may POLLIN */
+ POLLMGR_CHAN_PXTCP_POLLOUT, /* schedule one-shot POLLOUT callback */
+ POLLMGR_CHAN_PXTCP_DEL, /* delete pxtcp */
+ POLLMGR_CHAN_PXTCP_RESET, /* send RST and delete pxtcp */
+
+ POLLMGR_CHAN_PXUDP_ADD, /* new proxy udp conversation from guest */
+ POLLMGR_CHAN_PXUDP_DEL, /* delete pxudp from pollmgr */
+
+ POLLMGR_CHAN_PORTFWD, /* add/remove port forwarding rules */
+
+ POLLMGR_CHAN_COUNT /* number of channels, not a channel itself */
+};
+
+
+struct pollmgr_handler; /* forward */
+typedef int (*pollmgr_callback)(struct pollmgr_handler *, SOCKET, int);
+
+/*
+ * Handler registered with the poll manager for a slot.
+ */
+struct pollmgr_handler {
+ /* called as callback(handler, fd, revents); returns the new event
+ * mask for the slot, or a negative value to have the slot deleted */
+ pollmgr_callback callback;
+ void *data; /* presumably owner-defined context - TODO confirm */
+ int slot; /* slot index; pollmgr_add() stores -1 on failure */
+};
+
+/*
+ * Special-purpose strong/weak reference to a handler.
+ */
+struct pollmgr_refptr {
+ struct pollmgr_handler *ptr; /* NULL once the strong ref is dropped */
+ sys_mutex_t lock; /* held while the fields here are accessed */
+ size_t strong; /* created as 1, dropped to 0 by unref */
+ size_t weak; /* weak refs added before sending over a channel */
+};
+
+int pollmgr_init(void);
+
+/* static named slots (aka "channels") */
+SOCKET pollmgr_add_chan(int, struct pollmgr_handler *);
+ssize_t pollmgr_chan_send(int, void *buf, size_t nbytes);
+void *pollmgr_chan_recv_ptr(struct pollmgr_handler *, SOCKET, int);
+
+/* dynamic slots */
+int pollmgr_add(struct pollmgr_handler *, SOCKET, int);
+
+/* special-purpose strong/weak references */
+struct pollmgr_refptr *pollmgr_refptr_create(struct pollmgr_handler *);
+void pollmgr_refptr_weak_ref(struct pollmgr_refptr *);
+struct pollmgr_handler *pollmgr_refptr_get(struct pollmgr_refptr *);
+void pollmgr_refptr_unref(struct pollmgr_refptr *);
+
+void pollmgr_update_events(int, int);
+void pollmgr_del_slot(int);
+
+void pollmgr_thread(void *);
+
+/* buffer for callbacks to receive udp without worrying about truncation */
+extern u8_t pollmgr_udpbuf[64 * 1024];
+
+#endif /* !VBOX_INCLUDED_SRC_NAT_proxy_pollmgr_h */
diff --git a/src/VBox/NetworkServices/NAT/proxy_rtadvd.c b/src/VBox/NetworkServices/NAT/proxy_rtadvd.c
new file mode 100644
index 00000000..849ef789
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/proxy_rtadvd.c
@@ -0,0 +1,417 @@
+/* $Id: proxy_rtadvd.c $ */
+/** @file
+ * NAT Network - IPv6 router advertisement daemon.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+
+#include "proxy.h"
+
+#include "lwip/opt.h"
+#include "lwip/sys.h"
+#include "lwip/stats.h"
+#include "lwip/timers.h"
+
+#include "lwip/inet_chksum.h"
+#include "lwip/icmp6.h"
+#include "lwip/nd6.h"
+
+#include "lwip/raw.h"
+
+#include <string.h>
+
+
+static void proxy_rtadvd_timer(void *);
+static void proxy_rtadvd_send_multicast(struct netif *);
+static void proxy_rtadvd_fill_payload(struct netif *, int);
+
+static u8_t rtadvd_recv(void *, struct raw_pcb *, struct pbuf *, ip6_addr_t *);
+
+
+/* ff02::1 - link-local all nodes multicast address */
+static ip6_addr_t allnodes_linklocal = {
+ { PP_HTONL(0xff020000UL), 0, 0, PP_HTONL(0x00000001UL) }
+};
+
+
+/*
+ * Unsolicited Router Advertisement payload.
+ *
+ * NB: Since ICMP checksum covers pseudo-header with destination
+ * address (link-local allnodes multicast in this case) this payload
+ * cannot be used for solicited replies to unicast addresses.
+ */
+static unsigned int unsolicited_ra_payload_length;
+static u8_t unsolicited_ra_payload[
+ sizeof(struct ra_header)
+ /* reserves enough space for NETIF_MAX_HWADDR_LEN */
+ + sizeof(struct lladdr_option)
+ /* we only announce one prefix */
+ + sizeof(struct prefix_option) * 1
+];
+
+
+/*
+ * Last seen value of the IPv6 default route setting; a positive value
+ * makes us advertise ourselves as a default router.
+ */
+static int ndefaults = 0;
+
+/* raw ICMPv6 PCB used to receive Router Solicitations */
+static struct raw_pcb *rtadvd_pcb;
+
+
+/**
+ * Start sending Router Advertisements on the proxy interface.
+ *
+ * Prepares the unsolicited RA payload, installs the raw ICMPv6
+ * receive callback and schedules the first advertisement.  Does
+ * nothing beyond logging if the default route setting is negative or
+ * the PCB cannot be allocated.
+ */
+void
+proxy_rtadvd_start(struct netif *proxy_netif)
+{
+#if 0 /* XXX */
+    ndefaults = rtmon_get_defaults();
+#else
+    ndefaults = g_proxy_options->ipv6_defroute;
+#endif
+    if (ndefaults < 0) {
+        DPRINTF0(("rtadvd: failed to read IPv6 routing table, aborting\n"));
+        return;
+    }
+
+    /* advertise a default route only if we have at least one */
+    proxy_rtadvd_fill_payload(proxy_netif, ndefaults > 0);
+
+    rtadvd_pcb = raw_new_ip6(IP6_NEXTH_ICMP6);
+    if (rtadvd_pcb == NULL) {
+        DPRINTF0(("rtadvd: failed to allocate pcb, aborting\n"));
+        return;
+    }
+
+    /*
+     * We cannot use raw_bind_ip6() since raw_input() doesn't grok
+     * multicasts.  We are going to use ip6_output_if() directly.
+     */
+    raw_recv_ip6(rtadvd_pcb, rtadvd_recv, proxy_netif);
+
+    /* first advertisement goes out in three seconds */
+    sys_timeout(3 * 1000, proxy_rtadvd_timer, proxy_netif);
+}
+
+
+/* number of advertisements still to be sent at the short interval */
+static int quick_ras = 2;
+
+
+/**
+ * lwIP thread callback invoked when we start/stop advertising default
+ * route.  Sends an advertisement right away and restarts the series
+ * of quick advertisements.
+ *
+ * @param arg  The proxy netif.
+ */
+void
+proxy_rtadvd_do_quick(void *arg)
+{
+    struct netif *proxy_netif = (struct netif *)arg;
+
+    quick_ras = 2;
+
+    /* cancel the pending timer and run it by hand */
+    sys_untimeout(proxy_rtadvd_timer, proxy_netif);
+    proxy_rtadvd_timer(proxy_netif); /* sends and re-arms */
+}
+
+
+/*
+ * Timer callback: refresh the payload if the default route setting
+ * has changed, send a multicast RA and re-arm the timer.  The next RA
+ * is sent in 16 seconds while quick advertisements remain, otherwise
+ * in 600 seconds.
+ */
+static void
+proxy_rtadvd_timer(void *arg)
+{
+    struct netif *proxy_netif = (struct netif *)arg;
+    u32_t delay = 600 * 1000;
+    int newdefs;
+
+#if 0 /* XXX */
+    newdefs = rtmon_get_defaults();
+#else
+    newdefs = g_proxy_options->ipv6_defroute;
+#endif
+    if (newdefs != -1 && newdefs != ndefaults) {
+        ndefaults = newdefs;
+        proxy_rtadvd_fill_payload(proxy_netif, ndefaults > 0);
+    }
+
+    proxy_rtadvd_send_multicast(proxy_netif);
+
+    if (quick_ras > 0) {
+        --quick_ras;
+        delay = 16 * 1000;
+    }
+
+    sys_timeout(delay, proxy_rtadvd_timer, proxy_netif);
+}
+
+
+/*
+ * Raw ICMPv6 receive callback that answers Router Solicitations.
+ *
+ * This should be folded into icmp6/nd6 input, but I don't want to
+ * solve this in general, making it configurable, etc.
+ *
+ * Cf. RFC 4861:
+ * 6.1.1. Validation of Router Solicitation Messages
+ *
+ * "arg" is the proxy netif; "pcb" and "addr" are not used.
+ *
+ * Returns RAW_RECV_CONTINUE (0) with the payload pointer restored if
+ * the packet is not a Router Solicitation.  Otherwise the pbuf is
+ * freed and RAW_RECV_CONSUMED (1) is returned; a valid solicitation
+ * is answered with a multicast RA.
+ */
+static u8_t
+rtadvd_recv(void *arg, struct raw_pcb *pcb, struct pbuf *p, ip6_addr_t *addr)
+{
+ enum raw_recv_status { RAW_RECV_CONTINUE = 0, RAW_RECV_CONSUMED = 1 };
+
+ struct netif *proxy_netif = (struct netif *)arg;
+ struct ip6_hdr *ip6_hdr;
+ struct icmp6_hdr *icmp6_hdr;
+ struct lladdr_option *lladdr_opt;
+ void *option;
+ u8_t opttype, optlen8;
+
+ LWIP_UNUSED_ARG(pcb);
+ LWIP_UNUSED_ARG(addr);
+
+ /* save a pointer to IP6 header and skip to ICMP6 payload */
+ ip6_hdr = (struct ip6_hdr *)p->payload;
+ pbuf_header(p, -ip_current_header_tot_len());
+
+ if (p->len < sizeof(struct icmp6_hdr)) {
+ ICMP6_STATS_INC(icmp6.lenerr);
+ goto drop;
+ }
+
+ if (ip6_chksum_pseudo(p, IP6_NEXTH_ICMP6, p->tot_len,
+ ip6_current_src_addr(),
+ ip6_current_dest_addr()) != 0)
+ {
+ ICMP6_STATS_INC(icmp6.chkerr);
+ goto drop;
+ }
+
+ icmp6_hdr = (struct icmp6_hdr *)p->payload;
+ if (icmp6_hdr->type != ICMP6_TYPE_RS) {
+ pbuf_header(p, ip_current_header_tot_len()); /* restore payload ptr */
+ return RAW_RECV_CONTINUE; /* not interested */
+ }
+
+ /* only now that we know it's ICMP6_TYPE_RS we can check IP6 hop limit */
+ if (IP6H_HOPLIM(ip6_hdr) != 255) {
+ ICMP6_STATS_INC(icmp6.proterr);
+ goto drop;
+ }
+
+ /* future, backward-incompatible changes may use different Code values. */
+ if (icmp6_hdr->code != 0) {
+ ICMP6_STATS_INC(icmp6.proterr);
+ goto drop;
+ }
+
+ /* skip past rs_header, nothing interesting in it */
+ if (p->len < sizeof(struct rs_header)) {
+ ICMP6_STATS_INC(icmp6.lenerr);
+ goto drop;
+ }
+ pbuf_header(p, -(s16_t)sizeof(struct rs_header));
+
+ /* walk the options, consuming each one from the front of the pbuf */
+ lladdr_opt = NULL;
+ while (p->len > 0) {
+ int optlen;
+
+ /* an option occupies at least one 8-octet unit */
+ if (p->len < 8) {
+ ICMP6_STATS_INC(icmp6.lenerr);
+ goto drop;
+ }
+
+ option = p->payload;
+ opttype = ((u8_t *)option)[0];
+ optlen8 = ((u8_t *)option)[1]; /* in units of 8 octets */
+
+ /* a zero length would also keep this loop from advancing */
+ if (optlen8 == 0) {
+ ICMP6_STATS_INC(icmp6.proterr);
+ goto drop;
+ }
+
+ optlen = (unsigned int)optlen8 << 3;
+ if (p->len < optlen) {
+ ICMP6_STATS_INC(icmp6.lenerr);
+ goto drop;
+ }
+
+ if (opttype == ND6_OPTION_TYPE_SOURCE_LLADDR) {
+ if (lladdr_opt != NULL) { /* duplicate */
+ ICMP6_STATS_INC(icmp6.proterr);
+ goto drop;
+ }
+ lladdr_opt = (struct lladdr_option *)option;
+ }
+
+ pbuf_header(p, -optlen);
+ }
+
+ /* a solicitation from the unspecified address can't carry SLLA */
+ if (ip6_addr_isany(ip6_current_src_addr())) {
+ if (lladdr_opt != NULL) {
+ ICMP6_STATS_INC(icmp6.proterr);
+ goto drop;
+ }
+
+ /* reply with multicast RA */
+ }
+ else {
+ /*
+ * XXX: Router is supposed to update its Neighbor Cache (6.2.6),
+ * but it's hidden inside nd6.c.
+ */
+
+ /* may reply with either unicast or multicast RA */
+ }
+ /* we just always reply with multicast RA */
+
+ pbuf_free(p); /* NB: this invalidates lladdr_option */
+
+ sys_untimeout(proxy_rtadvd_timer, proxy_netif);
+ proxy_rtadvd_timer(proxy_netif); /* sends and re-arms */
+
+ return RAW_RECV_CONSUMED;
+
+ drop:
+ pbuf_free(p);
+ ICMP6_STATS_INC(icmp6.drop);
+ return RAW_RECV_CONSUMED;
+}
+
+
+/*
+ * Send the prepared unsolicited RA payload to the link-local
+ * all-nodes multicast address.  Failures are only logged.
+ */
+static void
+proxy_rtadvd_send_multicast(struct netif *proxy_netif)
+{
+    struct pbuf *hdr_pbuf;
+    struct pbuf *payload_pbuf;
+    err_t rc;
+
+    /* empty pbuf that only provides headroom for the headers */
+    hdr_pbuf = pbuf_alloc(PBUF_IP, 0, PBUF_RAM);
+    if (hdr_pbuf == NULL) {
+        DPRINTF0(("%s: failed to allocate RA header pbuf\n", __func__));
+        return;
+    }
+
+    /* the payload is not copied, the pbuf just refers to it */
+    payload_pbuf = pbuf_alloc(PBUF_RAW, unsolicited_ra_payload_length, PBUF_ROM);
+    if (payload_pbuf == NULL) {
+        DPRINTF0(("%s: failed to allocate RA payload pbuf\n", __func__));
+        pbuf_free(hdr_pbuf);
+        return;
+    }
+    payload_pbuf->payload = unsolicited_ra_payload;
+    pbuf_chain(hdr_pbuf, payload_pbuf);
+
+    rc = ip6_output_if(hdr_pbuf,
+                       netif_ip6_addr(proxy_netif, 0), /* src: link-local */
+                       &allnodes_linklocal,            /* dst */
+                       255,                            /* hop limit */
+                       0,                              /* traffic class */
+                       IP6_NEXTH_ICMP6,
+                       proxy_netif);
+    if (rc != ERR_OK) {
+        DPRINTF0(("%s: failed to send RA (err=%d)\n", __func__, rc));
+    }
+
+    pbuf_free(payload_pbuf);
+    pbuf_free(hdr_pbuf);
+}
+
+
+/*
+ * (Re)build the unsolicited Router Advertisement payload: RA header,
+ * source link-layer address option and one prefix option, followed by
+ * the ICMPv6 checksum for the all-nodes multicast destination.
+ *
+ * If "is_default" is non-zero the router lifetime is set to 1200
+ * seconds, otherwise to zero.
+ *
+ * XXX: TODO: Only ra_header::router_lifetime (and hence
+ * ra_header::chksum) need to be changed, so we can precompute it once
+ * and then only update these two fields.
+ */
+static void
+proxy_rtadvd_fill_payload(struct netif *proxy_netif, int is_default)
+{
+    struct pbuf *p;
+    struct ra_header *ra_hdr;
+    struct lladdr_option *lladdr_opt;
+    struct prefix_option *pfx_opt;
+    unsigned int lladdr_optlen;
+    ip6_addr_t *prefix_addr;
+
+    LWIP_ASSERT("netif hwaddr too long",
+                proxy_netif->hwaddr_len <= NETIF_MAX_HWADDR_LEN);
+
+    /* start from a clean slate */
+    memset(unsolicited_ra_payload, 0, sizeof(unsolicited_ra_payload));
+
+    /* type + length + ll addr + round up to 8 octets */
+    lladdr_optlen = (2 + proxy_netif->hwaddr_len + 7) & ~0x7;
+
+    /* actual payload length */
+    unsolicited_ra_payload_length =
+          sizeof(struct ra_header)
+        + lladdr_optlen
+        + sizeof(struct prefix_option) * 1;
+
+    /* the three parts are laid out back to back */
+    ra_hdr = (struct ra_header *)unsolicited_ra_payload;
+    lladdr_opt = (struct lladdr_option *)((u8_t *)ra_hdr + sizeof(struct ra_header));
+    pfx_opt = (struct prefix_option *)((u8_t *)lladdr_opt + lladdr_optlen);
+
+    /* RA header */
+    ra_hdr->type = ICMP6_TYPE_RA;
+
+#if 0
+    /*
+     * "M" flag.  Tell guests to use stateful DHCP6.  Disabled here
+     * since we don't provide stateful server.
+     */
+    ra_hdr->flags |= ND6_RA_FLAG_MANAGED_ADDR_CONFIG;
+#endif
+    /*
+     * XXX: TODO: Disable "O" flag for now to match disabled stateless
+     * server.  We don't yet get IPv6 nameserver addresses from
+     * HostDnsService, so we have nothing to say, don't tell guests to
+     * come asking.
+     */
+#if 0
+    /*
+     * "O" flag.  Tell guests to use DHCP6 for DNS and the like.  This
+     * is served by simple stateless server (RFC 3736).
+     *
+     * XXX: "STATEFUL" in the flag name was probably a bug in RFC2461.
+     * It's present in the text, but not in the router configuration
+     * variable name.  It's dropped in the text in RFC4861.
+     */
+    ra_hdr->flags |= ND6_RA_FLAG_OTHER_STATEFUL_CONFIG;
+#endif
+
+    ra_hdr->router_lifetime = is_default ? PP_HTONS(1200) /* seconds */ : 0;
+
+    /* source link-layer address option */
+    lladdr_opt->type = ND6_OPTION_TYPE_SOURCE_LLADDR;
+    lladdr_opt->length = lladdr_optlen >> 3; /* in units of 8 octets */
+    memcpy(lladdr_opt->addr, proxy_netif->hwaddr, proxy_netif->hwaddr_len);
+
+    /* prefix option: the /64 of the netif's address number 1 */
+    prefix_addr = netif_ip6_addr(proxy_netif, 1);
+
+    pfx_opt->type = ND6_OPTION_TYPE_PREFIX_INFO;
+    pfx_opt->length = 4;
+    pfx_opt->prefix_length = 64;
+    pfx_opt->flags = ND6_PREFIX_FLAG_ON_LINK
+                   | ND6_PREFIX_FLAG_AUTONOMOUS;
+    pfx_opt->valid_lifetime = ~0U; /* infinite */
+    pfx_opt->preferred_lifetime = ~0U; /* infinite */
+    pfx_opt->prefix.addr[0] = prefix_addr->addr[0];
+    pfx_opt->prefix.addr[1] = prefix_addr->addr[1];
+
+
+    /* we need a temp pbuf to calculate the checksum */
+    p = pbuf_alloc(PBUF_IP, unsolicited_ra_payload_length, PBUF_ROM);
+    if (p == NULL) {
+        DPRINTF0(("rtadvd: failed to allocate RA pbuf\n"));
+        return;
+    }
+    p->payload = unsolicited_ra_payload;
+
+    ra_hdr->chksum = ip6_chksum_pseudo(p, IP6_NEXTH_ICMP6, p->len,
+                                       /* src addr: netif's link-local */
+                                       netif_ip6_addr(proxy_netif, 0),
+                                       /* dst addr */
+                                       &allnodes_linklocal);
+    pbuf_free(p);
+}
diff --git a/src/VBox/NetworkServices/NAT/proxy_tftpd.c b/src/VBox/NetworkServices/NAT/proxy_tftpd.c
new file mode 100644
index 00000000..92ade07e
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/proxy_tftpd.c
@@ -0,0 +1,981 @@
+/* $Id: proxy_tftpd.c $ */
+/** @file
+ * NAT Network - TFTP server.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+
+#include "proxy.h"
+#include "tftp.h"
+
+#ifndef RT_OS_WINDOWS
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#else
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <io.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+# define O_RDONLY _O_RDONLY
+# define S_ISREG(x) ((x) & _S_IFREG)
+#endif
+
+#include "lwip/timers.h"
+#include "lwip/udp.h"
+
+#include <iprt/string.h>
+
+/*
+ * State of one TFTP read transfer.
+ */
+struct xfer {
+ struct udp_pcb *pcb; /* PCB the transfer's packets are sent from */
+ int fd; /* descriptor of the file being read */
+ unsigned int ack; /* current block number; only its low 16 bits are compared with incoming acks */
+ struct pbuf *pbuf; /* reusable data packet (4 bytes of header + blksize) */
+
+ struct pbuf *oack; /* OACK packet, sent instead of data while not NULL */
+
+ int rexmit; /* retransmissions since the last accepted ack */
+
+ ipX_addr_t peer_ip; /* presumably the client's address - TODO confirm */
+ u16_t peer_port; /* presumably the client's port - TODO confirm */
+
+ char *filename;
+ int octet; /* presumably set for "octet" mode - TODO confirm */
+
+ /* options */
+ unsigned int blksize; /* data bytes per block */
+ int blksize_from_opt;
+
+ unsigned int timeout; /* retransmission timeout, in seconds */
+ int timeout_from_opt;
+
+ off_t tsize;
+ int tsize_from_opt;
+};
+
+/*
+ * TFTP server state.
+ */
+struct tftpd {
+ struct udp_pcb *pcb; /* PCB bound to the server port */
+ char *root; /* copy of the root directory name, trailing '/' stripped */
+
+#define TFTP_MAX_XFERS 3
+ struct xfer xfers[TFTP_MAX_XFERS]; /* fixed pool of transfer states */
+};
+
+/*
+ * Description of a supported TFTP option.
+ */
+struct tftp_option {
+ const char *name; /* option name; NULL terminates the table */
+ int (*getopt)(struct xfer *, const char *); /* presumably parses the requested value - TODO confirm */
+ int (*ackopt)(struct xfer *, char **, size_t *); /* called when building the OACK; < 0 abandons the OACK */
+};
+
+
+static void tftpd_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t);
+
+static void tftpd_rrq(struct pbuf *, ip_addr_t *, u16_t);
+
+static void tftp_xfer_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t);
+
+static void tftp_recv_ack(struct xfer *, u16_t);
+static void tftp_fillbuf(struct xfer *);
+static void tftp_send(struct xfer *);
+static void tftp_timeout(void *);
+
+static struct xfer *tftp_xfer_alloc(ip_addr_t *, u16_t);
+static int tftp_xfer_create_pcb(struct xfer *);
+static void tftp_xfer_free(struct xfer *);
+
+static int tftp_parse_filename(struct xfer *, char **, size_t *);
+static int tftp_parse_mode(struct xfer *, char **, size_t *);
+static int tftp_parse_option(struct xfer *, char **, size_t *);
+
+static int tftp_opt_blksize(struct xfer *, const char *);
+static int tftp_opt_timeout(struct xfer *, const char *);
+static int tftp_opt_tsize(struct xfer *, const char *);
+
+static char *tftp_getstr(struct xfer *, const char *, char **, size_t *);
+
+static int tftp_ack_blksize(struct xfer *, char **, size_t *);
+static int tftp_ack_timeout(struct xfer *, char **, size_t *);
+static int tftp_ack_tsize(struct xfer *, char **, size_t *);
+
+static int tftp_add_oack(char **, size_t *, const char *, const char *, ...) __attribute__((format(printf, 4, 5)));
+
+static ssize_t tftp_strnlen(char *, size_t);
+
+static int tftp_internal_error(struct xfer *);
+static int tftp_error(struct xfer *, u16_t, const char *, ...) __attribute__((format(printf, 3, 4)));
+static void tftpd_error(ip_addr_t *, u16_t, u16_t, const char *, ...) __attribute__((format(printf, 4, 5)));
+static struct pbuf *tftp_verror(u16_t, const char *, va_list);
+
+
+/* NOTE(review): not referenced in the visible part of the file */
+/* const */ int report_transient_errors = 1;
+
+/* the one and only server instance */
+static struct tftpd tftpd;
+
+/* supported options; the entry with NULL name terminates the table */
+static struct tftp_option tftp_options[] = {
+ { "blksize", tftp_opt_blksize, tftp_ack_blksize }, /* RFC 2348 */
+ { "timeout", tftp_opt_timeout, tftp_ack_timeout }, /* RFC 2349 */
+ { "tsize", tftp_opt_tsize, tftp_ack_tsize }, /* RFC 2349 */
+ { NULL, NULL, NULL }
+};
+
+
+/**
+ * Initialize the TFTP server: remember the root directory and bind
+ * the server PCB to TFTP_SERVER_PORT on the proxy netif's address.
+ *
+ * @param proxy_netif  Interface whose address the server binds to.
+ * @param tftproot     Root directory; one trailing '/' is stripped.
+ *
+ * @returns ERR_OK on success, ERR_ARG if tftproot is NULL, ERR_MEM if
+ *          the root copy or the PCB cannot be allocated, or the error
+ *          from udp_bind().  Nothing is left allocated on failure.
+ */
+err_t
+tftpd_init(struct netif *proxy_netif, const char *tftproot)
+{
+    size_t len;
+    err_t error;
+
+    if (tftproot == NULL) {
+        DPRINTF0(("%s: no tftp root specified\n", __func__));
+        return ERR_ARG;
+    }
+
+    tftpd.root = strdup(tftproot);
+    if (tftpd.root == NULL) {
+        DPRINTF0(("%s: failed to allocate tftpd.root\n", __func__));
+        return ERR_MEM;
+    }
+
+    /* an empty root has no last character to look at */
+    len = strlen(tftpd.root);
+    if (len > 0 && tftpd.root[len - 1] == '/') {
+        tftpd.root[len - 1] = '\0';
+    }
+
+    tftpd.pcb = udp_new();
+    if (tftpd.pcb == NULL) {
+        DPRINTF0(("%s: failed to allocate PCB\n", __func__));
+        free(tftpd.root);
+        tftpd.root = NULL;
+        return ERR_MEM;
+    }
+
+    udp_recv(tftpd.pcb, tftpd_recv, NULL);
+
+    error = udp_bind(tftpd.pcb, &proxy_netif->ip_addr, TFTP_SERVER_PORT);
+    if (error != ERR_OK) {
+        DPRINTF0(("%s: failed to bind PCB\n", __func__));
+        /* don't leave a half-initialized server behind */
+        udp_remove(tftpd.pcb);
+        tftpd.pcb = NULL;
+        free(tftpd.root);
+        tftpd.root = NULL;
+        return error;
+    }
+
+    return ERR_OK;
+}
+
+
+/*
+ * Receive callback of the server PCB: dispatch an incoming packet by
+ * its opcode.  Read requests are handed to tftpd_rrq(); write
+ * requests and unknown opcodes are answered with an error.  Chained
+ * pbufs and packets too short to carry an opcode are silently
+ * dropped.  The pbuf is always freed here.
+ */
+static void
+tftpd_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+           ip_addr_t *addr, u16_t port)
+{
+    u16_t op;
+
+    LWIP_ASSERT1(pcb == tftpd.pcb);
+
+    LWIP_UNUSED_ARG(pcb); /* only in assert */
+    LWIP_UNUSED_ARG(arg);
+
+    if (pbuf_clen(p) > 1) { /* this code assumes contiguous aligned payload */
+        pbuf_free(p);
+        return;
+    }
+
+    /*
+     * Don't read the opcode past the end of a short datagram.  This
+     * also keeps the length computation in tftpd_rrq() from wrapping.
+     */
+    if (p->len < sizeof(u16_t)) {
+        DPRINTF(("%s: short packet\n", __func__));
+        pbuf_free(p);
+        return;
+    }
+
+    op = ntohs(*(u16_t *)p->payload);
+    switch (op) {
+    case TFTP_RRQ:
+        tftpd_rrq(p, addr, port);
+        break;
+
+    case TFTP_WRQ:
+        tftpd_error(addr, port, TFTP_EACCESS, "Permission denied");
+        break;
+
+    default:
+        tftpd_error(addr, port, TFTP_ENOSYS, "Bad opcode %d", op);
+        break;
+    }
+
+    pbuf_free(p);
+}
+
+
+/**
+ * Parse Read Request packet and start new transfer.
+ *
+ * Allocates a transfer for the peer, parses the file name, the mode
+ * and any options, prepares the OACK (if options were accepted) and
+ * the data packet buffer, creates the transfer's PCB and sends the
+ * first packet.  On any failure the transfer is freed again.
+ *
+ * @param p     The request; it is not freed here.
+ * @param addr  Peer address.
+ * @param port  Peer port.
+ */
+static void
+tftpd_rrq(struct pbuf *p, ip_addr_t *addr, u16_t port)
+{
+    struct xfer *xfer;
+    char *s;
+    size_t len;
+    int has_options;
+    int status;
+
+    xfer = tftp_xfer_alloc(addr, port);
+    if (xfer == NULL) {
+        return;
+    }
+
+    /* skip opcode */
+    s = (char *)p->payload + sizeof(u16_t);
+    len = p->len - sizeof(u16_t);
+
+
+    /*
+     * Parse RRQ:
+     *   filename, mode, [opt1, value1, [...] ]
+     */
+    status = tftp_parse_filename(xfer, &s, &len);
+    if (status < 0) {
+        goto terminate;
+    }
+
+    status = tftp_parse_mode(xfer, &s, &len);
+    if (status < 0) {
+        goto terminate;
+    }
+
+    has_options = 0;
+    while (len > 0) {
+        status = tftp_parse_option(xfer, &s, &len);
+        if (status < 0) {
+            goto terminate;
+        }
+        has_options += status;
+    }
+
+
+    /*
+     * Create OACK packet if necessary.  Failure to build it is not
+     * fatal, the transfer then proceeds without an OACK.
+     */
+    if (has_options) {
+        xfer->oack = pbuf_alloc(PBUF_RAW, 128, PBUF_RAM);
+        if (xfer->oack != NULL) {
+            struct tftp_option *o;
+
+            ((u16_t *)xfer->oack->payload)[0] = PP_HTONS(TFTP_OACK);
+
+            s = (char *)xfer->oack->payload + sizeof(u16_t);
+            len = xfer->oack->len - sizeof(u16_t);
+
+            for (o = &tftp_options[0]; o->name != NULL; ++o) {
+                status = (*o->ackopt)(xfer, &s, &len);
+                if (status < 0) {
+                    pbuf_free(xfer->oack);
+                    xfer->oack = NULL;
+                    break;
+                }
+            }
+
+            if (xfer->oack != NULL) {
+                /* trim the packet to the part that was filled in */
+                Assert((u16_t)(xfer->oack->len - len) == xfer->oack->len - len);
+                pbuf_realloc(xfer->oack, (u16_t)(xfer->oack->len - len));
+            }
+        }
+    }
+
+
+    /*
+     * Create static pbuf that will be used for all data packets.
+     */
+    xfer->pbuf = pbuf_alloc(PBUF_RAW, xfer->blksize + 4, PBUF_RAM);
+    if (xfer->pbuf == NULL) {
+        tftp_internal_error(xfer);
+        goto terminate;
+    }
+    ((u16_t *)xfer->pbuf->payload)[0] = PP_HTONS(TFTP_DATA);
+
+
+    /*
+     * Finally, create PCB.  Before this point any error was reported
+     * from the server port (see tftp_error() for the reason).
+     */
+    status = tftp_xfer_create_pcb(xfer);
+    if (status < 0) {
+        goto terminate;
+    }
+
+    if (xfer->oack) {
+        tftp_send(xfer);
+    }
+    else {
+        /* trigger send of the first data packet */
+        tftp_recv_ack(xfer, 0);
+    }
+
+    return;
+
+  terminate:
+    DPRINTF(("%s: terminated\n", __func__));
+    tftp_xfer_free(xfer);
+}
+
+
+/*
+ * Receive callback of a transfer's PCB ("arg" is the transfer).
+ *
+ * An ACK is passed on to tftp_recv_ack().  An ERROR terminates the
+ * transfer.  Anything else, including a packet too short to parse, is
+ * answered with an error and terminates the transfer too.  The pbuf
+ * is always freed here.
+ */
+static void
+tftp_xfer_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+               ip_addr_t *addr, u16_t port)
+{
+    struct xfer *xfer = (struct xfer *)arg;
+    u16_t op;
+
+    LWIP_UNUSED_ARG(pcb); /* assert only */
+    LWIP_UNUSED_ARG(addr);
+    LWIP_UNUSED_ARG(port);
+
+    LWIP_ASSERT1(xfer->pcb == pcb);
+
+    /* need at least the opcode */
+    if (p->len < 2) {
+        goto short_packet;
+    }
+
+    op = ntohs(((u16_t *)p->payload)[0]);
+    switch (op) {
+    case TFTP_ACK:
+        /* opcode is followed by the block number */
+        if (p->len < 4) {
+            goto short_packet;
+        }
+        tftp_recv_ack(xfer, ntohs(((u16_t *)p->payload)[1]));
+        break;
+
+    case TFTP_ERROR:
+        tftp_xfer_free(xfer);
+        break;
+
+    default:
+        tftp_error(xfer, TFTP_ENOSYS, "Unexpected opcode %d", op);
+        tftp_xfer_free(xfer);
+        break;
+    }
+
+    pbuf_free(p);
+    return;
+
+  short_packet:
+    tftp_error(xfer, TFTP_ENOSYS, "Short packet");
+    tftp_xfer_free(xfer);
+    pbuf_free(p);
+}
+
+
+/**
+ * Process an ACK for block number 'ack'.
+ *
+ * ACKs that do not match the block we are waiting for are ignored
+ * (the retransmit timer keeps running).  A matching ACK cancels the
+ * timer and either finishes the transfer, if the acknowledged DATA
+ * packet was the final (short) one, or sends the next block.
+ *
+ * Also called with ack == 0 to kick off a transfer that has no OACK.
+ */
+static void
+tftp_recv_ack(struct xfer *xfer, u16_t ack)
+{
+    if (ack != (u16_t)xfer->ack) {
+        DPRINTF2(("%s: expect %u (%u), got %u\n",
+                  __func__, (u16_t)xfer->ack, xfer->ack, ack));
+        return;
+    }
+
+    sys_untimeout(tftp_timeout, xfer);
+    xfer->rexmit = 0;
+
+    /*
+     * The data pbuf holds a 4 byte header (opcode + block number)
+     * followed by up to blksize bytes of file data, so the last
+     * packet sent was the final one iff it is shorter than
+     * blksize + 4.  Comparing against blksize alone misses final
+     * blocks that carry blksize-4 .. blksize-1 bytes of data.
+     */
+    if (xfer->pbuf->len < xfer->blksize + 4) {
+        DPRINTF(("%s: got final ack %u (%u)\n",
+                 __func__, (u16_t)xfer->ack, xfer->ack));
+        tftp_xfer_free(xfer);
+        return;
+    }
+
+    /* OACK has been acknowledged, from now on we send data only */
+    if (xfer->oack != NULL) {
+        pbuf_free(xfer->oack);
+        xfer->oack = NULL;
+    }
+
+    ++xfer->ack;
+    tftp_fillbuf(xfer);
+    tftp_send(xfer);
+}
+
+
+/**
+ * (Re)send the packet the peer has yet to acknowledge - the OACK if
+ * one is still pending, the current data block otherwise - and arm
+ * the retransmit timer.
+ */
+static void
+tftp_send(struct xfer *xfer)
+{
+    struct pbuf *out = xfer->pbuf;
+
+    if (xfer->oack) {
+        out = xfer->oack;
+    }
+
+    udp_send(xfer->pcb, out);
+    sys_timeout(xfer->timeout * 1000, tftp_timeout, xfer);
+}
+
+
+/**
+ * Retransmit timer callback.  Resends the outstanding packet until
+ * the retransmit budget (5 attempts for timeouts below 60, 3
+ * otherwise) is used up, then drops the transfer.
+ */
+static void
+tftp_timeout(void *arg)
+{
+    struct xfer *xfer = (struct xfer *)arg;
+    const int limit = (xfer->timeout < 60) ? 5 : 3;
+
+    ++xfer->rexmit;
+    if (xfer->rexmit >= limit) {
+        tftp_xfer_free(xfer);
+        return;
+    }
+
+    tftp_send(xfer);
+}
+
+
+/**
+ * Load the next block of the file into the data pbuf.
+ *
+ * Stores the current block number into the DATA header, reads up to
+ * blksize bytes right after the 4 byte header and trims the pbuf to
+ * what was actually read.  On a read failure an error packet is sent
+ * and the pbuf is left as is; the function cannot report the failure
+ * to its caller.
+ */
+static void
+tftp_fillbuf(struct xfer *xfer)
+{
+    u16_t *header;
+    char *data;
+    ssize_t nread;
+
+    DPRINTF2(("%s: reading block %u\n", __func__, xfer->ack));
+
+    header = (u16_t *)xfer->pbuf->payload;
+    data = (char *)xfer->pbuf->payload + 4;
+
+    header[1] = htons(xfer->ack);
+    nread = read(xfer->fd, data, xfer->blksize);
+
+    if (nread >= 0) {
+        pbuf_realloc(xfer->pbuf, nread + 4);
+        return;
+    }
+
+    tftp_error(xfer, TFTP_EUNDEF, "Read failed");
+}
+
+
+/**
+ * Pick a free transfer slot - one that has no pcb - and prime it for
+ * a new request from the given peer.  No pcb is allocated here.
+ *
+ * The pcb is created only after the request has been verified and
+ * accepted.  Until then errors are reported from the server port,
+ * which keeps tcpdump(8) output readable: tcpdump does not track
+ * TFTP transfers, so an error sent from a fresh pcb would not be
+ * decoded as TFTP (see tftp_error()).
+ *
+ * A rejected request leaves the pcb NULL, i.e. the slot stays free.
+ * All TFTP processing runs on the lwIP thread, so nothing can grab
+ * the slot in between and no locking is needed before the pcb
+ * exists.
+ *
+ * Returns NULL if all slots are busy.
+ */
+static struct xfer *
+tftp_xfer_alloc(ip_addr_t *addr, u16_t port)
+{
+    struct xfer *slot = NULL;
+    int idx = 0;
+
+    /* first slot without a pcb wins */
+    while (slot == NULL && idx < TFTP_MAX_XFERS) {
+        if (tftpd.xfers[idx].pcb == NULL) {
+            slot = &tftpd.xfers[idx];
+        }
+        ++idx;
+    }
+
+    if (slot == NULL) {
+        if (report_transient_errors) {
+            tftpd_error(addr, port, TFTP_EUNDEF,
+                        "Maximum number of simultaneous connections exceeded");
+        }
+        return NULL;
+    }
+
+    /* who we are talking to */
+    ipX_addr_copy(0, slot->peer_ip, *ip_2_ipX(addr));
+    slot->peer_port = port;
+
+    /* transfer state */
+    slot->ack = 0;
+    slot->rexmit = 0;
+    slot->pbuf = NULL;
+    slot->oack = NULL;
+
+    /* defaults that request options may override */
+    slot->blksize = 512;
+    slot->blksize_from_opt = 0;
+
+    slot->timeout = 1;
+    slot->timeout_from_opt = 0;
+
+    slot->tsize = -1;
+    slot->tsize_from_opt = 0;
+
+    return slot;
+}
+
+
+/**
+ * Create the pcb for an accepted transfer: bound to the local
+ * address of the server pcb with an automatically assigned port and
+ * connected to the peer.  From here on the slot counts as allocated.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+tftp_xfer_create_pcb(struct xfer *xfer)
+{
+    struct udp_pcb *pcb = udp_new();
+
+    /* Bind */
+    if (pcb != NULL
+        && udp_bind(pcb, ipX_2_ip(&tftpd.pcb->local_ip), 0) != ERR_OK)
+    {
+        udp_remove(pcb);
+        pcb = NULL;
+    }
+
+    /* Connect */
+    if (pcb != NULL
+        && udp_connect(pcb, ipX_2_ip(&xfer->peer_ip),
+                       xfer->peer_port) != ERR_OK)
+    {
+        udp_remove(pcb);
+        pcb = NULL;
+    }
+
+    if (pcb == NULL) {
+        if (report_transient_errors) {
+            tftp_error(xfer, TFTP_EUNDEF, "Failed to create connection");
+        }
+        return -1;
+    }
+
+    udp_recv(pcb, tftp_xfer_recv, xfer);
+    xfer->pcb = pcb;
+
+    return 0;
+}
+
+
+/**
+ * Release everything a transfer holds: pending timer, pcb, open
+ * file, OACK and data pbufs and the file name.  Clearing the pcb is
+ * what makes the slot available to tftp_xfer_alloc() again.
+ */
+static void
+tftp_xfer_free(struct xfer *xfer)
+{
+    sys_untimeout(tftp_timeout, xfer);
+
+    if (xfer->pcb != NULL) {
+        udp_remove(xfer->pcb);
+        xfer->pcb = NULL;
+    }
+
+    if (xfer->fd > 0) {
+        close(xfer->fd);
+        xfer->fd = -1;
+    }
+
+    if (xfer->oack != NULL) {
+        pbuf_free(xfer->oack);
+        xfer->oack = NULL;
+    }
+
+    if (xfer->pbuf != NULL) {
+        pbuf_free(xfer->pbuf);
+        xfer->pbuf = NULL;
+    }
+
+    /* free(NULL) is a no-op, no need to test first */
+    free(xfer->filename);
+    xfer->filename = NULL;
+}
+
+
+/**
+ * Parse the file name from the request and open the file.
+ *
+ * Consumes the NUL-terminated file name at *ps (advancing *ps and
+ * *plen), saves a normalized copy in xfer->filename, opens the file
+ * relative to the TFTP root and records its size in xfer->tsize.
+ *
+ * Returns 0 on success.  On failure an error packet has been sent to
+ * the peer where appropriate and -1 is returned; resources already
+ * stored in the xfer (file name, descriptor) are left for
+ * tftp_xfer_free() to release.
+ */
+static int
+tftp_parse_filename(struct xfer *xfer, char **ps, size_t *plen)
+{
+    const char *filename;
+    struct stat st;
+    char *pathname;
+    char *s;
+    size_t len;
+    int status;
+    int open_errno;
+
+    filename = tftp_getstr(xfer, "filename", ps, plen);
+    if (filename == NULL) {
+        return -1;
+    }
+
+    DPRINTF(("%s: requested file name: %s\n", __func__, filename));
+    xfer->filename = strdup(filename);
+    if (xfer->filename == NULL) {
+        return tftp_internal_error(xfer);
+    }
+
+    /* replace backslashes with forward slashes */
+    s = xfer->filename;
+    while ((s = strchr(s, '\\')) != NULL) {
+        *s++ = '/';
+    }
+
+    /* deny attempts to break out of tftp dir */
+    if (strncmp(xfer->filename, "../", 3) == 0
+        || strstr(xfer->filename, "/../") != NULL)
+    {
+        return tftp_error(xfer, TFTP_ENOENT, "Permission denied");
+    }
+
+    len = strlen(tftpd.root) + 1 /*slash*/ + strlen(xfer->filename) + 1 /*nul*/;
+    pathname = (char *)malloc(len);
+    if (pathname == NULL) {
+        return tftp_internal_error(xfer);
+    }
+
+    RTStrPrintf(pathname, len, "%s/%s", tftpd.root, xfer->filename);
+/** @todo fix RTStrPrintf because this does not currently work:
+ *    status = RTStrPrintf(pathname, len, "%s/%s", tftpd.root, xfer->filename);
+ *    if (status < 0) {
+ *        return tftp_internal_error(xfer);
+ *    }
+ */
+
+    DPRINTF(("%s: full pathname: %s\n", __func__, pathname));
+    xfer->fd = open(pathname, O_RDONLY);
+
+    /*
+     * The path is only needed for open(), release it right away so
+     * that none of the returns below can leak it.  Save errno first
+     * in case free() clobbers it.
+     */
+    open_errno = errno;
+    free(pathname);
+
+    if (xfer->fd < 0) {
+        /* open(2) reports missing permissions as EACCES */
+        if (open_errno == EPERM || open_errno == EACCES) {
+            return tftp_error(xfer, TFTP_ENOENT, "Permission denied");
+        }
+        else {
+            return tftp_error(xfer, TFTP_ENOENT, "File not found");
+        }
+    }
+
+    status = fstat(xfer->fd, &st);
+    if (status < 0) {
+        return tftp_internal_error(xfer);
+    }
+
+    /* directories, devices etc. are not served */
+    if (!S_ISREG(st.st_mode)) {
+        return tftp_error(xfer, TFTP_ENOENT, "File not found");
+    }
+
+    xfer->tsize = st.st_size;
+    return 0;
+}
+
+
+/**
+ * Parse the transfer mode from the request.  Only "octet" is
+ * accepted; every other mode is rejected with an error packet.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+tftp_parse_mode(struct xfer *xfer, char **ps, size_t *plen)
+{
+    const char *mode = tftp_getstr(xfer, "mode", ps, plen);
+
+    if (mode == NULL) {
+        return -1;
+    }
+
+    if (RTStrICmp(mode, "octet") == 0) {
+        xfer->octet = 1;
+        return 0;
+    }
+
+    if (RTStrICmp(mode, "netascii") == 0) {
+        xfer->octet = 0;
+        /* XXX: not (yet?) */
+        return tftp_error(xfer, TFTP_ENOSYS, "Mode \"netascii\" not supported");
+    }
+
+    if (RTStrICmp(mode, "mail") == 0) {
+        return tftp_error(xfer, TFTP_ENOSYS, "Mode \"mail\" not supported");
+    }
+
+    return tftp_error(xfer, TFTP_ENOSYS, "Unknown mode \"%s\"", mode);
+}
+
+
+/**
+ * Parse one "name, value" option pair from the request and hand the
+ * value to the matching handler from tftp_options[].
+ *
+ * Returns the handler's result for a known option, 0 for an unknown
+ * option (it is ignored) and -1 on a malformed request.
+ */
+static int
+tftp_parse_option(struct xfer *xfer, char **ps, size_t *plen)
+{
+    struct tftp_option *entry;
+    const char *name;
+    const char *value;
+
+    name = tftp_getstr(xfer, "option name", ps, plen);
+    if (name == NULL) {
+        return -1;
+    }
+
+    /* a name must be followed by its value */
+    if (*plen == 0) {
+        return tftp_error(xfer, TFTP_EUNDEF, "Missing option value");
+    }
+
+    value = tftp_getstr(xfer, "option value", ps, plen);
+    if (value == NULL) {
+        return -1;
+    }
+
+    /* handle option if known, ignore otherwise */
+    entry = &tftp_options[0];
+    while (entry->name != NULL) {
+        if (RTStrICmp(entry->name, name) == 0) {
+            return (*entry->getopt)(xfer, value);
+        }
+        ++entry;
+    }
+
+    return 0;                   /* unknown option */
+}
+
+
+/**
+ * Handle the "blksize" option.  Values that are not plain decimal
+ * numbers or are below 8 are ignored; values above 1428 are clamped
+ * to 1428.
+ *
+ * Returns 1 if the option was accepted, 0 if it was ignored.
+ */
+static int
+tftp_opt_blksize(struct xfer *xfer, const char *optval)
+{
+    char *tail;
+    long requested;
+
+    errno = 0;
+    requested = strtol(optval, &tail, 10);
+
+    if (errno != 0 || *tail != '\0' || requested < 8) {
+        return 0;
+    }
+
+    /* anything larger exceeds ethernet mtu */
+    xfer->blksize = (requested > 1428) ? 1428 : requested;
+    xfer->blksize_from_opt = 1;
+    return 1;
+}
+
+
+/**
+ * Handle the "timeout" option.  The requested value is not looked
+ * at: the option is always reported as not accepted (0).
+ */
+static int
+tftp_opt_timeout(struct xfer *xfer, const char *optval)
+{
+    LWIP_UNUSED_ARG(optval);
+    LWIP_UNUSED_ARG(xfer);
+
+    return 0;
+}
+
+
+/**
+ * Handle the "tsize" option.  Accepted only if the file size is
+ * known (xfer->tsize is not negative).
+ *
+ * Returns 1 if the option was accepted, 0 otherwise.
+ */
+static int
+tftp_opt_tsize(struct xfer *xfer, const char *optval)
+{
+    LWIP_UNUSED_ARG(optval);    /* must be "0", but we don't check it */
+
+    if (xfer->tsize >= 0) {
+        xfer->tsize_from_opt = 1;
+        return 1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * Fetch the next NUL-terminated string from the request.
+ *
+ * On success returns the string and advances *ps / *plen past it and
+ * its terminator.  If there is no terminator within the remaining
+ * *plen bytes, an "Unterminated <msg>" error is sent to the peer and
+ * NULL is returned; *ps and *plen are left unchanged.
+ */
+static char *
+tftp_getstr(struct xfer *xfer, const char *msg, char **ps, size_t *plen)
+{
+    char *str = *ps;
+    ssize_t n = tftp_strnlen(str, *plen);
+
+    if (n >= 0) {
+        /* step over the string and its nul byte */
+        *ps += n + 1;
+        *plen -= n + 1;
+        return str;
+    }
+
+    tftp_error(xfer, TFTP_EUNDEF, "Unterminated %s", msg);
+    return NULL;
+}
+
+
+/**
+ * Add "blksize" to the OACK if it was set from a request option.
+ *
+ * Returns 0 on success (or if there is nothing to add), -1 if the
+ * option does not fit.
+ */
+static int
+tftp_ack_blksize(struct xfer *xfer, char **ps, size_t *plen)
+{
+    if (xfer->blksize_from_opt) {
+        return tftp_add_oack(ps, plen, "blksize", "%u", xfer->blksize);
+    }
+
+    return 0;
+}
+
+
+/**
+ * Add "timeout" to the OACK if it was set from a request option.
+ *
+ * Returns 0 on success (or if there is nothing to add), -1 if the
+ * option does not fit.
+ */
+static int
+tftp_ack_timeout(struct xfer *xfer, char **ps, size_t *plen)
+{
+    if (xfer->timeout_from_opt) {
+        return tftp_add_oack(ps, plen, "timeout", "%u", xfer->timeout);
+    }
+
+    return 0;
+}
+
+
+/**
+ * Add "tsize" (the size of the file) to the OACK if the peer asked
+ * for it.
+ *
+ * Returns 0 on success (or if there is nothing to add), -1 if the
+ * option does not fit.
+ */
+static int
+tftp_ack_tsize(struct xfer *xfer, char **ps, size_t *plen)
+{
+    if (xfer->tsize_from_opt) {
+        LWIP_ASSERT1(xfer->tsize >= 0);
+        /* XXX: FIXME: want 64 bit */
+        return tftp_add_oack(ps, plen, "tsize",
+                             "%lu", (unsigned long)xfer->tsize);
+    }
+
+    return 0;
+}
+
+
+/**
+ * Append an option to the OACK being built at *ps: the option name
+ * and the printf-style formatted value, each followed by a nul byte.
+ * *ps and *plen are advanced past whatever was written.
+ *
+ * Returns 0 on success, -1 if the option does not fit.
+ */
+static int
+tftp_add_oack(char **ps, size_t *plen,
+              const char *optname, const char *fmt, ...)
+{
+    va_list args;
+    int n;
+
+    /*
+     * Option name.
+     *
+     * @todo RTStrPrintf needs fixing before its result can be
+     * checked for errors (n < 0), so only the length is tested.
+     */
+    n = (int)RTStrPrintf(*ps, *plen, "%s", optname);
+    if ((size_t)n >= *plen) {
+        return -1;
+    }
+
+    n += 1;                     /* for nul byte */
+    *ps += n;
+    *plen -= n;
+
+    /* Option value. */
+    va_start(args, fmt);
+    n = vsnprintf(*ps, *plen, fmt, args);
+    va_end(args);
+    if (n < 0 || (size_t)n >= *plen) {
+        return -1;
+    }
+
+    n += 1;                     /* for nul byte */
+    *ps += n;
+    *plen -= n;
+
+    return 0;
+}
+
+
+/**
+ * Length of the string at buf, looking at no more than bufsize
+ * bytes.  Returns -1 if no nul byte is found within the buffer.
+ */
+static ssize_t
+tftp_strnlen(char *buf, size_t bufsize)
+{
+    char *nul = (char *)memchr(buf, '\0', bufsize);
+
+    return (nul != NULL) ? nul - buf : -1;
+}
+
+
+/**
+ * Report a server-side failure.  The peer is told about it only if
+ * report_transient_errors is set.
+ *
+ * Always returns -1 for callers to reuse.
+ */
+static int
+tftp_internal_error(struct xfer *xfer)
+{
+    if (!report_transient_errors) {
+        return -1;
+    }
+
+    /* tftp_error() returns -1 as well */
+    return tftp_error(xfer, TFTP_EUNDEF, "Internal error");
+}
+
+
+/**
+ * Send an error packet with a printf-style formatted message to the
+ * peer of the transfer.
+ *
+ * The packet is sent from the transfer's own PCB if it exists and
+ * from the TFTP server port otherwise (*).
+ *
+ * (*) Creation of the PCB is delayed until the original request is
+ * verified and accepted.  tcpdump(8) does not track TFTP transfers,
+ * so an error reply sent from a new PCB would not be recognized and
+ * decoded as TFTP.
+ *
+ * Always returns -1 for callers to reuse.
+ */
+static int
+tftp_error(struct xfer *xfer, u16_t error, const char *fmt, ...)
+{
+    struct pbuf *errbuf;
+    va_list args;
+
+    LWIP_ASSERT1(xfer != NULL);
+
+    va_start(args, fmt);
+    errbuf = tftp_verror(error, fmt, args);
+    va_end(args);
+
+    if (errbuf != NULL) {
+        if (xfer->pcb == NULL) {
+            udp_sendto(tftpd.pcb, errbuf,
+                       ipX_2_ip(&xfer->peer_ip), xfer->peer_port);
+        }
+        else {
+            udp_send(xfer->pcb, errbuf);
+        }
+
+        pbuf_free(errbuf);
+    }
+
+    return -1;
+}
+
+
+/**
+ * Send an error packet with a printf-style formatted message from
+ * the TFTP server port to the given peer.  Nothing is sent if the
+ * packet cannot be allocated.
+ */
+static void
+tftpd_error(ip_addr_t *addr, u16_t port, u16_t error, const char *fmt, ...)
+{
+    struct pbuf *errbuf;
+    va_list args;
+
+    va_start(args, fmt);
+    errbuf = tftp_verror(error, fmt, args);
+    va_end(args);
+
+    if (errbuf == NULL) {
+        return;
+    }
+
+    udp_sendto(tftpd.pcb, errbuf, addr, port);
+    pbuf_free(errbuf);
+}
+
+
+/**
+ * Create an ERROR pbuf carrying the given error code and a formatted
+ * error message.  The message is truncated to fit 512 bytes
+ * including its nul byte and the pbuf is trimmed to the actual size
+ * of the packet.
+ *
+ * Returns NULL if the pbuf cannot be allocated.
+ */
+static struct pbuf *
+tftp_verror(u16_t error, const char *fmt, va_list ap)
+{
+    struct tftp_error {
+        u16_t opcode;           /* TFTP_ERROR */
+        u16_t errcode;
+        char errmsg[512];
+    };
+
+    struct tftp_error *pkt;
+    struct pbuf *q;
+    int len;
+
+    q = pbuf_alloc(PBUF_TRANSPORT, sizeof(*pkt), PBUF_RAM);
+    if (q == NULL) {
+        return NULL;
+    }
+
+    pkt = (struct tftp_error *)q->payload;
+    pkt->opcode = PP_HTONS(TFTP_ERROR);
+    pkt->errcode = htons(error);
+
+    /* work out the message size on the wire, nul byte included */
+    len = vsnprintf(pkt->errmsg, sizeof(pkt->errmsg), fmt, ap);
+    if (len < 0) {
+        /* formatting failed, send an empty message */
+        pkt->errmsg[0] = '\0';
+        len = 1;
+    }
+    else if ((size_t)len >= sizeof(pkt->errmsg)) {
+        len = sizeof(pkt->errmsg);      /* truncated, includes nul byte */
+    }
+    else {
+        ++len;                          /* for nul byte */
+    }
+
+    pbuf_realloc(q, sizeof(*pkt) - sizeof(pkt->errmsg) + len);
+    return q;
+}
diff --git a/src/VBox/NetworkServices/NAT/pxdns.c b/src/VBox/NetworkServices/NAT/pxdns.c
new file mode 100644
index 00000000..28d018ce
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/pxdns.c
@@ -0,0 +1,932 @@
+/* $Id: pxdns.c $ */
+/** @file
+ * NAT Network - DNS proxy.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+/*
+ * Copyright (c) 2003,2004,2005 Armin Wolfermann
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+
+#include "proxy.h"
+#include "proxy_pollmgr.h"
+#include "pxtcp.h"
+
+#include "lwip/sys.h"
+#include "lwip/tcpip.h"
+#include "lwip/ip_addr.h"
+#include "lwip/udp.h"
+#include "lwip/tcp.h"
+
+#ifndef RT_OS_WINDOWS
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#else
+#include "winpoll.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+
+/**
+ * Socket address of an upstream resolver, either IPv4 or IPv6.
+ * Check sa.sa_family to tell which of sin / sin6 is in use.
+ */
+union sockaddr_inet {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+};
+
+
+struct request;
+
+
+/**
+ * DNS Proxy
+ *
+ * Guest queries arrive on the lwIP pcbs and are relayed to the
+ * upstream resolvers through the host sockets; replies read from the
+ * host sockets are sent back to the guest through the pcb the query
+ * came from.
+ */
+struct pxdns {
+ /* host sockets used to talk to the resolvers; sock6 is
+ * INVALID_SOCKET if the host has no IPv6 support */
+ SOCKET sock4;
+ SOCKET sock6;
+
+ /* poll manager handlers for sock4 and sock6 */
+ struct pollmgr_handler pmhdl4;
+ struct pollmgr_handler pmhdl6;
+
+ /* lwIP UDP pcbs bound to port 53, guest queries come in here */
+ struct udp_pcb *pcb4;
+ struct udp_pcb *pcb6;
+
+ /* lwIP TCP pcb listening on port 53 */
+ struct tcp_pcb *ltcp;
+
+ /* current list of resolvers; generation is bumped every time the
+ * list is replaced */
+ size_t generation;
+ size_t nresolvers;
+ union sockaddr_inet *resolvers;
+
+ /* id to use for the next relayed request */
+ u16_t id;
+
+ /* taken around updates of the request hash and timeout lists */
+ sys_mutex_t lock;
+
+ /* statistics */
+ size_t active_queries;
+ size_t expired_queries;
+ size_t late_answers;
+ size_t hash_collisions;
+
+ /* number of timeout slots; the timer that moves to the next slot
+ * is scheduled with sys_timeout(1 * 1000, ...) */
+#define TIMEOUT 5
+ /* slot new requests are added to */
+ size_t timeout_slot;
+ /* bit N is set while timeout_list[N] is not empty */
+ u32_t timeout_mask;
+ struct request *timeout_list[TIMEOUT];
+
+ /* requests hashed by the id used in the relayed request; the hash
+ * is the low HASHSIZE bits of that id */
+#define HASHSIZE 10
+#define HASH(id) ((id) & ((1 << HASHSIZE) - 1))
+ struct request *request_hash[1 << HASHSIZE];
+} g_pxdns;
+
+
+/**
+ * Guest DNS query that has been relayed to an upstream resolver and
+ * is waiting for a reply. Allocated in pxdns_query() with room for
+ * the query data at the end.
+ */
+struct request {
+ /**
+ * Request ID that we use in relayed request.
+ */
+ u16_t id;
+
+ /**
+ * pxdns::generation used for this request
+ */
+ size_t generation;
+
+ /**
+ * Current index into pxdns::resolvers
+ */
+ size_t residx;
+
+ /**
+ * PCB from which we have received this request. lwIP doesn't
+ * support listening for both IPv4 and IPv6 on the same pcb, so we
+ * use two and need to keep track.
+ */
+ struct udp_pcb *pcb;
+
+ /**
+ * Client this request is from and its original request ID.
+ */
+ ipX_addr_t client_addr;
+ u16_t client_port;
+ u16_t client_id;
+
+ /**
+ * Chaining for pxdns::request_hash. pprev_hash points to the
+ * pointer that points to this request (the hash bucket or the
+ * next_hash of the previous request); it is NULL while the
+ * request is not in the hash.
+ */
+ struct request **pprev_hash;
+ struct request *next_hash;
+
+ /**
+ * Chaining for pxdns::timeout_list, same scheme as for the hash.
+ */
+ struct request **pprev_timeout;
+ struct request *next_timeout;
+
+ /**
+ * Slot in pxdns::timeout_list
+ */
+ size_t timeout_slot;
+
+ /**
+ * Pbuf with reply received on pollmgr thread.
+ */
+ struct pbuf *reply;
+
+ /**
+ * Preallocated lwIP message to send reply from the lwIP thread.
+ */
+ struct tcpip_msg msg_reply;
+
+ /**
+ * Client request. ID is replaced with ours, original saved in
+ * client_id. Use a copy since we might need to resend and we
+ * don't want to hold onto pbuf of the request.
+ *
+ * size is the number of bytes in data; data is declared with one
+ * element but extends past the end of the structure.
+ */
+ size_t size;
+ u8_t data[1];
+};
+
+
+/* resolver list management */
+static void pxdns_create_resolver_sockaddrs(struct pxdns *pxdns,
+ const char **nameservers);
+
+/* DNS over TCP */
+static err_t pxdns_accept_syn(void *arg, struct tcp_pcb *newpcb, struct pbuf *syn);
+
+/* guest -> resolver: receiving and relaying queries */
+static void pxdns_recv4(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+ ip_addr_t *addr, u16_t port);
+static void pxdns_recv6(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+ ip6_addr_t *addr, u16_t port);
+static void pxdns_query(struct pxdns *pxdns, struct udp_pcb *pcb, struct pbuf *p,
+ ipX_addr_t *addr, u16_t port);
+static void pxdns_timer(void *arg);
+static int pxdns_rexmit(struct pxdns *pxdns, struct request *req);
+static int pxdns_forward_outbound(struct pxdns *pxdns, struct request *req);
+
+/* resolver -> guest: receiving and relaying replies */
+static int pxdns_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents);
+static void pxdns_pcb_reply(void *ctx);
+
+/* bookkeeping of requests in flight */
+static void pxdns_request_register(struct pxdns *pxdns, struct request *req);
+static void pxdns_request_deregister(struct pxdns *pxdns, struct request *req);
+static struct request *pxdns_request_find(struct pxdns *pxdns, u16_t id);
+
+/* request hash and timeout list primitives */
+static void pxdns_hash_add(struct pxdns *pxdns, struct request *req);
+static void pxdns_hash_del(struct pxdns *pxdns, struct request *req);
+static void pxdns_timeout_add(struct pxdns *pxdns, struct request *req);
+static void pxdns_timeout_del(struct pxdns *pxdns, struct request *req);
+
+static void pxdns_request_free(struct request *req);
+
+
+/**
+ * Set up the DNS proxy.
+ *
+ * Creates the TCP listener and the IPv4/IPv6 UDP pcbs on port 53,
+ * the host sockets used to talk to the upstream resolvers, the
+ * initial list of resolvers, and registers the host sockets with
+ * the poll manager.
+ *
+ * Failure to set up the TCP listener or the IPv6 host socket is not
+ * fatal.  Any other failure tears down the UDP pcbs and is reported
+ * with an error code other than ERR_OK.
+ */
+err_t
+pxdns_init(struct netif *proxy_netif)
+{
+    struct pxdns *pxdns = &g_pxdns;
+    err_t error;
+
+    LWIP_UNUSED_ARG(proxy_netif);
+
+    pxdns->ltcp = tcp_new();
+    if (pxdns->ltcp != NULL) {
+        tcp_bind_ip6(pxdns->ltcp, IP6_ADDR_ANY, 53);
+        pxdns->ltcp = tcp_listen_dual(pxdns->ltcp);
+        if (pxdns->ltcp != NULL) {
+            tcp_arg(pxdns->ltcp, pxdns);
+            tcp_accept_syn(pxdns->ltcp, pxdns_accept_syn);
+        }
+    }
+
+    pxdns->pmhdl4.callback = pxdns_pmgr_pump;
+    pxdns->pmhdl4.data = (void *)pxdns;
+    pxdns->pmhdl4.slot = -1;
+
+    pxdns->pmhdl6.callback = pxdns_pmgr_pump;
+    pxdns->pmhdl6.data = (void *)pxdns;
+    pxdns->pmhdl6.slot = -1;
+
+    pxdns->pcb4 = udp_new();
+    if (pxdns->pcb4 == NULL) {
+        error = ERR_MEM;
+        goto err_cleanup_pcb;
+    }
+
+    pxdns->pcb6 = udp_new_ip6();
+    if (pxdns->pcb6 == NULL) {
+        error = ERR_MEM;
+        goto err_cleanup_pcb;
+    }
+
+    error = udp_bind(pxdns->pcb4, IP_ADDR_ANY, 53);
+    if (error != ERR_OK) {
+        goto err_cleanup_pcb;
+    }
+
+    error = udp_bind_ip6(pxdns->pcb6, IP6_ADDR_ANY, 53);
+    if (error != ERR_OK) {
+        goto err_cleanup_pcb;
+    }
+
+    udp_recv(pxdns->pcb4, pxdns_recv4, pxdns);
+    udp_recv_ip6(pxdns->pcb6, pxdns_recv6, pxdns);
+
+    pxdns->sock4 = socket(AF_INET, SOCK_DGRAM, 0);
+    if (pxdns->sock4 == INVALID_SOCKET) {
+        /*
+         * 'error' still holds ERR_OK from the bind above, set a real
+         * error code so that the failure is not reported as success.
+         */
+        error = ERR_IF;
+        goto err_cleanup_pcb;
+    }
+
+    pxdns->sock6 = socket(AF_INET6, SOCK_DGRAM, 0);
+    if (pxdns->sock6 == INVALID_SOCKET) {
+        /* it's ok if the host doesn't support IPv6 */
+        /* XXX: TODO: log */
+    }
+
+    pxdns->generation = 0;
+    pxdns->nresolvers = 0;
+    pxdns->resolvers = NULL;
+    pxdns_create_resolver_sockaddrs(pxdns, g_proxy_options->nameservers);
+
+    sys_mutex_new(&pxdns->lock);
+
+    pxdns->timeout_slot = 0;
+    pxdns->timeout_mask = 0;
+
+    /* NB: assumes pollmgr thread is not running yet */
+    pollmgr_add(&pxdns->pmhdl4, pxdns->sock4, POLLIN);
+    if (pxdns->sock6 != INVALID_SOCKET) {
+        pollmgr_add(&pxdns->pmhdl6, pxdns->sock6, POLLIN);
+    }
+
+    return ERR_OK;
+
+  err_cleanup_pcb:
+    if (pxdns->pcb4 != NULL) {
+        udp_remove(pxdns->pcb4);
+        pxdns->pcb4 = NULL;
+    }
+    if (pxdns->pcb6 != NULL) {
+        udp_remove(pxdns->pcb6);
+        pxdns->pcb6 = NULL;     /* don't leave a dangling pointer */
+    }
+
+    return error;
+}
+
+
+/**
+ * lwIP thread callback that installs a new list of nameservers.
+ *
+ * 'arg' is the new list (const char **).  It replaces the list in
+ * the proxy options - the old list is released with RTMemFree() -
+ * and the resolver addresses are rebuilt from it.
+ */
+void
+pxdns_set_nameservers(void *arg)
+{
+    if (g_proxy_options->nameservers != NULL) {
+        RTMemFree((void *)g_proxy_options->nameservers);
+    }
+
+    g_proxy_options->nameservers = (const char **)arg;
+    pxdns_create_resolver_sockaddrs(&g_pxdns, (const char **)arg);
+}
+
+
+/**
+ * Use this list of nameservers to resolve guest requests.
+ *
+ * The names must be numeric addresses.  Entries that cannot be
+ * parsed, are of an unsupported address family, or are IPv6 while
+ * the host has no IPv6 socket, are skipped.  The resulting array
+ * replaces the current one and the generation is incremented, even
+ * when the new list ends up empty.
+ *
+ * Runs on lwIP thread, so no new queries or retransmits compete with
+ * it for the use of the existing list of resolvers (to be replaced).
+ */
+static void
+pxdns_create_resolver_sockaddrs(struct pxdns *pxdns, const char **nameservers)
+{
+    union sockaddr_inet *list = NULL;
+    size_t count = 0;
+    size_t nnames = 0;
+    const char **pp;
+
+    if (nameservers != NULL) {
+        for (pp = nameservers; *pp != NULL; ++pp) {
+            ++nnames;
+        }
+    }
+
+    if (nnames > 0) {
+        list = (union sockaddr_inet *)calloc(sizeof(list[0]), nnames);
+    }
+
+    if (list != NULL) {
+        struct addrinfo hints;
+
+        memset(&hints, 0, sizeof(hints));
+        hints.ai_family = AF_UNSPEC;
+        hints.ai_socktype = SOCK_DGRAM;
+        hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
+
+        for (pp = nameservers; *pp != NULL; ++pp) {
+            struct addrinfo *ai;
+            int usable;
+
+            if (getaddrinfo(*pp, /* "domain" */ "53", &hints, &ai) != 0) {
+                /* XXX: log failed resolution */
+                continue;
+            }
+
+            /*
+             * Usable if the address family is supported, the address
+             * fits into our sockaddr, and - for IPv6 - the host has
+             * IPv6 support at all.  XXX: log the ones we skip.
+             */
+            usable = (ai->ai_family == AF_INET || ai->ai_family == AF_INET6)
+                  && !(ai->ai_addrlen > sizeof(list[count]))
+                  && !(ai->ai_family == AF_INET6
+                       && pxdns->sock6 == INVALID_SOCKET);
+
+            if (usable) {
+                memcpy(&list[count], ai->ai_addr, ai->ai_addrlen);
+                ++count;
+            }
+
+            freeaddrinfo(ai);
+        }
+
+        /* nothing usable, don't keep an empty array around */
+        if (count == 0) {
+            free(list);
+            list = NULL;
+        }
+    }
+
+    /* switch to the new list */
+    ++pxdns->generation;
+    if (pxdns->resolvers != NULL) {
+        free(pxdns->resolvers);
+    }
+    pxdns->resolvers = list;
+    pxdns->nresolvers = count;
+}
+
+
+/**
+ * Free a request together with the reply pbuf it may hold.  The
+ * request must already be off the hash and the timeout list.
+ */
+static void
+pxdns_request_free(struct request *req)
+{
+    struct pbuf *reply = req->reply;
+
+    LWIP_ASSERT1(req->pprev_hash == NULL);
+    LWIP_ASSERT1(req->pprev_timeout == NULL);
+
+    if (reply != NULL) {
+        pbuf_free(reply);
+    }
+
+    free(req);
+}
+
+
+/**
+ * Insert the request at the head of its chain in the id hash.  A
+ * chain that is not empty is counted as a hash collision.
+ */
+static void
+pxdns_hash_add(struct pxdns *pxdns, struct request *req)
+{
+    struct request **bucket;
+    struct request *head;
+
+    LWIP_ASSERT1(req->pprev_hash == NULL);
+
+    bucket = &pxdns->request_hash[HASH(req->id)];
+    head = *bucket;
+
+    req->next_hash = head;
+    if (head != NULL) {
+        head->pprev_hash = &req->next_hash;
+        ++pxdns->hash_collisions;
+    }
+
+    req->pprev_hash = bucket;
+    *bucket = req;
+}
+
+
+/**
+ * Insert the request at the head of the list for the current
+ * timeout slot and mark the slot as busy.  Starts the timer if no
+ * slot was busy before.
+ */
+static void
+pxdns_timeout_add(struct pxdns *pxdns, struct request *req)
+{
+    struct request **bucket;
+    struct request *head;
+    int timer_idle;
+
+    LWIP_ASSERT1(req->pprev_timeout == NULL);
+
+    req->timeout_slot = pxdns->timeout_slot;
+    bucket = &pxdns->timeout_list[req->timeout_slot];
+    head = *bucket;
+
+    req->next_timeout = head;
+    if (head != NULL) {
+        head->pprev_timeout = &req->next_timeout;
+    }
+
+    req->pprev_timeout = bucket;
+    *bucket = req;
+
+    /* an empty mask means that the timer is not running */
+    timer_idle = (pxdns->timeout_mask == 0);
+    pxdns->timeout_mask |= 1U << req->timeout_slot;
+    if (timer_idle) {
+        sys_timeout(1 * 1000, pxdns_timer, pxdns);
+    }
+}
+
+
+/**
+ * Unlink the request from the id hash.  This is also where the
+ * request stops being counted as an active query.
+ */
+static void
+pxdns_hash_del(struct pxdns *pxdns, struct request *req)
+{
+    struct request **pprev;
+    struct request *next;
+
+    LWIP_ASSERT1(req->pprev_hash != NULL);
+    --pxdns->active_queries;
+
+    pprev = req->pprev_hash;
+    next = req->next_hash;
+
+    *pprev = next;
+    if (next != NULL) {
+        next->pprev_hash = pprev;
+    }
+
+    req->next_hash = NULL;
+    req->pprev_hash = NULL;
+}
+
+
+/**
+ * Unlink the request from its timeout list and clear the slot's bit
+ * in the timeout mask if the list has become empty.
+ */
+static void
+pxdns_timeout_del(struct pxdns *pxdns, struct request *req)
+{
+    struct request **pprev;
+    struct request *next;
+
+    LWIP_ASSERT1(req->pprev_timeout != NULL);
+    LWIP_ASSERT1(req->timeout_slot < TIMEOUT);
+
+    pprev = req->pprev_timeout;
+    next = req->next_timeout;
+
+    *pprev = next;
+    if (next != NULL) {
+        next->pprev_timeout = pprev;
+    }
+
+    req->next_timeout = NULL;
+    req->pprev_timeout = NULL;
+
+    if (pxdns->timeout_list[req->timeout_slot] == NULL) {
+        /* may be on pollmgr thread so no sys_untimeout */
+        pxdns->timeout_mask &= ~(1U << req->timeout_slot);
+    }
+}
+
+
+
+/**
+ * Bookkeeping for a new request: count it as an active query and
+ * put it on the id hash and on the current timeout list, all under
+ * the pxdns lock.  Called from pxdns_query().
+ */
+static void
+pxdns_request_register(struct pxdns *pxdns, struct request *req)
+{
+    sys_mutex_lock(&pxdns->lock);
+
+    ++pxdns->active_queries;
+    pxdns_hash_add(pxdns, req);
+    pxdns_timeout_add(pxdns, req);
+
+    sys_mutex_unlock(&pxdns->lock);
+}
+
+
+/**
+ * Undo pxdns_request_register(): take the request off the id hash
+ * and the timeout list under the pxdns lock.  The request itself is
+ * not freed.
+ */
+static void
+pxdns_request_deregister(struct pxdns *pxdns, struct request *req)
+{
+    sys_mutex_lock(&pxdns->lock);
+
+    /*
+     * NB: pxdns_hash_del() decrements pxdns::active_queries, so
+     * don't do it here again or the counter goes down by two for
+     * each request.
+     */
+    pxdns_hash_del(pxdns, req);
+    pxdns_timeout_del(pxdns, req);
+
+    sys_mutex_unlock(&pxdns->lock);
+}
+
+
+/**
+ * Find request by the id we used when relaying it and remove it from
+ * id hash and timeout list.  Called from pxdns_pmgr_pump() when reply
+ * comes.
+ *
+ * Returns NULL if there is no request with that id.  Otherwise the
+ * request is no longer referenced by the proxy and the caller is
+ * responsible for it.
+ */
+static struct request *
+pxdns_request_find(struct pxdns *pxdns, u16_t id)
+{
+    struct request *req = NULL;
+
+    sys_mutex_lock(&pxdns->lock);
+
+    /* find request in the id->req hash */
+    for (req = pxdns->request_hash[HASH(id)]; req != NULL; req = req->next_hash) {
+        if (req->id == id) {
+            break;
+        }
+    }
+
+    if (req != NULL) {
+        /*
+         * NB: pxdns_hash_del() decrements pxdns::active_queries, so
+         * don't do it here again or the counter goes down by two for
+         * each answered request.
+         */
+        pxdns_hash_del(pxdns, req);
+        pxdns_timeout_del(pxdns, req);
+    }
+
+    sys_mutex_unlock(&pxdns->lock);
+    return req;
+}
+
+
+/**
+ * Retransmit or g/c expired requests and move timeout slot forward.
+ *
+ * Reschedules itself for as long as any timeout slot is in use.
+ */
+static void
+pxdns_timer(void *arg)
+{
+    struct pxdns *pxdns = (struct pxdns *)arg;
+    struct request *req, *next;
+    u32_t mask;
+
+    sys_mutex_lock(&pxdns->lock);
+
+    /*
+     * Advance the slot before anything else.  The slot we arrive at
+     * holds the requests that have just expired.  An expired request
+     * that gets retransmitted simply stays where it is: the list is
+     * now the current one, so its timeout starts anew.
+     */
+    LWIP_ASSERT1(pxdns->timeout_slot < TIMEOUT);
+    if (++pxdns->timeout_slot == TIMEOUT) {
+        pxdns->timeout_slot = 0;
+    }
+
+    for (req = pxdns->timeout_list[pxdns->timeout_slot];
+         req != NULL;
+         req = next)
+    {
+        /* fetch the link now, the request may be freed below */
+        next = req->next_timeout;
+
+        if (!pxdns_rexmit(pxdns, req)) {
+            /* no more resolvers to try, give up on this request */
+            pxdns_hash_del(pxdns, req);
+            pxdns_timeout_del(pxdns, req);
+            ++pxdns->expired_queries;
+
+            pxdns_request_free(req);
+        }
+    }
+
+    if (pxdns->timeout_list[pxdns->timeout_slot] != NULL) {
+        pxdns->timeout_mask |= 1U << pxdns->timeout_slot;
+    }
+    else {
+        pxdns->timeout_mask &= ~(1U << pxdns->timeout_slot);
+    }
+    mask = pxdns->timeout_mask;
+
+    sys_mutex_unlock(&pxdns->lock);
+
+    /* keep ticking while there are requests to watch */
+    if (mask != 0) {
+        sys_timeout(1 * 1000, pxdns_timer, pxdns);
+    }
+}
+
+
+/**
+ * lwIP receive callback for the IPv4 pcb, passes the query on to
+ * pxdns_query().
+ */
+static void
+pxdns_recv4(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+            ip_addr_t *addr, u16_t port)
+{
+    pxdns_query((struct pxdns *)arg, pcb, p, ip_2_ipX(addr), port);
+}
+
+/**
+ * lwIP receive callback for the IPv6 pcb, passes the query on to
+ * pxdns_query().
+ */
+static void
+pxdns_recv6(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+            ip6_addr_t *addr, u16_t port)
+{
+    pxdns_query((struct pxdns *)arg, pcb, p, ip6_2_ipX(addr), port);
+}
+
+
+/**
+ * Relay a DNS query from the guest to an upstream resolver.
+ *
+ * The query is copied into a newly allocated request, its id is
+ * replaced with ours (the original is saved to be restored in the
+ * reply), the request is registered and sent to the first resolver
+ * that accepts it.  If it cannot be sent to any resolver the request
+ * is dropped.
+ *
+ * Takes ownership of pbuf 'p' and releases it on every path.
+ */
+static void
+pxdns_query(struct pxdns *pxdns, struct udp_pcb *pcb, struct pbuf *p,
+            ipX_addr_t *addr, u16_t port)
+{
+    struct request *req;
+    int sent;
+
+    if (pxdns->nresolvers == 0) {
+        /* nothing we can do */
+        pbuf_free(p);
+        return;
+    }
+
+    /* too short to even hold the id that we replace below */
+    if (p->tot_len < sizeof(req->client_id)) {
+        pbuf_free(p);
+        return;
+    }
+
+    req = calloc(1, sizeof(struct request) - 1 + p->tot_len);
+    if (req == NULL) {
+        pbuf_free(p);
+        return;
+    }
+
+    /* copy request data */
+    req->size = p->tot_len;
+    pbuf_copy_partial(p, req->data, p->tot_len, 0);
+
+    /*
+     * We work with the copy from now on.  The pbuf is ours to free
+     * (as the early returns above do), so release it here instead of
+     * leaking it for every query that is relayed.
+     */
+    pbuf_free(p);
+
+    /* save client identity and client's request id */
+    req->pcb = pcb;
+    ipX_addr_copy(PCB_ISIPV6(pcb), req->client_addr, *addr);
+    req->client_port = port;
+    memcpy(&req->client_id, req->data, sizeof(req->client_id));
+
+    /* slap our request id onto it */
+    req->id = pxdns->id++;
+    memcpy(req->data, &req->id, sizeof(u16_t));
+
+    /* resolver to forward to */
+    req->generation = pxdns->generation;
+    req->residx = 0;
+
+    /* prepare for relaying the reply back to guest */
+    req->msg_reply.type = TCPIP_MSG_CALLBACK_STATIC;
+    req->msg_reply.sem = NULL;
+    req->msg_reply.msg.cb.function = pxdns_pcb_reply;
+    req->msg_reply.msg.cb.ctx = (void *)req;
+
+    DPRINTF2(("%s: req=%p: client id %d -> id %d\n",
+              __func__, (void *)req, req->client_id, req->id));
+
+    pxdns_request_register(pxdns, req);
+
+    /* try the first resolver, then the rest of them in turn */
+    sent = pxdns_forward_outbound(pxdns, req);
+    if (!sent) {
+        sent = pxdns_rexmit(pxdns, req);
+    }
+    if (!sent) {
+        pxdns_request_deregister(pxdns, req);
+        pxdns_request_free(req);
+    }
+}
+
+
+/**
+ * Send the request to the upstream resolver it currently points to,
+ * i.e. pxdns::resolvers[req::residx], using the host socket that
+ * matches the address family of the resolver.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+pxdns_forward_outbound(struct pxdns *pxdns, struct request *req)
+{
+    union sockaddr_inet *resolver;
+    ssize_t nsent;
+#ifdef RT_OS_WINDOWS
+    const char *pSendData = (const char *)&req->data[0];
+    int cbSendData = (int)req->size;
+    Assert((size_t)cbSendData == req->size);
+#else
+    const void *pSendData = &req->data[0];
+    size_t cbSendData = req->size;
+#endif
+
+    DPRINTF2(("%s: req %p: sending to resolver #%lu\n",
+              __func__, (void *)req, (unsigned long)req->residx));
+
+    LWIP_ASSERT1(req->generation == pxdns->generation);
+    LWIP_ASSERT1(req->residx < pxdns->nresolvers);
+    resolver = &pxdns->resolvers[req->residx];
+
+    switch (resolver->sa.sa_family) {
+    case AF_INET:
+        nsent = sendto(pxdns->sock4, pSendData, cbSendData, 0,
+                       &resolver->sa, sizeof(resolver->sin));
+        break;
+
+    case AF_INET6:
+        if (pxdns->sock6 == INVALID_SOCKET) {
+            /* shouldn't happen, we should have weeded out IPv6 resolvers */
+            return 0;
+        }
+        nsent = sendto(pxdns->sock6, pSendData, cbSendData, 0,
+                       &resolver->sa, sizeof(resolver->sin6));
+        break;
+
+    default:
+        /* shouldn't happen, we should have weeded out unsupported families */
+        return 0;
+    }
+
+    if ((size_t)nsent == req->size) {
+        return 1;               /* sent */
+    }
+
+    if (nsent < 0) {
+        DPRINTF2(("%s: send: %R[sockerr]\n", __func__, SOCKERRNO()));
+    }
+    else {
+        DPRINTF2(("%s: sent only %lu of %lu\n",
+                  __func__, (unsigned long)nsent, (unsigned long)req->size));
+    }
+    return 0;                   /* not sent, caller will retry as necessary */
+}
+
+
+/**
+ * Send the request to the next upstream resolver, skipping the ones
+ * the send fails for.
+ *
+ * Returns 1 if the request was sent.  Returns 0 if there are no
+ * resolvers left to try or if the list of resolvers has changed
+ * since the request was created.
+ */
+static int
+pxdns_rexmit(struct pxdns *pxdns, struct request *req)
+{
+    if (/* __predict_false */ req->generation != pxdns->generation) {
+        DPRINTF2(("%s: req %p: generation %lu != pxdns generation %lu\n",
+                  __func__, (void *)req,
+                  (unsigned long)req->generation,
+                  (unsigned long)pxdns->generation));
+        return 0;
+    }
+
+    LWIP_ASSERT1(req->residx < pxdns->nresolvers);
+
+    for (;;) {
+        ++req->residx;
+        if (req->residx == pxdns->nresolvers) {
+            return 0;           /* ran out of resolvers */
+        }
+
+        if (pxdns_forward_outbound(pxdns, req)) {
+            return 1;
+        }
+    }
+}
+
+
+/**
+ * Poll manager callback for the host sockets: a resolver has sent a
+ * reply (or the socket has an error pending).
+ *
+ * Reads one datagram, looks up the request it answers, puts the
+ * client's original id back and posts the reply to the lwIP thread
+ * to be sent to the guest by pxdns_pcb_reply().
+ *
+ * Always returns POLLIN.
+ */
+static int
+pxdns_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxdns *pxdns = (struct pxdns *)handler->data;
+    struct request *req;
+    ssize_t nread;
+    u16_t id;
+
+    LWIP_ASSERT1(handler == &pxdns->pmhdl4 || handler == &pxdns->pmhdl6);
+    LWIP_ASSERT1(fd == (handler == &pxdns->pmhdl4 ? pxdns->sock4 : pxdns->sock6));
+
+    if ((revents & ~(POLLIN|POLLERR)) != 0) {
+        DPRINTF0(("%s: unexpected revents 0x%x\n", __func__, revents));
+        return POLLIN;
+    }
+
+    /* fetch the pending socket error, just to log it */
+    if ((revents & POLLERR) != 0) {
+        int sockerr = -1;
+        socklen_t optlen = (socklen_t)sizeof(sockerr);
+
+        if (getsockopt(fd, SOL_SOCKET,
+                       SO_ERROR, (char *)&sockerr, &optlen) < 0)
+        {
+            DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
+                     __func__, fd, SOCKERRNO()));
+        }
+        else {
+            DPRINTF(("%s: sock %d: %R[sockerr]\n",
+                     __func__, fd, sockerr));
+        }
+    }
+
+    if (!(revents & POLLIN)) {
+        return POLLIN;
+    }
+
+
+#ifdef RT_OS_WINDOWS
+    nread = recv(fd, (char *)pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
+#else
+    nread = recv(fd, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
+#endif
+    if (nread < 0) {
+        DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
+        return POLLIN;
+    }
+
+    /* check for minimum dns packet length */
+    if (nread < 12) {
+        DPRINTF2(("%s: short reply %lu bytes\n",
+                  __func__, (unsigned long)nread));
+        return POLLIN;
+    }
+
+    /* XXX: shall we proxy back RCODE=Refused responses? */
+
+    /* the id at the start of the reply tells which request it is for */
+    memcpy(&id, pollmgr_udpbuf, sizeof(id));
+    req = pxdns_request_find(pxdns, id);
+    if (req == NULL) {
+        DPRINTF2(("%s: orphaned reply for %d\n", __func__, id));
+        ++pxdns->late_answers;
+        return POLLIN;
+    }
+
+    DPRINTF2(("%s: reply for req=%p: id %d -> client id %d\n",
+              __func__, (void *)req, req->id, req->client_id));
+
+    req->reply = pbuf_alloc(PBUF_RAW, nread, PBUF_RAM);
+    if (req->reply == NULL) {
+        DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)nread));
+        pxdns_request_free(req);
+        return POLLIN;
+    }
+
+    /* restore the id the client has used */
+    memcpy(pollmgr_udpbuf, &req->client_id, sizeof(req->client_id));
+    if (pbuf_take(req->reply, pollmgr_udpbuf, nread) != ERR_OK) {
+        DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)nread));
+        pxdns_request_free(req);
+        return POLLIN;
+    }
+
+    /* the rest happens on the lwIP thread */
+    proxy_lwip_post(&req->msg_reply);
+    return POLLIN;
+}
+
+
+/**
+ * Called on lwIP thread via request::msg_reply callback.
+ *
+ * Sends the reply to the client from the pcb its request was
+ * received on, then frees the request.
+ */
+static void
+pxdns_pcb_reply(void *ctx)
+{
+    struct request *req = (struct request *)ctx;
+    err_t rc;
+
+    rc = udp_sendto(req->pcb, req->reply,
+                    ipX_2_ip(&req->client_addr), req->client_port);
+    if (rc != ERR_OK) {
+        DPRINTF(("%s: udp_sendto err %s\n",
+                 __func__, proxy_lwip_strerr(rc)));
+    }
+
+    /* done with the request whether the reply was sent or not */
+    pxdns_request_free(req);
+}
+
+
+/**
+ * TCP DNS proxy.  This kicks in for large replies that don't fit into
+ * 512 bytes of UDP payload.  Client will retry with TCP to get
+ * complete reply.
+ *
+ * The connection is handed over to the TCP proxy with the first
+ * resolver on the list as its destination.
+ */
+static err_t
+pxdns_accept_syn(void *arg, struct tcp_pcb *newpcb, struct pbuf *syn)
+{
+    struct pxdns *pxdns = (struct pxdns *)arg;
+    union sockaddr_inet *si;
+    ipX_addr_t *dst;
+    u16_t dst_port;
+    int is_ipv6;
+
+    tcp_accepted(pxdns->ltcp);
+
+    if (pxdns->nresolvers == 0) {
+        return ERR_CONN;
+    }
+
+    si = &pxdns->resolvers[0];
+    is_ipv6 = (si->sa.sa_family == AF_INET6);
+
+    if (is_ipv6) {
+        dst = (ipX_addr_t *)&si->sin6.sin6_addr;
+        dst_port = ntohs(si->sin6.sin6_port);
+    }
+    else {
+        dst = (ipX_addr_t *)&si->sin.sin_addr;
+        dst_port = ntohs(si->sin.sin_port);
+    }
+
+    /*
+     * XXX: TODO: need to implement protocol hooks.  E.g. here if
+     * connect fails, we should try connecting to a different server.
+     */
+    return pxtcp_pcb_accept_outbound(newpcb, syn, is_ipv6, dst, dst_port);
+}
diff --git a/src/VBox/NetworkServices/NAT/pxping.c b/src/VBox/NetworkServices/NAT/pxping.c
new file mode 100644
index 00000000..86dc284b
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/pxping.c
@@ -0,0 +1,2001 @@
+/* $Id: pxping.c $ */
+/** @file
+ * NAT Network - ping proxy, raw sockets version.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+#include "proxy.h"
+#include "proxy_pollmgr.h"
+#include "pxremap.h"
+
+#include <iprt/string.h>
+
+#ifndef RT_OS_WINDOWS
+#include <sys/types.h>
+#include <sys/socket.h>
+#ifdef RT_OS_DARWIN
+# define __APPLE_USE_RFC_3542
+#endif
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#else
+#include <iprt/stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "winpoll.h"
+#endif
+
+#include "lwip/opt.h"
+
+#include "lwip/sys.h"
+#include "lwip/tcpip.h"
+#include "lwip/inet_chksum.h"
+#include "lwip/ip.h"
+#include "lwip/icmp.h"
+
+#if defined(RT_OS_LINUX) && !defined(__USE_GNU)
+#if __GLIBC_PREREQ(2, 8)
+/*
+ * XXX: This is gross. in6_pktinfo is now hidden behind _GNU_SOURCE
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=6775
+ *
+ * But in older glibc versions, e.g. RHEL5, it is not! I don't want
+ * to deal with _GNU_SOURCE now, so as a kludge check for glibc
+ * version. It seems the __USE_GNU guard was introduced in 2.8.
+ */
+/* Local stand-in declared only when glibc >= 2.8 hides the system one behind __USE_GNU. */
+/* NOTE(review): presumably must match the system definition field for field - confirm. */
+struct in6_pktinfo {
+ struct in6_addr ipi6_addr;
+ unsigned int ipi6_ifindex;
+};
+#endif /* __GLIBC_PREREQ */
+#endif /* RT_OS_LINUX && !__USE_GNU */
+
+
+/* forward */
+struct ping_pcb;
+
+
+/**
+ * Global state for ping proxy collected in one entity to minimize
+ * globals. There's only one instance of this structure.
+ *
+ * Raw ICMP sockets are promiscuous, so it doesn't make sense to have
+ * multiple. If this code ever needs to support multiple netifs, the
+ * netif member should be exiled into "pcb".
+ */
+struct pxping {
+ /* Socket used for IPv4 pings, as passed to pxping_init(). */
+ SOCKET sock4;
+
+#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS)
+# define DF_WITH_IP_HDRINCL
+ /* Last IP_HDRINCL value successfully set on sock4 (starts as 0). */
+ int hdrincl;
+#else
+ /* Last "don't fragment" option value successfully set on sock4; -1 until first set. */
+ int df;
+#endif
+ /* Last IP_TTL value successfully set on sock4; -1 until first set. */
+ int ttl;
+ /* Last IP_TOS value successfully set on sock4 (starts as 0). */
+ int tos;
+
+ /* Socket used for IPv6 pings, as passed to pxping_init(). */
+ SOCKET sock6;
+#ifdef RT_OS_WINDOWS
+ /* WSARecvMsg extension function for sock6, looked up in pxping_init_windows(). */
+ LPFN_WSARECVMSG pfWSARecvMsg6;
+#endif
+ /* Last IPV6_UNICAST_HOPS value successfully set on sock6; -1 until first set. */
+ int hopl;
+
+ /* Poll manager handlers registered for sock4 and sock6 respectively. */
+ struct pollmgr_handler pmhdl4;
+ struct pollmgr_handler pmhdl6;
+
+ /* The netif passed to pxping_init(). */
+ struct netif *netif;
+
+ /**
+ * Protect lwIP and pmgr accesses to the list of pcbs.
+ */
+ sys_mutex_t lock;
+
+ /*
+ * We need to find pcbs both from the guest side and from the host
+ * side. If we need to support industrial grade ping throughput,
+ * we will need two pcb hashes. For now, a short linked list
+ * should be enough. Cf. pxping_pcb_for_request() and
+ * pxping_pcb_for_reply().
+ */
+#define PXPING_MAX_PCBS 8
+ /* Number of currently allocated pcbs, capped at PXPING_MAX_PCBS. */
+ size_t npcbs;
+ /* Head of the pcb list, chained through ping_pcb::next. */
+ struct ping_pcb *pcbs;
+
+ /*
+ * Expiration wheel: pxping_timer() runs once a second, advances
+ * timeout_slot and inspects the pcbs chained in that slot, so an
+ * idle pcb is dropped when the wheel comes back around to its slot.
+ */
+#define TIMEOUT 5
+ /* Non-zero while a pxping_timer() callback is scheduled. */
+ int timer_active;
+ /* Current slot of the wheel; new and refreshed pcbs get this value. */
+ size_t timeout_slot;
+ /* Per-slot chains, linked through ping_pcb::next_timeout. */
+ struct ping_pcb *timeout_list[TIMEOUT];
+};
+
+
+/**
+ * Quasi PCB for ping.
+ */
+struct ping_pcb {
+ /* Source and destination address of the guest's echo request. */
+ ipX_addr_t src;
+ ipX_addr_t dst;
+
+ /* Non-zero for ICMPv6 pings. */
+ u8_t is_ipv6;
+ /* Non-zero when the outbound remap returned PXREMAP_MAPPED for dst. */
+ u8_t is_mapped;
+
+ /* Echo identifier as found in the guest's request (kept as read from the header). */
+ u16_t guest_id;
+ /* Random identifier substituted for guest_id in the request sent from the host. */
+ u16_t host_id;
+
+ /**
+ * Desired slot in pxping::timeout_list. See pxping_timer().
+ */
+ size_t timeout_slot;
+
+ /**
+ * Chaining for pxping::timeout_list
+ */
+ /* pprev_timeout points at whatever pointer refers to this pcb; NULL when unlinked. */
+ struct ping_pcb **pprev_timeout;
+ struct ping_pcb *next_timeout;
+
+ /**
+ * Chaining for pxping::pcbs
+ */
+ struct ping_pcb *next;
+
+ /* Socket address the proxied request is sent to (see pxping_pcb_for_request()). */
+ union {
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ } peer;
+};
+
+
+/**
+ * lwIP thread callback message for IPv4 ping.
+ *
+ * We pass raw IP datagram for ip_output_if() so we only need pbuf and
+ * netif (from pxping).
+ */
+struct ping_msg {
+ /* NOTE(review): presumably the callback message posted to the lwIP thread - confirm at the use site. */
+ struct tcpip_msg msg;
+ /* Proxy state; supplies the netif to output on. */
+ struct pxping *pxping;
+ /* Raw IP datagram to forward. */
+ struct pbuf *p;
+};
+
+
+/**
+ * lwIP thread callback message for IPv6 ping.
+ *
+ * We cannot obtain raw IPv6 datagram from host without extra trouble,
+ * so we pass ICMPv6 payload in pbuf and also other parameters to
+ * ip6_output_if().
+ */
+struct ping6_msg {
+ /* NOTE(review): presumably the callback message posted to the lwIP thread - confirm at the use site. */
+ struct tcpip_msg msg;
+ /* Proxy state; supplies the netif to output on. */
+ struct pxping *pxping;
+ /* ICMPv6 payload (no IPv6 header). */
+ struct pbuf *p;
+ /* Remaining parameters for ip6_output_if(): addresses, hop limit, traffic class. */
+ ip6_addr_t src, dst;
+ int hopl, tclass;
+};
+
+
+#ifdef RT_OS_WINDOWS
+static int pxping_init_windows(struct pxping *pxping);
+#endif
+static void pxping_recv4(void *arg, struct pbuf *p);
+static void pxping_recv6(void *arg, struct pbuf *p);
+
+static void pxping_timer(void *arg);
+static void pxping_timer_needed(struct pxping *pxping);
+
+static struct ping_pcb *pxping_pcb_for_request(struct pxping *pxping,
+ int is_ipv6,
+ ipX_addr_t *src, ipX_addr_t *dst,
+ u16_t guest_id);
+static struct ping_pcb *pxping_pcb_for_reply(struct pxping *pxping, int is_ipv6,
+ ipX_addr_t *dst, u16_t host_id);
+
+static FNRTSTRFORMATTYPE pxping_pcb_rtstrfmt;
+static struct ping_pcb *pxping_pcb_allocate(struct pxping *pxping);
+static void pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb);
+static void pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb);
+static void pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb);
+static void pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb);
+static void pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb);
+
+static int pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents);
+
+static void pxping_pmgr_icmp4(struct pxping *pxping);
+static void pxping_pmgr_icmp4_echo(struct pxping *pxping,
+ u16_t iplen, struct sockaddr_in *peer);
+static void pxping_pmgr_icmp4_error(struct pxping *pxping,
+ u16_t iplen, struct sockaddr_in *peer);
+static void pxping_pmgr_icmp6(struct pxping *pxping);
+static void pxping_pmgr_icmp6_echo(struct pxping *pxping,
+ ip6_addr_t *src, ip6_addr_t *dst,
+ int hopl, int tclass, u16_t icmplen);
+static void pxping_pmgr_icmp6_error(struct pxping *pxping,
+ ip6_addr_t *src, ip6_addr_t *dst,
+ int hopl, int tclass, u16_t icmplen);
+
+static void pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen);
+static void pxping_pcb_forward_inbound(void *arg);
+
+static void pxping_pmgr_forward_inbound6(struct pxping *pxping,
+ ip6_addr_t *src, ip6_addr_t *dst,
+ u8_t hopl, u8_t tclass,
+ u16_t icmplen);
+static void pxping_pcb_forward_inbound6(void *arg);
+
+/*
+ * NB: This is not documented except in RTFS.
+ *
+ * If ip_output_if() is passed dest == NULL then it treats p as
+ * complete IP packet with payload pointing to the IP header. It does
+ * not build IP header, ignores all header-related arguments, fetches
+ * real destination from the header in the pbuf and outputs pbuf to
+ * the specified netif.
+ */
+#define ip_raw_output_if(p, netif) \
+ (ip_output_if((p), NULL, NULL, 0, 0, 0, (netif)))
+
+
+
+static struct pxping g_pxping;
+
+
+/**
+ * Initialize the single global ping proxy instance.
+ *
+ * @param netif  Stored in the global state.
+ * @param sock4  Socket for IPv4 pings, or INVALID_SOCKET to disable IPv4.
+ * @param sock6  Socket for IPv6 pings, or INVALID_SOCKET to disable IPv6.
+ *
+ * @returns ERR_VAL if both sockets are invalid, ERR_OK otherwise.
+ *          setsockopt() failures are only logged.
+ *
+ * For each valid socket the cached option state is reset, the socket
+ * is added to the poll manager with pxping_pmgr_pump() as the handler
+ * and the matching lwIP-side receive hook is installed.
+ */
+err_t
+pxping_init(struct netif *netif, SOCKET sock4, SOCKET sock6)
+{
+ const int on = 1;
+ int status;
+
+ if (sock4 == INVALID_SOCKET && sock6 == INVALID_SOCKET) {
+ return ERR_VAL;
+ }
+
+ g_pxping.netif = netif;
+ /* NOTE(review): the result of sys_mutex_new() is not checked. */
+ sys_mutex_new(&g_pxping.lock);
+
+ g_pxping.sock4 = sock4;
+ if (g_pxping.sock4 != INVALID_SOCKET) {
+ /* initial values for the cached socket options, see pxping_recv4() */
+#ifdef DF_WITH_IP_HDRINCL
+ g_pxping.hdrincl = 0;
+#else
+ g_pxping.df = -1;
+#endif
+ g_pxping.ttl = -1;
+ g_pxping.tos = 0;
+
+#ifdef RT_OS_LINUX
+ {
+ const int dont = IP_PMTUDISC_DONT;
+ status = setsockopt(sock4, IPPROTO_IP, IP_MTU_DISCOVER,
+ &dont, sizeof(dont));
+ if (status != 0) {
+ DPRINTF(("IP_MTU_DISCOVER: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+#endif /* RT_OS_LINUX */
+
+ g_pxping.pmhdl4.callback = pxping_pmgr_pump;
+ g_pxping.pmhdl4.data = (void *)&g_pxping;
+ g_pxping.pmhdl4.slot = -1;
+ pollmgr_add(&g_pxping.pmhdl4, g_pxping.sock4, POLLIN);
+
+ ping_proxy_accept(pxping_recv4, &g_pxping);
+ }
+
+ g_pxping.sock6 = sock6;
+#ifdef RT_OS_WINDOWS
+ /* we need recvmsg */
+ if (g_pxping.sock6 != INVALID_SOCKET) {
+ status = pxping_init_windows(&g_pxping);
+ if (status == SOCKET_ERROR) {
+ /* IPv6 is disabled; the caller's socket is deliberately not closed here */
+ g_pxping.sock6 = INVALID_SOCKET;
+ /* close(sock6); */
+ }
+ }
+#endif
+ if (g_pxping.sock6 != INVALID_SOCKET) {
+ g_pxping.hopl = -1;
+
+ /* fall back to the older option name where the RECV* variant is not defined */
+#if !defined(IPV6_RECVPKTINFO)
+#define IPV6_RECVPKTINFO (IPV6_PKTINFO)
+#endif
+ status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVPKTINFO,
+ (const char *)&on, sizeof(on));
+ if (status < 0) {
+ DPRINTF(("IPV6_RECVPKTINFO: %R[sockerr]\n", SOCKERRNO()));
+ /* XXX: for now this is fatal */
+ /* NOTE(review): despite the remark above, execution continues after logging. */
+ }
+
+#if !defined(IPV6_RECVHOPLIMIT)
+#define IPV6_RECVHOPLIMIT (IPV6_HOPLIMIT)
+#endif
+ status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
+ (const char *)&on, sizeof(on));
+ if (status < 0) {
+ DPRINTF(("IPV6_RECVHOPLIMIT: %R[sockerr]\n", SOCKERRNO()));
+ }
+
+#ifdef IPV6_RECVTCLASS /* new in RFC 3542, there's no RFC 2292 counterpart */
+ /** @todo IPV6_RECVTCLASS */
+#endif
+
+ g_pxping.pmhdl6.callback = pxping_pmgr_pump;
+ g_pxping.pmhdl6.data = (void *)&g_pxping;
+ g_pxping.pmhdl6.slot = -1;
+ pollmgr_add(&g_pxping.pmhdl6, g_pxping.sock6, POLLIN);
+
+ ping6_proxy_accept(pxping_recv6, &g_pxping);
+ }
+
+ /* makes %R[ping_pcb] usable in the log statements of this file */
+ status = RTStrFormatTypeRegister("ping_pcb", pxping_pcb_rtstrfmt, NULL);
+ AssertRC(status);
+
+ return ERR_OK;
+}
+
+
+#ifdef RT_OS_WINDOWS
+/**
+ * Look up the WSARecvMsg extension function for the IPv6 socket and
+ * store it in pxping::pfWSARecvMsg6 (which is reset to NULL first).
+ *
+ * Returns the WSAIoctl() status unchanged.
+ */
+static int
+pxping_init_windows(struct pxping *pxping)
+{
+    GUID guidRecvMsg = WSAID_WSARECVMSG;
+    DWORD cbReturned;
+
+    pxping->pfWSARecvMsg6 = NULL;
+
+    return WSAIoctl(pxping->sock6,
+                    SIO_GET_EXTENSION_FUNCTION_POINTER,
+                    &guidRecvMsg, sizeof(guidRecvMsg),
+                    &pxping->pfWSARecvMsg6, sizeof(pxping->pfWSARecvMsg6),
+                    &cbReturned,
+                    NULL, NULL);
+}
+#endif /* RT_OS_WINDOWS */
+
+
+/**
+ * Unfolded contribution to a one's complement sum of replacing the
+ * 16-bit word "oval" with "nval": complement of the old value plus
+ * the new value.
+ */
+static u32_t
+chksum_delta_16(u16_t oval, u16_t nval)
+{
+    const u16_t inverted_old = (u16_t)~oval;
+
+    return (u32_t)inverted_old + (u32_t)nval;
+}
+
+
+/**
+ * Store "nval" into the 16-bit word at "oldp" and return the unfolded
+ * checksum delta for that replacement.
+ */
+static u32_t
+chksum_update_16(u16_t *oldp, u16_t nval)
+{
+    const u16_t previous = *oldp;
+
+    *oldp = nval;
+    return chksum_delta_16(previous, nval);
+}
+
+
+/**
+ * Unfolded contribution to a one's complement sum of replacing the
+ * 32-bit word "oval" with "nval".  Both the complemented old value and
+ * the new value are folded into 16-bit halves before being added.
+ */
+static u32_t
+chksum_delta_32(u32_t oval, u32_t nval)
+{
+    const u32_t inverted_old = ~oval;
+    const u32_t folded_old = FOLD_U32T(inverted_old);
+    const u32_t folded_new = FOLD_U32T(nval);
+
+    return folded_old + folded_new;
+}
+
+
+/**
+ * Store "nval" into the 32-bit word at "oldp" and return the unfolded
+ * checksum delta for that replacement.
+ */
+static u32_t
+chksum_update_32(u32_t *oldp, u32_t nval)
+{
+    const u32_t previous = *oldp;
+
+    *oldp = nval;
+    return chksum_delta_32(previous, nval);
+}
+
+
+/**
+ * Unfolded checksum delta of replacing IPv6 address *oldp with *newp,
+ * accumulated over the four 32-bit words of the address.  Neither
+ * address is modified.
+ */
+static u32_t
+chksum_delta_ipv6(const ip6_addr_t *oldp, const ip6_addr_t *newp)
+{
+    u32_t total = 0;
+    int word;
+
+    for (word = 0; word < 4; ++word) {
+        total += chksum_delta_32(oldp->addr[word], newp->addr[word]);
+    }
+
+    return total;
+}
+
+
+/**
+ * Overwrite IPv6 address *oldp with *newp, one 32-bit word at a time,
+ * and return the accumulated unfolded checksum delta.
+ */
+static u32_t
+chksum_update_ipv6(ip6_addr_t *oldp, const ip6_addr_t *newp)
+{
+    u32_t total = 0;
+    int word;
+
+    for (word = 0; word < 4; ++word) {
+        total += chksum_update_32(&oldp->addr[word], newp->addr[word]);
+    }
+
+    return total;
+}
+
+
+/**
+ * ICMP Echo Request in pbuf "p" is to be proxied.
+ *
+ * @param arg  The struct pxping instance registered with the hook.
+ * @param p    The request; p->payload points at the ICMP header on
+ *             entry.  The pbuf is always freed before returning.
+ *
+ * Requests carrying IP options are dropped.  The pcb for the request
+ * is looked up (or created), the DF/TTL/TOS of the guest's datagram
+ * are propagated to the host socket, the echo identifier is replaced
+ * with the pcb's host_id and the packet is sent to the pcb's peer.
+ * If sending fails, the original headers are restored and selected
+ * errors are reported back as ICMP destination unreachable.
+ */
+static void
+pxping_recv4(void *arg, struct pbuf *p)
+{
+ struct pxping *pxping = (struct pxping *)arg;
+ struct ping_pcb *pcb;
+#ifdef DF_WITH_IP_HDRINCL
+ struct ip_hdr iph_orig;
+#endif
+ struct icmp_echo_hdr icmph_orig;
+ struct ip_hdr *iph;
+ struct icmp_echo_hdr *icmph;
+ int df, ttl, tos;
+ u32_t sum;
+ u16_t iphlen;
+ int status;
+
+ iphlen = ip_current_header_tot_len();
+ if (iphlen != IP_HLEN) { /* we don't do options */
+ pbuf_free(p);
+ return;
+ }
+
+ iph = (/* UNCONST */ struct ip_hdr *)ip_current_header();
+ icmph = (struct icmp_echo_hdr *)p->payload;
+
+ pcb = pxping_pcb_for_request(pxping, 0,
+ ipX_current_src_addr(),
+ ipX_current_dest_addr(),
+ icmph->id);
+ if (pcb == NULL) {
+ pbuf_free(p);
+ return;
+ }
+
+ DPRINTF(("ping %p: %R[ping_pcb] seq %d len %u ttl %d\n",
+ pcb, pcb,
+ ntohs(icmph->seqno), (unsigned int)p->tot_len,
+ IPH_TTL(iph)));
+
+ /*
+ * TTL is only checked and decremented for destinations that are
+ * not remapped.
+ */
+ ttl = IPH_TTL(iph);
+ if (!pcb->is_mapped) {
+ if (RT_UNLIKELY(ttl == 1)) {
+ status = pbuf_header(p, iphlen); /* back to IP header */
+ if (RT_LIKELY(status == 0)) {
+ icmp_time_exceeded(p, ICMP_TE_TTL);
+ }
+ pbuf_free(p);
+ return;
+ }
+ --ttl;
+ }
+
+ /*
+ * OS X doesn't provide a socket option to control fragmentation.
+ * Solaris doesn't provide IP_DONTFRAG on all releases we support.
+ * In this case we have to use IP_HDRINCL. We don't want to use
+ * it always since it doesn't handle fragmentation (but that's ok
+ * for DF) and Windows doesn't do automatic source address
+ * selection with IP_HDRINCL.
+ */
+ df = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0;
+
+#ifdef DF_WITH_IP_HDRINCL
+ /* IP_HDRINCL is switched on exactly when the guest asked for DF */
+ if (df != pxping->hdrincl) {
+ status = setsockopt(pxping->sock4, IPPROTO_IP, IP_HDRINCL,
+ &df, sizeof(df));
+ if (RT_LIKELY(status == 0)) {
+ pxping->hdrincl = df;
+ }
+ else {
+ DPRINTF(("IP_HDRINCL: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+
+ if (pxping->hdrincl) {
+ status = pbuf_header(p, iphlen); /* back to IP header */
+ if (RT_UNLIKELY(status != 0)) {
+ pbuf_free(p);
+ return;
+ }
+
+ /* we will overwrite IP header, save original for ICMP errors */
+ memcpy(&iph_orig, iph, iphlen);
+
+ if (pcb->is_mapped) {
+ ip4_addr_set_u32(&iph->dest, pcb->peer.sin.sin_addr.s_addr);
+ }
+
+ if (g_proxy_options->src4 != NULL) {
+ ip4_addr_set_u32(&iph->src, g_proxy_options->src4->sin_addr.s_addr);
+ }
+ else {
+ /* let the kernel select suitable source address */
+ ip_addr_set_any(&iph->src);
+ }
+
+ IPH_TTL_SET(iph, ttl); /* already decremented */
+ IPH_ID_SET(iph, 0); /* kernel will set one */
+#ifdef RT_OS_DARWIN
+ /* wants ip_offset and ip_len fields in host order */
+ IPH_OFFSET_SET(iph, ntohs(IPH_OFFSET(iph)));
+ IPH_LEN_SET(iph, ntohs(IPH_LEN(iph)));
+ /* wants checksum of everything (sic!), in host order */
+ sum = inet_chksum_pbuf(p);
+ IPH_CHKSUM_SET(iph, sum);
+#else /* !RT_OS_DARWIN */
+ IPH_CHKSUM_SET(iph, 0); /* kernel will recalculate */
+#endif
+ }
+ else /* !pxping->hdrincl */
+#endif /* DF_WITH_IP_HDRINCL */
+ {
+ /*
+ * No IP header is passed to the kernel in this branch, so DF,
+ * TTL and TOS are set with socket options.  The last value set
+ * is cached in pxping to skip redundant setsockopt() calls.
+ */
+#if !defined(DF_WITH_IP_HDRINCL)
+ /* control DF flag via setsockopt(2) */
+#define USE_DF_OPTION(_Optname) \
+ const int dfopt = _Optname; \
+ const char * const dfoptname = #_Optname; NOREF(dfoptname)
+#if defined(RT_OS_LINUX)
+ USE_DF_OPTION(IP_MTU_DISCOVER);
+ df = df ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
+#elif defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
+ USE_DF_OPTION(IP_DONTFRAG);
+#elif defined(RT_OS_WINDOWS)
+ USE_DF_OPTION(IP_DONTFRAGMENT);
+#endif
+ if (df != pxping->df) {
+ status = setsockopt(pxping->sock4, IPPROTO_IP, dfopt,
+ (char *)&df, sizeof(df));
+ if (RT_LIKELY(status == 0)) {
+ pxping->df = df;
+ }
+ else {
+ DPRINTF(("%s: %R[sockerr]\n", dfoptname, SOCKERRNO()));
+ }
+ }
+#endif /* !DF_WITH_IP_HDRINCL */
+
+ if (ttl != pxping->ttl) {
+ status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TTL,
+ (char *)&ttl, sizeof(ttl));
+ if (RT_LIKELY(status == 0)) {
+ pxping->ttl = ttl;
+ }
+ else {
+ DPRINTF(("IP_TTL: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+
+ tos = IPH_TOS(iph);
+ if (tos != pxping->tos) {
+ status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TOS,
+ (char *)&tos, sizeof(tos));
+ if (RT_LIKELY(status == 0)) {
+ pxping->tos = tos;
+ }
+ else {
+ DPRINTF(("IP_TOS: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+ }
+
+ /* rewrite ICMP echo header */
+ /* (id is replaced with host_id, checksum is adjusted incrementally) */
+ memcpy(&icmph_orig, icmph, sizeof(*icmph));
+ sum = (u16_t)~icmph->chksum;
+ sum += chksum_update_16(&icmph->id, pcb->host_id);
+ sum = FOLD_U32T(sum);
+ icmph->chksum = ~sum;
+
+ status = proxy_sendto(pxping->sock4, p,
+ &pcb->peer.sin, sizeof(pcb->peer.sin));
+ if (status != 0) {
+ /* the error code is the negated status */
+ int error = -status;
+ DPRINTF(("%s: sendto: %R[sockerr]\n", __func__, error));
+
+ /* make p start at the original IP header again for the icmp_*() calls below */
+#ifdef DF_WITH_IP_HDRINCL
+ if (pxping->hdrincl) {
+ /* restore original IP header */
+ memcpy(iph, &iph_orig, iphlen);
+ }
+ else
+#endif
+ {
+ status = pbuf_header(p, iphlen); /* back to IP header */
+ if (RT_UNLIKELY(status != 0)) {
+ pbuf_free(p);
+ return;
+ }
+ }
+
+ /* restore original ICMP header */
+ memcpy(icmph, &icmph_orig, sizeof(*icmph));
+
+ /*
+ * Some ICMP errors may be generated by the kernel and we read
+ * them from the socket and forward them normally, hence the
+ * ifdefs below.
+ */
+ switch (error) {
+
+#if !( defined(RT_OS_SOLARIS) \
+ || (defined(RT_OS_LINUX) && !defined(DF_WITH_IP_HDRINCL)) \
+ )
+ case EMSGSIZE:
+ icmp_dest_unreach(p, ICMP_DUR_FRAG);
+ break;
+#endif
+
+ case ENETDOWN:
+ case ENETUNREACH:
+ icmp_dest_unreach(p, ICMP_DUR_NET);
+ break;
+
+ case EHOSTDOWN:
+ case EHOSTUNREACH:
+ icmp_dest_unreach(p, ICMP_DUR_HOST);
+ break;
+ }
+ }
+
+ pbuf_free(p);
+}
+
+
+/**
+ * ICMPv6 Echo Request in pbuf "p" is to be proxied.
+ *
+ * @param arg  The struct pxping instance registered with the hook.
+ * @param p    The request; p->payload points at the ICMPv6 header on
+ *             entry.  The pbuf is always freed before returning.
+ *
+ * The pcb for the request is looked up (or created), the hop limit of
+ * the guest's datagram is propagated to the host socket, the echo
+ * identifier is replaced with the pcb's host_id and the packet is sent
+ * to the pcb's peer.  If sending fails, the original identifier is
+ * restored and selected errors are reported back to the guest as
+ * ICMPv6 destination unreachable.
+ */
+static void
+pxping_recv6(void *arg, struct pbuf *p)
+{
+    struct pxping *pxping = (struct pxping *)arg;
+    struct ping_pcb *pcb;
+    struct ip6_hdr *iph;
+    struct icmp6_echo_hdr *icmph;
+    int hopl;
+    u16_t iphlen;
+    u16_t id, seq;
+    int status;
+
+    iph = (/* UNCONST */ struct ip6_hdr *)ip6_current_header();
+    iphlen = ip_current_header_tot_len();
+
+    icmph = (struct icmp6_echo_hdr *)p->payload;
+
+    id = icmph->id;
+    seq = icmph->seqno;
+
+    pcb = pxping_pcb_for_request(pxping, 1,
+                                 ipX_current_src_addr(),
+                                 ipX_current_dest_addr(),
+                                 id);
+    if (pcb == NULL) {
+        pbuf_free(p);
+        return;
+    }
+
+    DPRINTF(("ping %p: %R[ping_pcb] seq %d len %u hopl %d\n",
+             pcb, pcb,
+             ntohs(seq), (unsigned int)p->tot_len,
+             IP6H_HOPLIM(iph)));
+
+    /*
+     * Hop limit is only checked and decremented for destinations that
+     * are not remapped.
+     */
+    hopl = IP6H_HOPLIM(iph);
+    if (!pcb->is_mapped) {
+        if (hopl == 1) {
+            status = pbuf_header(p, iphlen); /* back to IP header */
+            if (RT_LIKELY(status == 0)) {
+                icmp6_time_exceeded(p, ICMP6_TE_HL);
+            }
+            pbuf_free(p);
+            return;
+        }
+        --hopl;
+    }
+
+    /*
+     * Rewrite ICMPv6 echo header.  We don't need to recompute the
+     * checksum since, unlike IPv4, checksum includes pseudo-header.
+     * OS computes checksum for us on send() since it needs to select
+     * source address.
+     */
+    icmph->id = pcb->host_id;
+
+    /** @todo use control messages to save a syscall? */
+    if (hopl != pxping->hopl) {
+        status = setsockopt(pxping->sock6, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
+                            (char *)&hopl, sizeof(hopl));
+        if (status == 0) {
+            pxping->hopl = hopl;
+        }
+        else {
+            /* name the option that was actually being set */
+            DPRINTF(("IPV6_UNICAST_HOPS: %R[sockerr]\n", SOCKERRNO()));
+        }
+    }
+
+    status = proxy_sendto(pxping->sock6, p,
+                          &pcb->peer.sin6, sizeof(pcb->peer.sin6));
+    if (status != 0) {
+        /* the error code is the negated status */
+        int error = -status;
+        DPRINTF(("%s: sendto: %R[sockerr]\n", __func__, error));
+
+        status = pbuf_header(p, iphlen); /* back to IP header */
+        if (RT_UNLIKELY(status != 0)) {
+            pbuf_free(p);
+            return;
+        }
+
+        /* restore original ICMP header */
+        icmph->id = pcb->guest_id;
+
+        switch (error) {
+        case EACCES:
+            icmp6_dest_unreach(p, ICMP6_DUR_PROHIBITED);
+            break;
+
+#ifdef ENONET
+        case ENONET:
+#endif
+        case ENETDOWN:
+        case ENETUNREACH:
+        case EHOSTDOWN:
+        case EHOSTUNREACH:
+            icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
+            break;
+        }
+    }
+
+    pbuf_free(p);
+}
+
+
+/**
+ * Formatter for %R[ping_pcb].
+ *
+ * Output is "src -> dst", followed by the peer address in parentheses
+ * when the pcb is mapped, followed by the guest and host identifiers.
+ * A NULL pcb is printed as "(null)".  Returns the number of characters
+ * produced.
+ */
+static DECLCALLBACK(size_t)
+pxping_pcb_rtstrfmt(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
+                    const char *pszType, const void *pvValue,
+                    int cchWidth, int cchPrecision, unsigned int fFlags,
+                    void *pvUser)
+{
+    const struct ping_pcb * const pcb = (const struct ping_pcb *)pvValue;
+    size_t cchOutput;
+
+    NOREF(cchWidth);
+    NOREF(cchPrecision);
+    NOREF(fFlags);
+    NOREF(pvUser);
+
+    AssertReturn(strcmp(pszType, "ping_pcb") == 0, 0);
+
+    if (pcb == NULL) {
+        return RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, "(null)");
+    }
+
+    /* XXX: %RTnaipv4 takes the value, but %RTnaipv6 takes the pointer */
+    if (!pcb->is_ipv6) {
+        cchOutput = RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+                                "%RTnaipv4 -> %RTnaipv4",
+                                ip4_addr_get_u32(ipX_2_ip(&pcb->src)),
+                                ip4_addr_get_u32(ipX_2_ip(&pcb->dst)));
+        if (pcb->is_mapped) {
+            cchOutput += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+                                     " (%RTnaipv4)", pcb->peer.sin.sin_addr.s_addr);
+        }
+    }
+    else {
+        cchOutput = RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+                                "%RTnaipv6 -> %RTnaipv6", &pcb->src, &pcb->dst);
+        if (pcb->is_mapped) {
+            cchOutput += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+                                     " (%RTnaipv6)", &pcb->peer.sin6.sin6_addr);
+        }
+    }
+
+    cchOutput += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
+                             " id %04x->%04x", ntohs(pcb->guest_id), ntohs(pcb->host_id));
+
+    return cchOutput;
+}
+
+
+/**
+ * Allocate storage for a new pcb and account for it in pxping::npcbs.
+ *
+ * Returns NULL when PXPING_MAX_PCBS pcbs already exist or when malloc()
+ * fails.  The returned memory is not initialized.
+ */
+static struct ping_pcb *
+pxping_pcb_allocate(struct pxping *pxping)
+{
+    struct ping_pcb *fresh = NULL;
+
+    if (pxping->npcbs < PXPING_MAX_PCBS) {
+        fresh = (struct ping_pcb *)malloc(sizeof(*fresh));
+        if (fresh != NULL) {
+            ++pxping->npcbs;
+        }
+    }
+
+    return fresh;
+}
+
+
+/**
+ * Free a pcb and drop it from the pxping::npcbs count.  The pcb must
+ * already be unlinked from both the pcb list and the timeout list.
+ */
+static void
+pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb)
+{
+    LWIP_ASSERT1(pxping->npcbs > 0);
+    LWIP_ASSERT1(pcb->next == NULL);
+    LWIP_ASSERT1(pcb->pprev_timeout == NULL);
+
+    DPRINTF(("%s: ping %p\n", __func__, (void *)pcb));
+
+    free(pcb);
+    --pxping->npcbs;
+}
+
+
+/**
+ * Push the pcb onto the front of the timeout chain selected by its
+ * timeout_slot.  The pcb must not currently be on any timeout chain.
+ */
+static void
+pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb)
+{
+    struct ping_pcb **head;
+    struct ping_pcb *first;
+
+    LWIP_ASSERT1(pcb->pprev_timeout == NULL);
+
+    head = &pxping->timeout_list[pcb->timeout_slot];
+    first = *head;
+
+    /* the old first element, if any, is now referenced from our next pointer */
+    pcb->next_timeout = first;
+    if (first != NULL) {
+        first->pprev_timeout = &pcb->next_timeout;
+    }
+
+    *head = pcb;
+    pcb->pprev_timeout = head;
+}
+
+
+/**
+ * Unlink the pcb from the timeout chain it is currently on and clear
+ * its timeout chaining pointers.
+ */
+static void
+pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb)
+{
+    struct ping_pcb *successor;
+
+    LWIP_UNUSED_ARG(pxping);
+
+    LWIP_ASSERT1(pcb->pprev_timeout != NULL);
+
+    successor = pcb->next_timeout;
+    if (successor != NULL) {
+        successor->pprev_timeout = pcb->pprev_timeout;
+    }
+
+    /* whoever pointed at us now points at our successor */
+    *pcb->pprev_timeout = successor;
+
+    pcb->next_timeout = NULL;
+    pcb->pprev_timeout = NULL;
+}
+
+
+/**
+ * Make the pcb the new head of pxping::pcbs and put it on the timeout
+ * chain for its slot.
+ */
+static void
+pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb)
+{
+    struct ping_pcb * const old_head = pxping->pcbs;
+
+    pcb->next = old_head;
+    pxping->pcbs = pcb;
+
+    pxping_timeout_add(pxping, pcb);
+}
+
+
+/**
+ * Unlink the pcb from pxping::pcbs (if it is found there) and remove
+ * it from its timeout chain.
+ */
+static void
+pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb)
+{
+    struct ping_pcb **link = &pxping->pcbs;
+
+    /* find the pointer that refers to the pcb */
+    while (*link != NULL && *link != pcb) {
+        link = &(*link)->next;
+    }
+
+    if (*link != NULL) {
+        *link = pcb->next;
+        pcb->next = NULL;
+    }
+
+    pxping_timeout_del(pxping, pcb);
+}
+
+
+/**
+ * Find the pcb for a guest echo request, creating one if necessary.
+ *
+ * Called on the lwIP thread.  A pcb matches when the guest identifier,
+ * address family, destination and source all agree.
+ *
+ * @param pxping    Proxy state.
+ * @param is_ipv6   Non-zero for ICMPv6 requests.
+ * @param src       Source address of the request.
+ * @param dst       Destination address of the request.
+ * @param guest_id  Echo identifier from the request.
+ *
+ * @returns The existing pcb with its expiration refreshed, or a newly
+ *          registered pcb.  NULL if a new pcb cannot be allocated or
+ *          the destination cannot be remapped.
+ */
+static struct ping_pcb *
+pxping_pcb_for_request(struct pxping *pxping,
+                       int is_ipv6, ipX_addr_t *src, ipX_addr_t *dst,
+                       u16_t guest_id)
+{
+    struct ping_pcb *pcb;
+    int mapped;
+
+    /* on lwip thread, so no concurrent updates */
+    for (pcb = pxping->pcbs; pcb != NULL; pcb = pcb->next) {
+        if (pcb->guest_id == guest_id
+            && pcb->is_ipv6 == is_ipv6
+            && ipX_addr_cmp(is_ipv6, &pcb->dst, dst)
+            && ipX_addr_cmp(is_ipv6, &pcb->src, src))
+        {
+            break;
+        }
+    }
+
+    if (pcb != NULL) {
+        /* just bump up expiration timeout lazily */
+        DPRINTF(("ping %p: %R[ping_pcb] - slot %d -> %d\n",
+                 pcb, pcb,
+                 (unsigned int)pcb->timeout_slot,
+                 (unsigned int)pxping->timeout_slot));
+        pcb->timeout_slot = pxping->timeout_slot;
+        return pcb;
+    }
+
+    pcb = pxping_pcb_allocate(pxping);
+    if (pcb == NULL) {
+        return NULL;
+    }
+
+    /*
+     * The allocator returns uninitialized memory.  Clear all of it so
+     * that the parts of the peer socket address we don't set explicitly
+     * (sin_zero, sin6_scope_id) are not passed to sendto() as garbage.
+     */
+    memset(pcb, 0, sizeof(*pcb));
+
+    pcb->is_ipv6 = is_ipv6;
+    ipX_addr_copy(is_ipv6, pcb->src, *src);
+    ipX_addr_copy(is_ipv6, pcb->dst, *dst);
+
+    pcb->guest_id = guest_id;
+#ifdef RT_OS_WINDOWS
+# define random() (rand())
+#endif
+    pcb->host_id = random() & 0xffffUL;
+
+    pcb->next = NULL;
+    pcb->pprev_timeout = NULL;
+    pcb->next_timeout = NULL;
+
+    if (is_ipv6) {
+        pcb->peer.sin6.sin6_family = AF_INET6;
+#if HAVE_SA_LEN
+        pcb->peer.sin6.sin6_len = sizeof(pcb->peer.sin6);
+#endif
+        pcb->peer.sin6.sin6_port = htons(IPPROTO_ICMPV6);
+        pcb->peer.sin6.sin6_flowinfo = 0;
+        mapped = pxremap_outbound_ip6((ip6_addr_t *)&pcb->peer.sin6.sin6_addr,
+                                      ipX_2_ip6(&pcb->dst));
+    }
+    else {
+        pcb->peer.sin.sin_family = AF_INET;
+#if HAVE_SA_LEN
+        pcb->peer.sin.sin_len = sizeof(pcb->peer.sin);
+#endif
+        pcb->peer.sin.sin_port = htons(IPPROTO_ICMP);
+        mapped = pxremap_outbound_ip4((ip_addr_t *)&pcb->peer.sin.sin_addr,
+                                      ipX_2_ip(&pcb->dst));
+    }
+
+    if (mapped == PXREMAP_FAILED) {
+        /*
+         * Release through pxping_pcb_delete() so that the npcbs count
+         * taken by pxping_pcb_allocate() is given back; a bare free()
+         * would leak one of the PXPING_MAX_PCBS slots per failure.
+         */
+        pxping_pcb_delete(pxping, pcb);
+        return NULL;
+    }
+    pcb->is_mapped = (mapped == PXREMAP_MAPPED);
+
+    pcb->timeout_slot = pxping->timeout_slot;
+
+    /* the pcb list is also read on the pollmgr thread */
+    sys_mutex_lock(&pxping->lock);
+    pxping_pcb_register(pxping, pcb);
+    sys_mutex_unlock(&pxping->lock);
+
+    DPRINTF(("ping %p: %R[ping_pcb] - created\n", pcb, pcb));
+
+    pxping_timer_needed(pxping);
+
+    return pcb;
+}
+
+
+/**
+ * Find the pcb an incoming reply belongs to: the host identifier, the
+ * address family and the destination of the original request must all
+ * match.  Returns NULL when there is no such pcb.
+ *
+ * Called on pollmgr thread.  Caller must do the locking since caller
+ * is going to use the returned pcb, which needs to be protected from
+ * being expired by pxping_timer() on lwip thread.
+ */
+static struct ping_pcb *
+pxping_pcb_for_reply(struct pxping *pxping,
+                     int is_ipv6, ipX_addr_t *dst, u16_t host_id)
+{
+    struct ping_pcb *candidate = pxping->pcbs;
+
+    while (candidate != NULL) {
+        /* XXX: allow broadcast pings? */
+        if (candidate->host_id == host_id
+            && candidate->is_ipv6 == is_ipv6
+            && ipX_addr_cmp(is_ipv6, &candidate->dst, dst))
+        {
+            break;
+        }
+        candidate = candidate->next;
+    }
+
+    return candidate;
+}
+
+
+/**
+ * Periodic expiration timer, run on the lwIP thread.
+ *
+ * Advances pxping::timeout_slot and walks the pcbs chained in the new
+ * slot.  A pcb whose desired slot is still this one has expired and is
+ * destroyed; any other pcb was refreshed in the meantime and is moved
+ * to the chain of its desired slot.  Re-arms itself through
+ * pxping_timer_needed().
+ */
+static void
+pxping_timer(void *arg)
+{
+    struct pxping * const pxping = (struct pxping *)arg;
+    struct ping_pcb *cur, *following;
+    size_t slot;
+
+    pxping->timer_active = 0;
+
+    /*
+     * New slot points to the list of pcbs to check for expiration.
+     */
+    LWIP_ASSERT1(pxping->timeout_slot < TIMEOUT);
+    slot = pxping->timeout_slot + 1;
+    if (slot == TIMEOUT) {
+        slot = 0;
+    }
+    pxping->timeout_slot = slot;
+
+    cur = pxping->timeout_list[slot];
+
+    /* protect from pollmgr concurrent reads */
+    sys_mutex_lock(&pxping->lock);
+
+    for (; cur != NULL; cur = following) {
+        /* fetch the successor first, cur is about to be unlinked */
+        following = cur->next_timeout;
+
+        if (cur->timeout_slot != slot) {
+            /*
+             * If there was another request, we updated timeout_slot
+             * but delayed actually moving the pcb until now.
+             */
+            pxping_timeout_del(pxping, cur); /* from current slot */
+            pxping_timeout_add(pxping, cur); /* to new slot */
+        }
+        else {
+            /* expired */
+            pxping_pcb_deregister(pxping, cur);
+            pxping_pcb_delete(pxping, cur);
+        }
+    }
+
+    sys_mutex_unlock(&pxping->lock);
+    pxping_timer_needed(pxping);
+}
+
+
+/**
+ * Schedule pxping_timer() to run in one second, unless it is already
+ * scheduled or there are no pcbs left to expire.
+ */
+static void
+pxping_timer_needed(struct pxping *pxping)
+{
+    if (pxping->timer_active) {
+        return;
+    }
+    if (pxping->pcbs == NULL) {
+        return;
+    }
+
+    pxping->timer_active = 1;
+    sys_timeout(1 * 1000, pxping_timer, pxping);
+}
+
+
+/**
+ * Poll manager callback shared by both ping sockets.
+ *
+ * Logs unexpected events and pending socket errors, and on POLLIN
+ * hands over to the IPv4 or IPv6 reader depending on which socket
+ * fired.  Always returns POLLIN.
+ */
+static int
+pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxping * const pxping = (struct pxping *)handler->data;
+
+    LWIP_ASSERT1(fd == pxping->sock4 || fd == pxping->sock6);
+
+    if ((revents & ~(POLLIN|POLLERR)) != 0) {
+        DPRINTF0(("%s: unexpected revents 0x%x\n", __func__, revents));
+        return POLLIN;
+    }
+
+    if ((revents & POLLERR) != 0) {
+        int pending = -1;
+        socklen_t cbPending = (socklen_t)sizeof(pending);
+
+        if (getsockopt(fd, SOL_SOCKET, SO_ERROR,
+                       (char *)&pending, &cbPending) < 0)
+        {
+            DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
+                     __func__, fd, SOCKERRNO()));
+        }
+        else {
+            DPRINTF(("%s: sock %d: %R[sockerr]\n",
+                     __func__, fd, pending));
+        }
+    }
+
+    if ((revents & POLLIN) != 0) {
+        if (fd == pxping->sock4) {
+            pxping_pmgr_icmp4(pxping);
+        }
+        else /* fd == pxping->sock6 */ {
+            pxping_pmgr_icmp6(pxping);
+        }
+    }
+
+    return POLLIN;
+}
+
+
+/**
+ * Process incoming ICMP message for the host.
+ * NB: we will get a lot of spam here and have to sift through it.
+ *
+ * Reads one datagram from the IPv4 socket into pollmgr_udpbuf and
+ * validates the outer IP header: version 4, not a fragment, no
+ * options, protocol ICMP, consistent lengths.  Echo replies go to
+ * pxping_pmgr_icmp4_echo(), destination unreachable and time exceeded
+ * go to pxping_pmgr_icmp4_error(); everything else is ignored.
+ */
+static void
+pxping_pmgr_icmp4(struct pxping *pxping)
+{
+ struct sockaddr_in sin;
+ socklen_t salen = sizeof(sin);
+ ssize_t nread;
+ struct ip_hdr *iph;
+ struct icmp_echo_hdr *icmph;
+ u16_t iplen, ipoff;
+
+ memset(&sin, 0, sizeof(sin));
+
+ /*
+ * Reads from raw IPv4 sockets deliver complete IP datagrams with
+ * IP header included.
+ */
+ nread = recvfrom(pxping->sock4, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0,
+ (struct sockaddr *)&sin, &salen);
+ if (nread < 0) {
+ DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
+ return;
+ }
+
+ if (nread < IP_HLEN) {
+ DPRINTF2(("%s: read %d bytes, IP header truncated\n",
+ __func__, (unsigned int)nread));
+ return;
+ }
+
+ iph = (struct ip_hdr *)pollmgr_udpbuf;
+
+ /* match version */
+ if (IPH_V(iph) != 4) {
+ DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(iph)));
+ return;
+ }
+
+ /* no fragmentation */
+ ipoff = IPH_OFFSET(iph);
+#if defined(RT_OS_DARWIN)
+ /* darwin reports IPH_OFFSET in host byte order */
+ /* convert it and store it back so the header in the buffer is in network order */
+ ipoff = htons(ipoff);
+ IPH_OFFSET_SET(iph, ipoff);
+#endif
+ if ((ipoff & PP_HTONS(IP_OFFMASK | IP_MF)) != 0) {
+ DPRINTF2(("%s: dropping fragmented datagram (0x%04x)\n",
+ __func__, ntohs(ipoff)));
+ return;
+ }
+
+ /* no options */
+ if (IPH_HL(iph) * 4 != IP_HLEN) {
+ DPRINTF2(("%s: dropping datagram with options (IP header length %d)\n",
+ __func__, IPH_HL(iph) * 4));
+ return;
+ }
+
+ if (IPH_PROTO(iph) != IP_PROTO_ICMP) {
+ DPRINTF2(("%s: unexpected protocol %d\n", __func__, IPH_PROTO(iph)));
+ return;
+ }
+
+ /* from here on iplen is the total datagram length in host byte order */
+ iplen = IPH_LEN(iph);
+#if !defined(RT_OS_DARWIN)
+ /* darwin reports IPH_LEN in host byte order */
+ iplen = ntohs(iplen);
+#endif
+#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS)
+ /* darwin and solaris change IPH_LEN to payload length only */
+ iplen += IP_HLEN; /* we verified there are no options */
+ IPH_LEN_SET(iph, htons(iplen));
+#endif
+ if (nread < iplen) {
+ DPRINTF2(("%s: read %d bytes but total length is %d bytes\n",
+ __func__, (unsigned int)nread, (unsigned int)iplen));
+ return;
+ }
+
+ if (iplen < IP_HLEN + ICMP_HLEN) {
+ DPRINTF2(("%s: IP length %d bytes, ICMP header truncated\n",
+ __func__, iplen));
+ return;
+ }
+
+ /* dispatch on ICMP type; the handlers read the datagram from pollmgr_udpbuf */
+ icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN);
+ if (ICMPH_TYPE(icmph) == ICMP_ER) {
+ pxping_pmgr_icmp4_echo(pxping, iplen, &sin);
+ }
+ else if (ICMPH_TYPE(icmph) == ICMP_DUR || ICMPH_TYPE(icmph) == ICMP_TE) {
+ pxping_pmgr_icmp4_error(pxping, iplen, &sin);
+ }
+#if 1
+ else {
+ DPRINTF2(("%s: ignoring ICMP type %d\n", __func__, ICMPH_TYPE(icmph)));
+ }
+#endif
+}
+
+
+/**
+ * Check if this incoming ICMP echo reply is for one of our pings and
+ * forward it to the guest.
+ *
+ * @param pxping  Proxy state.
+ * @param iplen   Total length of the datagram in pollmgr_udpbuf.
+ * @param peer    Address the datagram was received from; only used
+ *                for logging.
+ *
+ * The reply is matched to a pcb by its (inbound-remapped) source
+ * address and echo identifier.  On a match the identifier and the
+ * addresses are rewritten in place in pollmgr_udpbuf, with checksums
+ * fixed up, and the datagram is passed to
+ * pxping_pmgr_forward_inbound().
+ */
+static void
+pxping_pmgr_icmp4_echo(struct pxping *pxping,
+ u16_t iplen, struct sockaddr_in *peer)
+{
+ struct ip_hdr *iph;
+ struct icmp_echo_hdr *icmph;
+ u16_t id, seq;
+ ip_addr_t guest_ip, target_ip;
+ int mapped;
+ struct ping_pcb *pcb;
+ u16_t guest_id;
+ u16_t oipsum;
+ u32_t sum;
+ /* NOTE(review): presumably because peer is only referenced from DPRINTF - confirm */
+ RT_NOREF(peer);
+
+ iph = (struct ip_hdr *)pollmgr_udpbuf;
+ icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN);
+
+ id = icmph->id;
+ seq = icmph->seqno;
+
+ DPRINTF(("<--- PING %RTnaipv4 id 0x%x seq %d\n",
+ peer->sin_addr.s_addr, ntohs(id), ntohs(seq)));
+
+ /*
+ * Is this a reply to one of our pings?
+ */
+
+ ip_addr_copy(target_ip, iph->src);
+ mapped = pxremap_inbound_ip4(&target_ip, &target_ip);
+ if (mapped == PXREMAP_FAILED) {
+ return;
+ }
+ /* TTL is decremented below for replies that are not remapped, so 1 means expired */
+ if (mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) {
+ DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
+ return;
+ }
+
+ sys_mutex_lock(&pxping->lock);
+ pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id);
+ if (pcb == NULL) {
+ sys_mutex_unlock(&pxping->lock);
+ DPRINTF2(("%s: no match\n", __func__));
+ return;
+ }
+
+ DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
+
+ /* save info before unlocking since pcb may expire */
+ ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src));
+ guest_id = pcb->guest_id;
+
+ sys_mutex_unlock(&pxping->lock);
+
+
+ /*
+ * Rewrite headers and forward to guest.
+ */
+
+ /* rewrite ICMP echo header */
+ /* (host id back to the guest's id, checksum adjusted incrementally) */
+ sum = (u16_t)~icmph->chksum;
+ sum += chksum_update_16(&icmph->id, guest_id);
+ sum = FOLD_U32T(sum);
+ icmph->chksum = ~sum;
+
+ /* rewrite IP header */
+ oipsum = IPH_CHKSUM(iph);
+ if (oipsum == 0) {
+ /* Solaris doesn't compute checksum for local replies */
+ /* nothing to update incrementally: rewrite the fields, then recompute the checksum */
+ ip_addr_copy(iph->dest, guest_ip);
+ if (mapped == PXREMAP_MAPPED) {
+ ip_addr_copy(iph->src, target_ip);
+ }
+ else {
+ IPH_TTL_SET(iph, IPH_TTL(iph) - 1);
+ }
+ IPH_CHKSUM_SET(iph, inet_chksum(iph, ntohs(IPH_LEN(iph))));
+ }
+ else {
+ /* adjust the existing header checksum for each field that is changed */
+ sum = (u16_t)~oipsum;
+ sum += chksum_update_32((u32_t *)&iph->dest,
+ ip4_addr_get_u32(&guest_ip));
+ if (mapped == PXREMAP_MAPPED) {
+ sum += chksum_update_32((u32_t *)&iph->src,
+ ip4_addr_get_u32(&target_ip));
+ }
+ else {
+ IPH_TTL_SET(iph, IPH_TTL(iph) - 1);
+ /* checksum adjustment for the TTL decrement just made */
+ sum += PP_NTOHS(~0x0100);
+ }
+ sum = FOLD_U32T(sum);
+ IPH_CHKSUM_SET(iph, ~sum);
+ }
+
+ pxping_pmgr_forward_inbound(pxping, iplen);
+}
+
+
+/**
+ * Check if this incoming ICMP error (destination unreachable or time
+ * exceeded) is about one of our pings and forward it to the guest.
+ *
+ * The datagram is read from pollmgr_udpbuf: the outer IP header, the
+ * ICMP error header at offset IP_HLEN, and the quoted original
+ * datagram after that.  The quoted headers and the outer IP header
+ * are rewritten in place and the buffer is then passed to
+ * pxping_pmgr_forward_inbound().
+ *
+ * @param pxping  ping proxy state (pcb lookup and its lock).
+ * @param iplen   length of the datagram in pollmgr_udpbuf.
+ * @param peer    sender address; not used.
+ */
+static void
+pxping_pmgr_icmp4_error(struct pxping *pxping,
+                        u16_t iplen, struct sockaddr_in *peer)
+{
+    struct ip_hdr *iph, *oiph;
+    struct icmp_echo_hdr *icmph, *oicmph;
+    u16_t oipoff, oiphlen, oiplen;
+    u16_t id, seq;
+    ip_addr_t guest_ip, target_ip, error_ip;
+    int target_mapped, error_mapped;
+    struct ping_pcb *pcb;
+    u16_t guest_id;
+    u32_t sum;
+    RT_NOREF(peer);
+
+    iph = (struct ip_hdr *)pollmgr_udpbuf;
+    icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN);
+
+    /*
+     * Inner IP datagram is not checked by the kernel and may be
+     * anything, possibly malicious.
+     */
+
+    oipoff = IP_HLEN + ICMP_HLEN;
+
+    /*
+     * Compare before subtracting so that a short datagram cannot wrap
+     * the u16_t length, and bail out instead of parsing whatever stale
+     * bytes happen to follow the received data in the buffer.
+     */
+    if (iplen < oipoff + IP_HLEN) {
+        DPRINTF2(("%s: original datagram truncated to %d bytes\n",
+                  __func__, (int)iplen - (int)oipoff));
+        return;
+    }
+    oiplen = iplen - oipoff; /* NB: truncated length, not IPH_LEN(oiph) */
+
+    /* IP header of the original message */
+    oiph = (struct ip_hdr *)(pollmgr_udpbuf + oipoff);
+
+    /* match version */
+    if (IPH_V(oiph) != 4) {
+        DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(oiph)));
+        return;
+    }
+
+    /* can't match fragments except the first one */
+    if ((IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK)) != 0) {
+        DPRINTF2(("%s: ignoring fragment with offset %d\n",
+                  __func__, ntohs(IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK))));
+        return;
+    }
+
+    if (IPH_PROTO(oiph) != IP_PROTO_ICMP) {
+#if 0
+        /* don't spam with every "destination unreachable" in the system */
+        DPRINTF2(("%s: ignoring protocol %d\n", __func__, IPH_PROTO(oiph)));
+#endif
+        return;
+    }
+
+    oiphlen = IPH_HL(oiph) * 4;
+    if (oiphlen < IP_HLEN) {
+        /* header length field smaller than the fixed IPv4 header */
+        DPRINTF2(("%s: bad original IP header length %d\n",
+                  __func__, oiphlen));
+        return;
+    }
+    if (oiplen < oiphlen + ICMP_HLEN) {
+        DPRINTF2(("%s: original datagram truncated to %d bytes\n",
+                  __func__, oiplen));
+        return;
+    }
+
+    oicmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + oipoff + oiphlen);
+    if (ICMPH_TYPE(oicmph) != ICMP_ECHO) {
+        DPRINTF2(("%s: ignoring ICMP error for original ICMP type %d\n",
+                  __func__, ICMPH_TYPE(oicmph)));
+        return;
+    }
+
+    id = oicmph->id;
+    seq = oicmph->seqno;
+
+    DPRINTF2(("%s: ping %RTnaipv4 id 0x%x seq %d",
+              __func__, ip4_addr_get_u32(&oiph->dest), ntohs(id), ntohs(seq)));
+    if (ICMPH_TYPE(icmph) == ICMP_DUR) {
+        DPRINTF2((" unreachable (code %d)\n", ICMPH_CODE(icmph)));
+    }
+    else {
+        DPRINTF2((" time exceeded\n"));
+    }
+
+
+    /*
+     * Is the inner (failed) datagram one of our pings?
+     */
+
+    ip_addr_copy(target_ip, oiph->dest); /* inner (failed) */
+    target_mapped = pxremap_inbound_ip4(&target_ip, &target_ip);
+    if (target_mapped == PXREMAP_FAILED) {
+        return;
+    }
+
+    sys_mutex_lock(&pxping->lock);
+    pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id);
+    if (pcb == NULL) {
+        sys_mutex_unlock(&pxping->lock);
+        DPRINTF2(("%s: no match\n", __func__));
+        return;
+    }
+
+    DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
+
+    /* save info before unlocking since pcb may expire */
+    ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src));
+    guest_id = pcb->guest_id;
+
+    sys_mutex_unlock(&pxping->lock);
+
+
+    /*
+     * Rewrite both inner and outer headers and forward to guest.
+     * Note that the checksum of the outer ICMP error message is
+     * preserved by the changes we do to inner headers.
+     */
+
+    ip_addr_copy(error_ip, iph->src); /* node that reports the error */
+    error_mapped = pxremap_inbound_ip4(&error_ip, &error_ip);
+    if (error_mapped == PXREMAP_FAILED) {
+        return;
+    }
+    if (error_mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) {
+        DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
+        return;
+    }
+
+    /* rewrite inner ICMP echo header */
+    sum = (u16_t)~oicmph->chksum;
+    sum += chksum_update_16(&oicmph->id, guest_id);
+    sum = FOLD_U32T(sum);
+    oicmph->chksum = ~sum;
+
+    /* rewrite inner IP header */
+#if defined(RT_OS_DARWIN)
+    /* darwin converts inner length to host byte order too */
+    IPH_LEN_SET(oiph, htons(IPH_LEN(oiph)));
+#endif
+    sum = (u16_t)~IPH_CHKSUM(oiph);
+    sum += chksum_update_32((u32_t *)&oiph->src, ip4_addr_get_u32(&guest_ip));
+    if (target_mapped == PXREMAP_MAPPED) {
+        sum += chksum_update_32((u32_t *)&oiph->dest, ip4_addr_get_u32(&target_ip));
+    }
+    sum = FOLD_U32T(sum);
+    IPH_CHKSUM_SET(oiph, ~sum);
+
+    /* rewrite outer IP header */
+    sum = (u16_t)~IPH_CHKSUM(iph);
+    sum += chksum_update_32((u32_t *)&iph->dest, ip4_addr_get_u32(&guest_ip));
+    if (error_mapped == PXREMAP_MAPPED) {
+        sum += chksum_update_32((u32_t *)&iph->src, ip4_addr_get_u32(&error_ip));
+    }
+    else {
+        /* forwarded as is: account for the hop through the proxy */
+        IPH_TTL_SET(iph, IPH_TTL(iph) - 1);
+        sum += PP_NTOHS(~0x0100);
+    }
+    sum = FOLD_U32T(sum);
+    IPH_CHKSUM_SET(iph, ~sum);
+
+    pxping_pmgr_forward_inbound(pxping, iplen);
+}
+
+
+/**
+ * Process incoming ICMPv6 message for the host.
+ * NB: we will get a lot of spam here and have to sift through it.
+ *
+ * Reads one message from pxping->sock6 into pollmgr_udpbuf together
+ * with its ancillary data, logs what it is, and dispatches echo
+ * replies to pxping_pmgr_icmp6_echo() and error messages (types below
+ * ICMP6_TYPE_EREQ) to pxping_pmgr_icmp6_error().  Other informational
+ * messages are dropped.
+ *
+ * @param pxping  ping proxy state; supplies the raw IPv6 socket.
+ */
+static void
+pxping_pmgr_icmp6(struct pxping *pxping)
+{
+#ifndef RT_OS_WINDOWS
+    struct msghdr mh;
+    ssize_t nread;
+#else
+    WSAMSG mh;
+    DWORD nread;
+#endif
+    IOVEC iov[1];
+    static u8_t cmsgbuf[128];
+    struct cmsghdr *cmh;
+    struct sockaddr_in6 sin6;
+    /* socklen_t salen = sizeof(sin6); - unused */
+    struct icmp6_echo_hdr *icmph;
+    struct in6_pktinfo *pktinfo;
+    int hopl, tclass;
+#ifdef RT_OS_WINDOWS
+    int status;
+#endif
+
+    /*
+     * Reads from raw IPv6 sockets deliver only the payload.  Full
+     * headers are available via recvmsg(2)/cmsg(3).
+     */
+    IOVEC_SET_BASE(iov[0], pollmgr_udpbuf);
+    IOVEC_SET_LEN(iov[0], sizeof(pollmgr_udpbuf));
+
+    memset(&mh, 0, sizeof(mh));
+#ifndef RT_OS_WINDOWS
+    mh.msg_name = &sin6;
+    mh.msg_namelen = sizeof(sin6);
+    mh.msg_iov = iov;
+    mh.msg_iovlen = 1;
+    mh.msg_control = cmsgbuf;
+    mh.msg_controllen = sizeof(cmsgbuf);
+    mh.msg_flags = 0;
+
+    nread = recvmsg(pxping->sock6, &mh, 0);
+    if (nread < 0) {
+        DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
+        return;
+    }
+#else  /* RT_OS_WINDOWS */
+    mh.name = (LPSOCKADDR)&sin6;
+    mh.namelen = sizeof(sin6);
+    mh.lpBuffers = iov;
+    mh.dwBufferCount = 1;
+    mh.Control.buf = cmsgbuf;
+    mh.Control.len = sizeof(cmsgbuf);
+    mh.dwFlags = 0;
+
+    status = (*pxping->pfWSARecvMsg6)(pxping->sock6, &mh, &nread, NULL, NULL);
+    if (status == SOCKET_ERROR) {
+        DPRINTF2(("%s: error %d\n", __func__, WSAGetLastError()));
+        return;
+    }
+#endif
+
+    /*
+     * Don't interpret header fields that were not actually received;
+     * the buffer is reused and would still hold bytes of an earlier
+     * datagram.
+     */
+    if ((size_t)nread < sizeof(*icmph)) {
+        DPRINTF2(("%s: short ICMPv6 message: %u bytes\n",
+                  __func__, (unsigned int)nread));
+        return;
+    }
+
+    icmph = (struct icmp6_echo_hdr *)pollmgr_udpbuf;
+
+    DPRINTF2(("%s: %RTnaipv6 ICMPv6: ", __func__, &sin6.sin6_addr));
+
+    if (icmph->type == ICMP6_TYPE_EREP) {
+        DPRINTF2(("echo reply %04x %u\n",
+                  (unsigned int)icmph->id, (unsigned int)icmph->seqno));
+    }
+    else { /* XXX */
+        if (icmph->type == ICMP6_TYPE_EREQ) {
+            DPRINTF2(("echo request %04x %u\n",
+                      (unsigned int)icmph->id, (unsigned int)icmph->seqno));
+        }
+        else if (icmph->type == ICMP6_TYPE_DUR) {
+            DPRINTF2(("destination unreachable\n"));
+        }
+        else if (icmph->type == ICMP6_TYPE_PTB) {
+            DPRINTF2(("packet too big\n"));
+        }
+        else if (icmph->type == ICMP6_TYPE_TE) {
+            DPRINTF2(("time exceeded\n"));
+        }
+        else if (icmph->type == ICMP6_TYPE_PP) {
+            DPRINTF2(("parameter problem\n"));
+        }
+        else {
+            DPRINTF2(("type %d len %u\n", icmph->type, (unsigned int)nread));
+        }
+
+        if (icmph->type >= ICMP6_TYPE_EREQ) {
+            return;             /* informational message */
+        }
+    }
+
+    /* collect hop limit and destination address from ancillary data */
+    pktinfo = NULL;
+    hopl = -1;
+    tclass = -1;
+    for (cmh = CMSG_FIRSTHDR(&mh); cmh != NULL; cmh = CMSG_NXTHDR(&mh, cmh)) {
+        if (cmh->cmsg_len == 0)
+            break;
+
+        if (cmh->cmsg_level == IPPROTO_IPV6
+            && cmh->cmsg_type == IPV6_HOPLIMIT
+            && cmh->cmsg_len == CMSG_LEN(sizeof(int)))
+        {
+            hopl = *(int *)CMSG_DATA(cmh);
+            DPRINTF2(("hoplimit = %d\n", hopl));
+        }
+
+        if (cmh->cmsg_level == IPPROTO_IPV6
+            && cmh->cmsg_type == IPV6_PKTINFO
+            && cmh->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo)))
+        {
+            pktinfo = (struct in6_pktinfo *)CMSG_DATA(cmh);
+            DPRINTF2(("pktinfo found\n"));
+        }
+    }
+
+    if (pktinfo == NULL) {
+        /*
+         * ip6_output_if() doesn't do checksum for us so we need to
+         * manually recompute it - for this we must know the
+         * destination address of the pseudo-header that we will
+         * rewrite with guest's address.  (TODO: yeah, yeah, we can
+         * compute it from scratch...)
+         */
+        DPRINTF2(("%s: unable to get pktinfo\n", __func__));
+        return;
+    }
+
+    if (hopl < 0) {
+        hopl = LWIP_ICMP6_HL;
+    }
+
+    /*
+     * Traffic class is not extracted from the ancillary data above.
+     * Fall back to 0: the forwarding code narrows this value to u8_t,
+     * so leaving -1 here would put 0xff on the wire.
+     */
+    if (tclass < 0) {
+        tclass = 0;
+    }
+
+    if (icmph->type == ICMP6_TYPE_EREP) {
+        pxping_pmgr_icmp6_echo(pxping,
+                               (ip6_addr_t *)&sin6.sin6_addr,
+                               (ip6_addr_t *)&pktinfo->ipi6_addr,
+                               hopl, tclass, (u16_t)nread);
+    }
+    else if (icmph->type < ICMP6_TYPE_EREQ) {
+        pxping_pmgr_icmp6_error(pxping,
+                                (ip6_addr_t *)&sin6.sin6_addr,
+                                (ip6_addr_t *)&pktinfo->ipi6_addr,
+                                hopl, tclass, (u16_t)nread);
+    }
+}
+
+
+/**
+ * Handle an ICMPv6 echo reply sitting in pollmgr_udpbuf: if it
+ * answers one of the pings we proxied, patch the echo id and the
+ * checksum for the guest and queue the message for delivery.
+ *
+ * @param pxping   ping proxy state.
+ * @param src      address the reply came from.
+ * @param dst      local address the reply was sent to.
+ * @param hopl     hop limit of the received reply.
+ * @param tclass   traffic class to pass on when forwarding.
+ * @param icmplen  length of the ICMPv6 message in pollmgr_udpbuf.
+ */
+static void
+pxping_pmgr_icmp6_echo(struct pxping *pxping,
+                       ip6_addr_t *src, ip6_addr_t *dst,
+                       int hopl, int tclass, u16_t icmplen)
+{
+    struct icmp6_echo_hdr *echo = (struct icmp6_echo_hdr *)pollmgr_udpbuf;
+    struct ping_pcb *match;
+    ip6_addr_t reply_src, reply_dst;
+    u16_t host_id, orig_id;
+    u32_t acc;
+    int remap;
+
+    /* translate the replying host into the address the guest knows */
+    ip6_addr_copy(reply_src, *src);
+    remap = pxremap_inbound_ip6(&reply_src, &reply_src);
+    if (remap == PXREMAP_FAILED) {
+        return;
+    }
+    if (remap == PXREMAP_ASIS) {
+        /* passing through unchanged, so we count as a hop */
+        if (hopl == 1) {
+            DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
+            return;
+        }
+        hopl -= 1;
+    }
+
+    host_id = echo->id;
+
+    /* the pcb is only valid while the lock is held; copy what we need */
+    sys_mutex_lock(&pxping->lock);
+    match = pxping_pcb_for_reply(pxping, 1, ip6_2_ipX(&reply_src), host_id);
+    if (match == NULL) {
+        sys_mutex_unlock(&pxping->lock);
+        DPRINTF2(("%s: no match\n", __func__));
+        return;
+    }
+
+    DPRINTF2(("%s: pcb %p\n", __func__, (void *)match));
+
+    ip6_addr_copy(reply_dst, *ipX_2_ip6(&match->src));
+    orig_id = match->guest_id;
+    sys_mutex_unlock(&pxping->lock);
+
+    /*
+     * Incrementally fix up the ICMPv6 checksum: new echo id, new
+     * pseudo-header destination and, if remapped, new pseudo-header
+     * source.
+     */
+    acc = (u16_t)~echo->chksum;
+    acc += chksum_update_16(&echo->id, orig_id);
+    acc += chksum_delta_ipv6(dst, &reply_dst);
+    if (remap != 0) {
+        acc += chksum_delta_ipv6(src, &reply_src);
+    }
+    acc = FOLD_U32T(acc);
+    echo->chksum = ~acc;
+
+    pxping_pmgr_forward_inbound6(pxping, &reply_src, &reply_dst,
+                                 hopl, tclass, icmplen);
+}
+
+
+/**
+ * Check if this incoming ICMPv6 error is about one of our pings and
+ * forward it to the guest.
+ *
+ * The error message is read from pollmgr_udpbuf.  The quoted original
+ * packet is walked (IPv6 header, then routing / hop-by-hop /
+ * destination options headers) until an ICMPv6 header is found; that
+ * header must be an echo request matching one of our pcbs.  Inner and
+ * outer headers are then rewritten in place and the message is
+ * queued with pxping_pmgr_forward_inbound6().
+ *
+ * @param pxping   ping proxy state.
+ * @param src      address of the node that reported the error.
+ * @param dst      local address the error was sent to.
+ * @param hopl     hop limit of the received error message.
+ * @param tclass   traffic class to pass on when forwarding.
+ * @param icmplen  length of the ICMPv6 message in pollmgr_udpbuf.
+ */
+static void
+pxping_pmgr_icmp6_error(struct pxping *pxping,
+                        ip6_addr_t *src, ip6_addr_t *dst,
+                        int hopl, int tclass, u16_t icmplen)
+{
+    struct icmp6_hdr *icmph;
+    u8_t *bufptr;
+    size_t buflen, hlen;
+    int proto;
+    struct ip6_hdr *oiph;
+    struct icmp6_echo_hdr *oicmph;
+    struct ping_pcb *pcb;
+    ip6_addr_t guest_ip, target_ip, error_ip;
+    int target_mapped, error_mapped;
+    u16_t guest_id;
+    u32_t sum;
+
+    icmph = (struct icmp6_hdr *)pollmgr_udpbuf;
+
+    /*
+     * Inner IP datagram is not checked by the kernel and may be
+     * anything, possibly malicious.
+     */
+    oiph = NULL;
+    oicmph = NULL;
+
+    bufptr = pollmgr_udpbuf;
+    buflen = icmplen;
+
+    hlen = sizeof(*icmph);
+    proto = IP6_NEXTH_ENCAPS; /* i.e. IPv6, lwIP's name is unfortunate */
+    for (;;) {
+        /* skip the header identified on the previous iteration */
+        if (hlen > buflen) {
+            DPRINTF2(("truncated datagram inside ICMPv6 error message is too short\n"));
+            return;
+        }
+        buflen -= hlen;
+        bufptr += hlen;
+
+        if (proto == IP6_NEXTH_ENCAPS && oiph == NULL) { /* outermost IPv6 */
+            oiph = (struct ip6_hdr *)bufptr;
+            if (IP6H_V(oiph) != 6) {
+                DPRINTF2(("%s: unexpected IP version %d\n", __func__, IP6H_V(oiph)));
+                return;
+            }
+
+            proto = IP6H_NEXTH(oiph);
+            hlen = IP6_HLEN;
+        }
+        else if (proto == IP6_NEXTH_ICMP6) {
+            oicmph = (struct icmp6_echo_hdr *)bufptr;
+            break;
+        }
+        else if (proto == IP6_NEXTH_ROUTING
+                 || proto == IP6_NEXTH_HOPBYHOP
+                 || proto == IP6_NEXTH_DESTOPTS)
+        {
+            /* extension header: next header, then length in 8-octet units */
+            proto = bufptr[0];
+            hlen = (bufptr[1] + 1) * 8;
+        }
+        else {
+            DPRINTF2(("%s: stopping at protocol %d\n", __func__, proto));
+            break;
+        }
+    }
+
+    if (oiph == NULL || oicmph == NULL) {
+        return;
+    }
+
+    if (buflen < sizeof(*oicmph)) {
+        DPRINTF2(("%s: original ICMPv6 is truncated too short\n", __func__));
+        return;
+    }
+
+    if (oicmph->type != ICMP6_TYPE_EREQ) {
+        DPRINTF2(("%s: ignoring original ICMPv6 type %d\n", __func__, oicmph->type));
+        return;
+    }
+
+    ip6_addr_copy(target_ip, oiph->dest); /* inner (failed) */
+    target_mapped = pxremap_inbound_ip6(&target_ip, &target_ip);
+    if (target_mapped == PXREMAP_FAILED) {
+        return;
+    }
+
+    sys_mutex_lock(&pxping->lock);
+    pcb = pxping_pcb_for_reply(pxping, 1, ip6_2_ipX(&target_ip), oicmph->id);
+    if (pcb == NULL) {
+        sys_mutex_unlock(&pxping->lock);
+        DPRINTF2(("%s: no match\n", __func__));
+        return;
+    }
+
+    DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
+
+    /* save info before unlocking since pcb may expire */
+    ip6_addr_copy(guest_ip, *ipX_2_ip6(&pcb->src));
+    guest_id = pcb->guest_id;
+
+    sys_mutex_unlock(&pxping->lock);
+
+
+    /*
+     * Rewrite inner and outer headers and forward to guest.  Note
+     * that IPv6 has no IP header checksum, but uses pseudo-header for
+     * ICMPv6, so we update both in one go, adjusting ICMPv6 checksum
+     * as we rewrite IP header.
+     */
+
+    ip6_addr_copy(error_ip, *src); /* node that reports the error */
+    error_mapped = pxremap_inbound_ip6(&error_ip, &error_ip);
+    if (error_mapped == PXREMAP_FAILED) {
+        return;
+    }
+    if (error_mapped == PXREMAP_ASIS) {
+        if (hopl == 1) {
+            DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
+            return;
+        }
+        /*
+         * Forwarded as is: account for the hop through the proxy,
+         * like the echo reply path and the IPv4 error path do.
+         */
+        --hopl;
+    }
+
+    /* rewrite inner ICMPv6 echo header and inner IPv6 header */
+    sum = (u16_t)~oicmph->chksum;
+    sum += chksum_update_16(&oicmph->id, guest_id);
+    sum += chksum_update_ipv6((ip6_addr_t *)&oiph->src, &guest_ip);
+    if (target_mapped) {
+        /*
+         * The quoted destination must actually be rewritten, not
+         * just accounted for: the inner checksum has to match the
+         * quoted addresses, and the outer checksum is only preserved
+         * when every adjusted field really changes in the buffer.
+         */
+        sum += chksum_update_ipv6((ip6_addr_t *)&oiph->dest, &target_ip);
+    }
+    sum = FOLD_U32T(sum);
+    oicmph->chksum = ~sum;
+
+    /* rewrite outer ICMPv6 error header */
+    sum = (u16_t)~icmph->chksum;
+    sum += chksum_delta_ipv6(dst, &guest_ip); /* pseudo */
+    if (error_mapped) {
+        sum += chksum_delta_ipv6(src, &error_ip); /* pseudo */
+    }
+    sum = FOLD_U32T(sum);
+    icmph->chksum = ~sum;
+
+    pxping_pmgr_forward_inbound6(pxping,
+                                 &error_ip, /* error src */
+                                 &guest_ip, /* error dst */
+                                 hopl, tclass, icmplen);
+}
+
+
+/**
+ * Copy the (already rewritten) IPv4 datagram out of pollmgr_udpbuf
+ * and post it to the lwip thread, which sends it on to the guest.
+ *
+ * The ping_pcb is not referenced any more at this point: everything
+ * the guest needs has been patched into the datagram, so it does not
+ * matter if the pcb expires on the lwip thread in the meantime.
+ *
+ * @param pxping  ping proxy state, handed to the callback.
+ * @param iplen   number of bytes of pollmgr_udpbuf to forward.
+ */
+static void
+pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen)
+{
+    struct ping_msg *req;
+    struct pbuf *copy;
+
+    copy = pbuf_alloc(PBUF_LINK, iplen, PBUF_RAM);
+    if (copy == NULL) {
+        DPRINTF(("%s: pbuf_alloc(%d) failed\n",
+                 __func__, (unsigned int)iplen));
+        return;
+    }
+
+    if (pbuf_take(copy, pollmgr_udpbuf, iplen) != ERR_OK) {
+        DPRINTF(("%s: pbuf_take(%d) failed\n",
+                 __func__, (unsigned int)iplen));
+        pbuf_free(copy);
+        return;
+    }
+
+    req = (struct ping_msg *)malloc(sizeof(*req));
+    if (req == NULL) {
+        pbuf_free(copy);
+        return;
+    }
+
+    /* payload for the callback */
+    req->pxping = pxping;
+    req->p = copy;
+
+    /* static callback message; the callback releases both req and copy */
+    req->msg.type = TCPIP_MSG_CALLBACK_STATIC;
+    req->msg.sem = NULL;
+    req->msg.msg.cb.function = pxping_pcb_forward_inbound;
+    req->msg.msg.cb.ctx = (void *)req;
+
+    proxy_lwip_post(&req->msg);
+}
+
+
+/**
+ * Callback posted by pxping_pmgr_forward_inbound(): output the
+ * prepared IPv4 datagram on the proxy netif, then release the pbuf
+ * and the message.
+ *
+ * @param arg  the struct ping_msg allocated by the poster.
+ */
+static void
+pxping_pcb_forward_inbound(void *arg)
+{
+    struct ping_msg *req = (struct ping_msg *)arg;
+    err_t rc;
+
+    LWIP_ASSERT1(req != NULL);
+    LWIP_ASSERT1(req->pxping != NULL);
+    LWIP_ASSERT1(req->p != NULL);
+
+    rc = ip_raw_output_if(req->p, req->pxping->netif);
+    if (rc != ERR_OK) {
+        DPRINTF(("%s: ip_output_if: %s\n",
+                 __func__, proxy_lwip_strerr(rc)));
+    }
+
+    /* we own both objects regardless of the outcome */
+    pbuf_free(req->p);
+    free(req);
+}
+
+
+/**
+ * IPv6 counterpart of pxping_pmgr_forward_inbound(): copy the
+ * rewritten ICMPv6 message out of pollmgr_udpbuf and post it,
+ * together with the addressing information for the IPv6 header, to
+ * the lwip thread.
+ *
+ * @param pxping   ping proxy state, handed to the callback.
+ * @param src      source address for the forwarded packet.
+ * @param dst      destination (guest) address.
+ * @param hopl     hop limit for the forwarded packet.
+ * @param tclass   traffic class for the forwarded packet.
+ * @param icmplen  number of bytes of pollmgr_udpbuf to forward.
+ */
+static void
+pxping_pmgr_forward_inbound6(struct pxping *pxping,
+                             ip6_addr_t *src, ip6_addr_t *dst,
+                             u8_t hopl, u8_t tclass,
+                             u16_t icmplen)
+{
+    struct ping6_msg *req;
+    struct pbuf *copy;
+
+    copy = pbuf_alloc(PBUF_IP, icmplen, PBUF_RAM);
+    if (copy == NULL) {
+        DPRINTF(("%s: pbuf_alloc(%d) failed\n",
+                 __func__, (unsigned int)icmplen));
+        return;
+    }
+
+    if (pbuf_take(copy, pollmgr_udpbuf, icmplen) != ERR_OK) {
+        DPRINTF(("%s: pbuf_take(%d) failed\n",
+                 __func__, (unsigned int)icmplen));
+        pbuf_free(copy);
+        return;
+    }
+
+    req = (struct ping6_msg *)malloc(sizeof(*req));
+    if (req == NULL) {
+        pbuf_free(copy);
+        return;
+    }
+
+    /* payload for the callback */
+    req->pxping = pxping;
+    req->p = copy;
+    req->hopl = hopl;
+    req->tclass = tclass;
+    ip6_addr_copy(req->src, *src);
+    ip6_addr_copy(req->dst, *dst);
+
+    /* static callback message; the callback releases both req and copy */
+    req->msg.type = TCPIP_MSG_CALLBACK_STATIC;
+    req->msg.sem = NULL;
+    req->msg.msg.cb.function = pxping_pcb_forward_inbound6;
+    req->msg.msg.cb.ctx = (void *)req;
+
+    proxy_lwip_post(&req->msg);
+}
+
+
+/**
+ * Callback posted by pxping_pmgr_forward_inbound6(): wrap the
+ * prepared ICMPv6 message into an IPv6 packet and output it on the
+ * proxy netif, then release the pbuf and the message.
+ *
+ * @param arg  the struct ping6_msg allocated by the poster.
+ */
+static void
+pxping_pcb_forward_inbound6(void *arg)
+{
+    struct ping6_msg *req = (struct ping6_msg *)arg;
+    err_t rc;
+
+    LWIP_ASSERT1(req != NULL);
+    LWIP_ASSERT1(req->pxping != NULL);
+    LWIP_ASSERT1(req->p != NULL);
+
+    rc = ip6_output_if(req->p,
+                       &req->src, &req->dst, req->hopl, req->tclass,
+                       IP6_NEXTH_ICMP6, req->pxping->netif);
+    if (rc != ERR_OK) {
+        DPRINTF(("%s: ip6_output_if: %s\n",
+                 __func__, proxy_lwip_strerr(rc)));
+    }
+
+    /* we own both objects regardless of the outcome */
+    pbuf_free(req->p);
+    free(req);
+}
diff --git a/src/VBox/NetworkServices/NAT/pxping_win.c b/src/VBox/NetworkServices/NAT/pxping_win.c
new file mode 100644
index 00000000..95a75dc0
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/pxping_win.c
@@ -0,0 +1,662 @@
+/* $Id: pxping_win.c $ */
+/** @file
+ * NAT Network - ping proxy, Windows ICMP API version.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+#include "proxy.h"
+#include "pxremap.h"
+
+#include "lwip/ip.h"
+#include "lwip/icmp.h"
+#include "lwip/inet_chksum.h"
+
+/* XXX: lwIP names conflict with winsock <iphlpapi.h> */
+#undef IP_STATS
+#undef ICMP_STATS
+#undef TCP_STATS
+#undef UDP_STATS
+#undef IP6_STATS
+
+#include <winternl.h> /* for PIO_APC_ROUTINE &c */
+#ifndef PIO_APC_ROUTINE_DEFINED
+# define PIO_APC_ROUTINE_DEFINED 1
+#endif
+#include <iprt/win/iphlpapi.h>
+#include <icmpapi.h>
+
+#include <stdio.h>
+
+
+/**
+ * State of the ping proxy in the Windows ICMP API implementation.
+ * A single instance (g_pxping) is filled in by pxping_init().
+ */
+struct pxping {
+ /*
+ * We use single ICMP handle for all pings. This means that all
+ * proxied pings will have the same id and share single sequence
+ * of sequence numbers.
+ */
+ /* handle from IcmpCreateFile(), INVALID_HANDLE_VALUE on failure */
+ HANDLE hdl4;
+ /* handle from Icmp6CreateFile(), INVALID_HANDLE_VALUE on failure */
+ HANDLE hdl6;
+
+ /* interface that replies are sent out on */
+ struct netif *netif;
+
+ /*
+ * On Windows XP and Windows Server 2003 IcmpSendEcho2() callback
+ * is FARPROC, but starting from Vista it's PIO_APC_ROUTINE with
+ * two extra arguments. Callbacks use WINAPI (stdcall) calling
+ * convention with callee responsible for popping the arguments,
+ * so to avoid stack corruption we check windows version at run
+ * time and provide correct callback.
+ */
+ /* completion callback passed to IcmpSendEcho2() */
+ PIO_APC_ROUTINE pfnCallback4;
+ /* completion callback passed to Icmp6SendEcho2() */
+ PIO_APC_ROUTINE pfnCallback6;
+};
+
+
+/**
+ * Per-request context of a proxied IPv4 ping. Allocated by
+ * pxping_recv4() with room for bufsize bytes in buf, handed to
+ * IcmpSendEcho2() as the callback context and freed by the callback.
+ */
+struct pong4 {
+ /* interface the reply is sent out on (copied from struct pxping) */
+ struct netif *netif;
+
+ /* copies of the guest's request headers, used to build the reply */
+ struct ip_hdr reqiph;
+ struct icmp_echo_hdr reqicmph;
+
+ /* number of bytes available in buf */
+ size_t bufsize;
+ /* reply buffer; really bufsize bytes long (variable-size tail) */
+ u8_t buf[1];
+};
+
+
+/**
+ * Per-request context of a proxied IPv6 ping. Allocated by
+ * pxping_recv6() with room for bufsize bytes in buf, handed to
+ * Icmp6SendEcho2() as the callback context and freed by the callback.
+ */
+struct pong6 {
+ /* interface the reply is sent out on (copied from struct pxping) */
+ struct netif *netif;
+
+ /* source address of the guest's request; destination of the reply */
+ ip6_addr_t reqsrc;
+ /* copy of the guest's echo request header (id and seqno are reused) */
+ struct icmp6_echo_hdr reqicmph;
+ /* size of the request payload; also assumed to be the reply data size */
+ size_t reqsize;
+
+ /* number of bytes available in buf */
+ size_t bufsize;
+ /* reply buffer; really bufsize bytes long (variable-size tail) */
+ u8_t buf[1];
+};
+
+
+/* handlers for echo requests that are to be proxied */
+static void pxping_recv4(void *arg, struct pbuf *p);
+static void pxping_recv6(void *arg, struct pbuf *p);
+
+/*
+ * IPv4 completion callbacks: the two WINAPI entry points differ only
+ * in signature and both delegate to pxping_icmp4_callback().
+ */
+static VOID WINAPI pxping_icmp4_callback_old(void *);
+static VOID WINAPI pxping_icmp4_callback_apc(void *, PIO_STATUS_BLOCK, ULONG);
+static void pxping_icmp4_callback(struct pong4 *pong);
+
+/* IPv6 completion callbacks, same arrangement as for IPv4 */
+static VOID WINAPI pxping_icmp6_callback_old(void *);
+static VOID WINAPI pxping_icmp6_callback_apc(void *, PIO_STATUS_BLOCK, ULONG);
+static void pxping_icmp6_callback(struct pong6 *pong);
+
+
+/* the one and only ping proxy instance, set up by pxping_init() */
+struct pxping g_pxping;
+
+
+/**
+ * Set up the global ping proxy: pick completion callbacks matching
+ * the running Windows version, open the ICMP and ICMPv6 handles and
+ * register the receive hooks for whichever of them opened.
+ *
+ * @param netif  interface that replies to the guest are sent on.
+ * @param sock4  unused in the Windows ICMP API implementation.
+ * @param sock6  unused in the Windows ICMP API implementation.
+ *
+ * @return ERR_OK if at least one of the two handles could be opened,
+ *         ERR_ARG if the OS version cannot be determined or neither
+ *         handle could be opened.
+ */
+err_t
+pxping_init(struct netif *netif, SOCKET sock4, SOCKET sock6)
+{
+    OSVERSIONINFO verinfo;
+
+    LWIP_UNUSED_ARG(sock4);
+    LWIP_UNUSED_ARG(sock6);
+
+    ZeroMemory(&verinfo, sizeof(OSVERSIONINFO));
+    verinfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+    if (GetVersionEx(&verinfo) == 0) {
+        return ERR_ARG;
+    }
+
+    /* pre-Vista systems call back with a single argument */
+    if (verinfo.dwMajorVersion < 6) {
+        g_pxping.pfnCallback4 = (PIO_APC_ROUTINE)pxping_icmp4_callback_old;
+        g_pxping.pfnCallback6 = (PIO_APC_ROUTINE)pxping_icmp6_callback_old;
+    }
+    else {
+        g_pxping.pfnCallback4 = pxping_icmp4_callback_apc;
+        g_pxping.pfnCallback6 = pxping_icmp6_callback_apc;
+    }
+
+    g_pxping.hdl4 = IcmpCreateFile();
+    if (g_pxping.hdl4 == INVALID_HANDLE_VALUE) {
+        DPRINTF(("IcmpCreateFile: error %d\n", GetLastError()));
+    }
+    else {
+        ping_proxy_accept(pxping_recv4, &g_pxping);
+    }
+
+    g_pxping.hdl6 = Icmp6CreateFile();
+    if (g_pxping.hdl6 == INVALID_HANDLE_VALUE) {
+        DPRINTF(("Icmp6CreateFile: error %d\n", GetLastError()));
+    }
+    else {
+        ping6_proxy_accept(pxping_recv6, &g_pxping);
+    }
+
+    /* one working address family is enough to be useful */
+    if (g_pxping.hdl4 != INVALID_HANDLE_VALUE
+        || g_pxping.hdl6 != INVALID_HANDLE_VALUE)
+    {
+        g_pxping.netif = netif;
+        return ERR_OK;
+    }
+
+    return ERR_ARG;
+}
+
+
+/**
+ * ICMP Echo Request in pbuf "p" is to be proxied.
+ *
+ * Sends the request to the (possibly remapped) destination with
+ * IcmpSendEcho2() in asynchronous mode. On success the allocated
+ * struct pong4 is left to the completion callback; on every other
+ * path it is freed here. The pbuf is always freed before returning.
+ *
+ * @param arg  the struct pxping registered with ping_proxy_accept().
+ * @param p    the request; payload points at the ICMP echo header.
+ */
+static void
+pxping_recv4(void *arg, struct pbuf *p)
+{
+ struct pxping *pxping = (struct pxping *)arg;
+ const struct ip_hdr *iph;
+ const struct icmp_echo_hdr *icmph;
+ u16_t iphlen;
+ size_t bufsize;
+ struct pong4 *pong;
+ IPAddr dst;
+ int mapped;
+ int ttl;
+ IP_OPTION_INFORMATION opts;
+ void *reqdata;
+ size_t reqsize;
+ int status;
+
+ /* nothing to free at "out" until the context is allocated */
+ pong = NULL;
+
+ iphlen = ip_current_header_tot_len();
+ if (RT_UNLIKELY(iphlen != IP_HLEN)) { /* we don't do options */
+ goto out;
+ }
+
+ iph = (const struct ip_hdr *)ip_current_header();
+ icmph = (const struct icmp_echo_hdr *)p->payload;
+
+ /* translate guest-visible destination to the address to ping */
+ mapped = pxremap_outbound_ip4((ip_addr_t *)&dst, (ip_addr_t *)&iph->dest);
+ if (RT_UNLIKELY(mapped == PXREMAP_FAILED)) {
+ goto out;
+ }
+
+ /* when not remapped we act as a router hop: check and decrement TTL */
+ ttl = IPH_TTL(iph);
+ if (mapped == PXREMAP_ASIS) {
+ if (RT_UNLIKELY(ttl == 1)) {
+ status = pbuf_header(p, iphlen); /* back to IP header */
+ if (RT_LIKELY(status == 0)) {
+ icmp_time_exceeded(p, ICMP_TE_TTL);
+ }
+ goto out;
+ }
+ --ttl;
+ }
+
+ status = pbuf_header(p, -(u16_t)sizeof(*icmph)); /* to ping payload */
+ if (RT_UNLIKELY(status != 0)) {
+ goto out;
+ }
+
+ /*
+ * Reply buffer size: ICMP_ECHO_REPLY, plus the larger of the
+ * request payload and IO_STATUS_BLOCK + echo header, plus slack.
+ */
+ bufsize = sizeof(ICMP_ECHO_REPLY);
+ if (p->tot_len < sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo_hdr))
+ bufsize += sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo_hdr);
+ else
+ bufsize += p->tot_len;
+ bufsize += 16; /* whatever that is; empirically at least XP needs it */
+
+ /* context and reply buffer are a single allocation */
+ pong = (struct pong4 *)malloc(RT_UOFFSETOF(struct pong4, buf) + bufsize);
+ if (RT_UNLIKELY(pong == NULL)) {
+ goto out;
+ }
+ pong->bufsize = bufsize;
+ pong->netif = pxping->netif;
+
+ /* keep the request headers; the callback builds the reply from them */
+ memcpy(&pong->reqiph, iph, sizeof(*iph));
+ memcpy(&pong->reqicmph, icmph, sizeof(*icmph));
+
+ reqsize = p->tot_len;
+ if (p->next == NULL) {
+ /* single pbuf can be directly used as request data source */
+ reqdata = p->payload;
+ }
+ else {
+ /* data from pbuf chain must be concatenated */
+ /* NOTE(review): buf is both request source and reply buffer here */
+ pbuf_copy_partial(p, pong->buf, p->tot_len, 0);
+ reqdata = pong->buf;
+ }
+
+ /* carry over TTL, TOS and the don't-fragment bit of the request */
+ opts.Ttl = ttl;
+ opts.Tos = IPH_TOS(iph); /* affected by DisableUserTOSSetting key */
+ opts.Flags = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0 ? IP_FLAG_DF : 0;
+ opts.OptionsSize = 0;
+ opts.OptionsData = 0;
+
+ status = IcmpSendEcho2(pxping->hdl4, NULL,
+ pxping->pfnCallback4, pong,
+ dst, reqdata, (WORD)reqsize, &opts,
+ pong->buf, (DWORD)pong->bufsize,
+ 5 * 1000 /* ms */);
+
+ /*
+ * With a callback supplied the expected outcome is a zero return
+ * with ERROR_IO_PENDING as the last error; anything else means the
+ * request was not queued.
+ */
+ if (RT_UNLIKELY(status != 0)) {
+ DPRINTF(("IcmpSendEcho2: unexpected status %d\n", status));
+ goto out;
+ }
+ if ((status = GetLastError()) != ERROR_IO_PENDING) {
+ int code;
+
+ DPRINTF(("IcmpSendEcho2: error %d\n", status));
+ /* report unreachability back to the guest where we can tell */
+ switch (status) {
+ case ERROR_NETWORK_UNREACHABLE:
+ code = ICMP_DUR_NET;
+ break;
+ case ERROR_HOST_UNREACHABLE:
+ code = ICMP_DUR_HOST;
+ break;
+ default:
+ code = -1;
+ break;
+ }
+
+ if (code != -1) {
+ /* move payload back to IP header */
+ status = pbuf_header(p, (u16_t)(sizeof(*icmph) + iphlen));
+ if (RT_LIKELY(status == 0)) {
+ icmp_dest_unreach(p, code);
+ }
+ }
+ goto out;
+ }
+
+ pong = NULL; /* callback owns it now */
+ out:
+ if (pong != NULL) {
+ free(pong);
+ }
+ pbuf_free(p);
+}
+
+
+/**
+ * IPv4 completion callback with the three-argument PIO_APC_ROUTINE
+ * signature.  Processes the reply and frees the request context.
+ *
+ * @param ctx       the struct pong4 passed to IcmpSendEcho2().
+ * @param iob       unused.
+ * @param reserved  unused.
+ */
+static VOID WINAPI
+pxping_icmp4_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved)
+{
+    struct pong4 *reply_ctx;
+
+    LWIP_UNUSED_ARG(iob);
+    LWIP_UNUSED_ARG(reserved);
+
+    reply_ctx = (struct pong4 *)ctx;
+    if (reply_ctx == NULL) {
+        return;
+    }
+
+    pxping_icmp4_callback(reply_ctx);
+    free(reply_ctx);
+}
+
+
+/**
+ * IPv4 completion callback with the single-argument signature.
+ * Processes the reply and frees the request context.
+ *
+ * @param ctx  the struct pong4 passed to IcmpSendEcho2().
+ */
+static VOID WINAPI
+pxping_icmp4_callback_old(void *ctx)
+{
+    struct pong4 *reply_ctx = (struct pong4 *)ctx;
+
+    if (reply_ctx == NULL) {
+        return;
+    }
+
+    pxping_icmp4_callback(reply_ctx);
+    free(reply_ctx);
+}
+
+
+/**
+ * Map the status of an ICMP_ECHO_REPLY to the type and code of the
+ * ICMP error message that reports it.
+ *
+ * @return 1 and fills in *ptype / *pcode if the status is one we
+ *         translate, 0 otherwise (outputs untouched).
+ */
+static int
+pxping_icmp4_status_to_icmp(ULONG status, u8_t *ptype, u8_t *pcode)
+{
+    static const struct {
+        ULONG status;
+        u8_t type;
+        u8_t code;
+    } s_map[] = {
+        { IP_DEST_NET_UNREACHABLE,  ICMP_DUR, ICMP_DUR_NET   },
+        { IP_DEST_HOST_UNREACHABLE, ICMP_DUR, ICMP_DUR_HOST  },
+        { IP_DEST_PROT_UNREACHABLE, ICMP_DUR, ICMP_DUR_PROTO },
+        { IP_PACKET_TOO_BIG,        ICMP_DUR, ICMP_DUR_FRAG  },
+        { IP_SOURCE_QUENCH,         ICMP_SQ,  0              },
+        { IP_TTL_EXPIRED_TRANSIT,   ICMP_TE,  ICMP_TE_TTL    },
+        { IP_TTL_EXPIRED_REASSEM,   ICMP_TE,  ICMP_TE_FRAG   },
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(s_map) / sizeof(s_map[0]); ++i) {
+        if (s_map[i].status == status) {
+            *ptype = s_map[i].type;
+            *pcode = s_map[i].code;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+
+/**
+ * Turn the outcome of a proxied IPv4 ping into an ICMP message for
+ * the guest: an echo reply carrying the returned data on success, or
+ * an ICMP error quoting the original request headers for the
+ * statuses we know how to translate.  Timeouts, replies with IP
+ * options and untranslatable statuses are dropped.
+ *
+ * @param pong  request context; its buf holds the reply.  Not freed
+ *              here.
+ */
+static void
+pxping_icmp4_callback(struct pong4 *pong)
+{
+    ICMP_ECHO_REPLY *reply;
+    struct icmp_echo_hdr *hdr;
+    struct pbuf *out;
+    ip_addr_t from;
+    size_t msglen;
+    int remap;
+
+    if (IcmpParseReplies(pong->buf, (DWORD)pong->bufsize) == 0) {
+        DWORD error = GetLastError();
+        if (error != IP_REQ_TIMED_OUT) {
+            DPRINTF(("pong4: %p: IcmpParseReplies: error %d\n",
+                     (void *)pong, error));
+        }
+        else {
+            DPRINTF2(("pong4: %p timed out\n", (void *)pong));
+        }
+        return;
+    }
+
+    reply = (ICMP_ECHO_REPLY *)pong->buf;
+
+    /* don't do options */
+    if (reply->Options.OptionsSize != 0) {
+        return;
+    }
+
+    /* translate the replying host into the address the guest knows */
+    remap = pxremap_inbound_ip4(&from, (ip_addr_t *)&reply->Address);
+    if (remap == PXREMAP_FAILED) {
+        return;
+    }
+    if (remap == PXREMAP_ASIS) {
+        /* passing through unchanged, so we count as a hop */
+        if (reply->Options.Ttl == 1) {
+            return;
+        }
+        reply->Options.Ttl -= 1;
+    }
+
+    if (reply->Status != IP_SUCCESS) {
+        u8_t type, code;
+        u8_t *quoted;
+
+        if (!pxping_icmp4_status_to_icmp(reply->Status, &type, &code)) {
+            DPRINTF(("pong4: reply status %d, dropped\n", reply->Status));
+            return;
+        }
+
+        DPRINTF(("pong4: reply status %d -> type %d/code %d\n",
+                 reply->Status, type, code));
+
+        /* error header followed by the original IP and echo headers */
+        msglen = sizeof(*hdr) + sizeof(pong->reqiph) + sizeof(pong->reqicmph);
+
+        out = pbuf_alloc(PBUF_IP, (u16_t)msglen, PBUF_RAM);
+        if (RT_UNLIKELY(out == NULL)) {
+            return;
+        }
+
+        hdr = (struct icmp_echo_hdr *)out->payload;
+        hdr->type = type;
+        hdr->code = code;
+        hdr->chksum = 0;
+        hdr->id = 0;
+        hdr->seqno = 0;
+
+        /*
+         * XXX: we don't know the TTL of the request at the time this
+         * ICMP error was generated (we can guess it was 1 for ttl
+         * exceeded, but don't bother faking it).
+         */
+        quoted = (u8_t *)out->payload + sizeof(*hdr);
+        memcpy(quoted, &pong->reqiph, sizeof(pong->reqiph));
+        memcpy(quoted + sizeof(pong->reqiph),
+               &pong->reqicmph, sizeof(pong->reqicmph));
+    }
+    else {
+        msglen = sizeof(struct icmp_echo_hdr) + reply->DataSize;
+
+        /* can't deliver an unfragmentable reply that exceeds the MTU */
+        if ((reply->Options.Flags & IP_FLAG_DF) != 0
+            && IP_HLEN + msglen > pong->netif->mtu)
+        {
+            return;
+        }
+
+        out = pbuf_alloc(PBUF_IP, (u16_t)msglen, PBUF_RAM);
+        if (RT_UNLIKELY(out == NULL)) {
+            return;
+        }
+
+        /* echo reply with the id and sequence number the guest used */
+        hdr = (struct icmp_echo_hdr *)out->payload;
+        hdr->type = ICMP_ER;
+        hdr->code = 0;
+        hdr->chksum = 0;
+        hdr->id = pong->reqicmph.id;
+        hdr->seqno = pong->reqicmph.seqno;
+
+        memcpy((u8_t *)out->payload + sizeof(*hdr),
+               reply->Data, reply->DataSize);
+    }
+
+    hdr->chksum = inet_chksum(out->payload, (u16_t)msglen);
+    ip_output_if(out, &from,
+                 (ip_addr_t *)&pong->reqiph.src, /* dst */
+                 reply->Options.Ttl,
+                 reply->Options.Tos,
+                 IPPROTO_ICMP,
+                 pong->netif);
+    pbuf_free(out);
+}
+
+
+/**
+ * ICMPv6 Echo Request in pbuf "p" is to be proxied.
+ *
+ * Sends the request to the (possibly remapped) destination with
+ * Icmp6SendEcho2() in asynchronous mode. On success the allocated
+ * struct pong6 is left to the completion callback; on every other
+ * path it is freed here. The pbuf is always freed before returning.
+ *
+ * @param arg  the struct pxping registered with ping6_proxy_accept().
+ * @param p    the request; payload points at the ICMPv6 echo header.
+ */
+static void
+pxping_recv6(void *arg, struct pbuf *p)
+{
+ struct pxping *pxping = (struct pxping *)arg;
+ struct icmp6_echo_hdr *icmph;
+ size_t bufsize;
+ struct pong6 *pong;
+ int mapped;
+ void *reqdata;
+ size_t reqsize;
+ struct sockaddr_in6 src, dst;
+ int hopl;
+ IP_OPTION_INFORMATION opts;
+ int status;
+
+ /* nothing to free at "out" until the context is allocated */
+ pong = NULL;
+
+ icmph = (struct icmp6_echo_hdr *)p->payload;
+
+ /* translate guest-visible destination to the address to ping */
+ memset(&dst, 0, sizeof(dst));
+ dst.sin6_family = AF_INET6;
+ mapped = pxremap_outbound_ip6((ip6_addr_t *)&dst.sin6_addr,
+ ip6_current_dest_addr());
+ if (RT_UNLIKELY(mapped == PXREMAP_FAILED)) {
+ goto out;
+ }
+
+ /* when not remapped we act as a router hop: check and decrement */
+ hopl = IP6H_HOPLIM(ip6_current_header());
+ if (mapped == PXREMAP_ASIS) {
+ if (RT_UNLIKELY(hopl == 1)) {
+ status = pbuf_header(p, ip_current_header_tot_len());
+ if (RT_LIKELY(status == 0)) {
+ icmp6_time_exceeded(p, ICMP6_TE_HL);
+ }
+ goto out;
+ }
+ --hopl;
+ }
+
+ status = pbuf_header(p, -(u16_t)sizeof(*icmph)); /* to ping payload */
+ if (RT_UNLIKELY(status != 0)) {
+ goto out;
+ }
+
+ /* XXX: parroted from IPv4 version, not tested all os version/bitness */
+ bufsize = sizeof(ICMPV6_ECHO_REPLY);
+ if (p->tot_len < sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp6_echo_hdr))
+ bufsize += sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp6_echo_hdr);
+ else
+ bufsize += p->tot_len;
+ bufsize += 16;
+
+ /* context and reply buffer are a single allocation */
+ pong = (struct pong6 *)malloc(RT_UOFFSETOF(struct pong6, buf) + bufsize);
+ if (RT_UNLIKELY(pong == NULL)) {
+ goto out;
+ }
+ pong->bufsize = bufsize;
+ pong->netif = pxping->netif;
+
+ /* remember who asked and with which id/seqno, for the reply */
+ ip6_addr_copy(pong->reqsrc, *ip6_current_src_addr());
+ memcpy(&pong->reqicmph, icmph, sizeof(*icmph));
+
+ /* fill the reply buffer with a recognizable pattern */
+ memset(pong->buf, 0xa5, pong->bufsize);
+
+ pong->reqsize = reqsize = p->tot_len;
+ if (p->next == NULL) {
+ /* single pbuf can be directly used as request data source */
+ reqdata = p->payload;
+ }
+ else {
+ /* data from pbuf chain must be concatenated */
+ /* NOTE(review): buf is both request source and reply buffer here */
+ pbuf_copy_partial(p, pong->buf, p->tot_len, 0);
+ reqdata = pong->buf;
+ }
+
+ memset(&src, 0, sizeof(src));
+ src.sin6_family = AF_INET6;
+ src.sin6_addr = in6addr_any; /* let the OS select host source address */
+
+ /* only the hop limit is carried over from the request */
+ memset(&opts, 0, sizeof(opts));
+ opts.Ttl = hopl;
+
+ status = Icmp6SendEcho2(pxping->hdl6, NULL,
+ pxping->pfnCallback6, pong,
+ &src, &dst, reqdata, (WORD)reqsize, &opts,
+ pong->buf, (DWORD)pong->bufsize,
+ 5 * 1000 /* ms */);
+
+ /*
+ * With a callback supplied the expected outcome is a zero return
+ * with ERROR_IO_PENDING as the last error; anything else means the
+ * request was not queued.
+ */
+ if (RT_UNLIKELY(status != 0)) {
+ DPRINTF(("Icmp6SendEcho2: unexpected status %d\n", status));
+ goto out;
+ }
+ if ((status = GetLastError()) != ERROR_IO_PENDING) {
+ int code;
+
+ DPRINTF(("Icmp6SendEcho2: error %d\n", status));
+ /* report unreachability back to the guest where we can tell */
+ switch (status) {
+ case ERROR_NETWORK_UNREACHABLE:
+ case ERROR_HOST_UNREACHABLE:
+ code = ICMP6_DUR_NO_ROUTE;
+ break;
+ default:
+ code = -1;
+ break;
+ }
+
+ if (code != -1) {
+ /* move payload back to IP header */
+ status = pbuf_header(p, (u16_t)(sizeof(*icmph)
+ + ip_current_header_tot_len()));
+ if (RT_LIKELY(status == 0)) {
+ icmp6_dest_unreach(p, code);
+ }
+ }
+ goto out;
+ }
+
+ pong = NULL; /* callback owns it now */
+ out:
+ if (pong != NULL) {
+ free(pong);
+ }
+ pbuf_free(p);
+}
+
+
+/**
+ * IPv6 completion callback with the three-argument PIO_APC_ROUTINE
+ * signature.  Processes the reply and frees the request context.
+ *
+ * @param ctx       the struct pong6 passed to Icmp6SendEcho2().
+ * @param iob       unused.
+ * @param reserved  unused.
+ */
+static VOID WINAPI
+pxping_icmp6_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved)
+{
+    struct pong6 *reply_ctx;
+
+    LWIP_UNUSED_ARG(iob);
+    LWIP_UNUSED_ARG(reserved);
+
+    reply_ctx = (struct pong6 *)ctx;
+    if (reply_ctx == NULL) {
+        return;
+    }
+
+    pxping_icmp6_callback(reply_ctx);
+    free(reply_ctx);
+}
+
+
+/**
+ * IPv6 completion callback with the single-argument signature.
+ * Processes the reply and frees the request context.
+ *
+ * @param ctx  the struct pong6 passed to Icmp6SendEcho2().
+ */
+static VOID WINAPI
+pxping_icmp6_callback_old(void *ctx)
+{
+    struct pong6 *reply_ctx = (struct pong6 *)ctx;
+
+    if (reply_ctx == NULL) {
+        return;
+    }
+
+    pxping_icmp6_callback(reply_ctx);
+    free(reply_ctx);
+}
+
+
+/**
+ * Turn the outcome of a proxied IPv6 ping into an ICMPv6 echo reply
+ * for the guest.  Timeouts, parse failures and replies whose status
+ * is not IP_SUCCESS are dropped.
+ *
+ * @param pong  request context; its buf holds the reply.  Not freed
+ *              here.
+ */
+static void
+pxping_icmp6_callback(struct pong6 *pong)
+{
+    DWORD nreplies;
+    ICMPV6_ECHO_REPLY *reply;
+    struct pbuf *p;
+    struct icmp6_echo_hdr *icmph;
+    size_t icmplen;
+    ip6_addr_t src;
+    int mapped;
+
+    nreplies = Icmp6ParseReplies(pong->buf, (DWORD)pong->bufsize);
+    if (nreplies == 0) {
+        DWORD error = GetLastError();
+        if (error == IP_REQ_TIMED_OUT) {
+            DPRINTF2(("pong6: %p timed out\n", (void *)pong));
+        }
+        else {
+            DPRINTF(("pong6: %p: Icmp6ParseReplies: error %d\n",
+                     (void *)pong, error));
+        }
+        return;
+    }
+
+    reply = (ICMPV6_ECHO_REPLY *)pong->buf;
+
+    /*
+     * A parsed reply is not necessarily an echo reply: the status
+     * tells whether the target answered or some node on the path
+     * reported an error.  We keep no copy of the request to quote in
+     * an ICMPv6 error, so only genuine echo replies are forwarded;
+     * presenting anything else as an echo reply would mislead the
+     * guest.
+     */
+    if (reply->Status != IP_SUCCESS) {
+        DPRINTF(("pong6: reply status %d, dropped\n", reply->Status));
+        return;
+    }
+
+    mapped = pxremap_inbound_ip6(&src, (ip6_addr_t *)reply->Address.sin6_addr);
+    if (mapped == PXREMAP_FAILED) {
+        return;
+    }
+
+    /*
+     * Reply data follows ICMPV6_ECHO_REPLY structure in memory, but
+     * it doesn't tell us its size.  Assume it's equal the size of the
+     * request.
+     */
+    icmplen = sizeof(*icmph) + pong->reqsize;
+    p = pbuf_alloc(PBUF_IP, (u16_t)icmplen, PBUF_RAM);
+    if (RT_UNLIKELY(p == NULL)) {
+        return;
+    }
+
+    /* echo reply with the id and sequence number the guest used */
+    icmph = (struct icmp6_echo_hdr *)p->payload;
+    icmph->type = ICMP6_TYPE_EREP;
+    icmph->code = 0;
+    icmph->chksum = 0;
+    icmph->id = pong->reqicmph.id;
+    icmph->seqno = pong->reqicmph.seqno;
+
+    memcpy((u8_t *)p->payload + sizeof(*icmph),
+           pong->buf + sizeof(*reply), pong->reqsize);
+
+    icmph->chksum = ip6_chksum_pseudo(p, IP6_NEXTH_ICMP6, p->tot_len,
+                                      &src, &pong->reqsrc);
+    ip6_output_if(p, /* :src */ &src, /* :dst */ &pong->reqsrc,
+                  LWIP_ICMP6_HL, 0, IP6_NEXTH_ICMP6,
+                  pong->netif);
+    pbuf_free(p);
+}
diff --git a/src/VBox/NetworkServices/NAT/pxremap.c b/src/VBox/NetworkServices/NAT/pxremap.c
new file mode 100644
index 00000000..5332fabf
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/pxremap.c
@@ -0,0 +1,323 @@
+/* $Id: pxremap.c $ */
+/** @file
+ * NAT Network - Loopback remapping.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+/*
+ * This file contains functions pertinent to magic address remapping.
+ *
+ * We want to expose host's loopback interfaces to the guest by
+ * mapping them to the addresses from the same prefix/subnet, so if,
+ * for example proxy interface is 10.0.2.1, we redirect traffic to
+ * 10.0.2.2 to host's 127.0.0.1 loopback. If need be, we may extend
+ * this to provide additional mappings, e.g. 127.0.1.1 loopback
+ * address is used on Ubuntu 12.10+ for NetworkManager's dnsmasq.
+ *
+ * Ditto for IPv6, except that IPv6 only has one loopback address.
+ */
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+#include "pxremap.h"
+#include "proxy.h"
+
+#include "lwip/netif.h"
+#include "netif/etharp.h" /* proxy arp hook */
+
+#include "lwip/ip4.h" /* IPv4 divert hook */
+#include "lwip/ip6.h" /* IPv6 divert hook */
+
+#include <string.h>
+
+
+/**
+ * Test whether "dst" is on netif's subnet at a host offset that the
+ * configured loopback map redirects to a host loopback address.
+ *
+ * If "lo" is not NULL and a match is found, the matching loopback
+ * address is stored in "*lo". Returns 1 on a match, 0 otherwise
+ * (including when no loopback map is configured).
+ */
+static int
+proxy_ip4_is_mapped_loopback(struct netif *netif, const ip_addr_t *dst, ip_addr_t *lo)
+{
+    const struct ip4_lomap *table;
+    u32_t hostpart;
+    size_t n;
+
+    LWIP_ASSERT1(dst != NULL);
+
+    if (g_proxy_options->lomap_desc == NULL) {
+        return 0;
+    }
+    if (!ip_addr_netcmp(dst, &netif->ip_addr, &netif->netmask)) {
+        return 0;
+    }
+
+    /* XXX: TODO: check netif is a proxying netif! */
+
+    /* host part of the address, in host byte order */
+    hostpart = ntohl(ip4_addr_get_u32(dst) & ~ip4_addr_get_u32(&netif->netmask));
+
+    table = g_proxy_options->lomap_desc->lomap;
+    for (n = 0; n < g_proxy_options->lomap_desc->num_lomap; ++n) {
+        if (table[n].off != hostpart) {
+            continue;
+        }
+        if (lo != NULL) {
+            ip_addr_copy(*lo, table[n].loaddr);
+        }
+        return 1;
+    }
+
+    return 0;
+}
+
+
+#if ARP_PROXY
+/**
+ * Proxy ARP hook for etharp_arp_input(): a non-zero result asks for
+ * a proxy ARP reply to be generated for "dst".
+ */
+int
+pxremap_proxy_arp(struct netif *netif, ip_addr_t *dst)
+{
+    const int is_mapped = proxy_ip4_is_mapped_loopback(netif, dst, NULL);
+
+    return is_mapped;
+}
+#endif /* ARP_PROXY */
+
+
+/**
+ * Divert hook for ip_forward(): a non-zero result sends packets for
+ * "dst" to the proxy rather than forwarding them via "netif" or
+ * dropping them.
+ */
+int
+pxremap_ip4_divert(struct netif *netif, ip_addr_t *dst)
+{
+    const int is_mapped = proxy_ip4_is_mapped_loopback(netif, dst, NULL);
+
+    return is_mapped;
+}
+
+
+/**
+ * Local network -> loopback mapping for outbound connections.
+ *
+ * If "src" is a local network address that stands for one of the
+ * host's loopback addresses on some interface that is up, store that
+ * loopback address in "dst" and return PXREMAP_MAPPED. Otherwise
+ * copy "src" to "dst" with ip_addr_set(dst, src) and return
+ * PXREMAP_ASIS.
+ */
+int
+pxremap_outbound_ip4(ip_addr_t *dst, ip_addr_t *src)
+{
+    struct netif *ifp;
+
+    LWIP_ASSERT1(dst != NULL);
+    LWIP_ASSERT1(src != NULL);
+
+    ifp = netif_list;
+    while (ifp != NULL) {
+        /* TODO: also require that this is a proxy netif */
+        if (netif_is_up(ifp)
+            && proxy_ip4_is_mapped_loopback(ifp, src, dst))
+        {
+            return PXREMAP_MAPPED;
+        }
+        ifp = ifp->next;
+    }
+
+    /* no interface maps it: pass the address through unchanged */
+    ip_addr_set(dst, src);
+    return PXREMAP_ASIS;
+}
+
+
+/**
+ * Mapping from loopback to local network for inbound (port-forwarded)
+ * connections.
+ *
+ * Copy "src" to "dst" with ip_addr_set(dst, src), but if "src" is a
+ * host's loopback address, copy local network address that maps it to
+ * "dst".
+ *
+ * Returns PXREMAP_ASIS if "src" is not in the loopback network,
+ * PXREMAP_MAPPED if a loopback map entry for "src" was found, and
+ * PXREMAP_FAILED if "src" is a loopback address with no mapping (in
+ * which case "dst" is not written).
+ *
+ * NOTE(review): unlike pxremap_outbound_ip4() this does not assert
+ * that "dst" and "src" are non-NULL - confirm callers guarantee it.
+ */
+int
+pxremap_inbound_ip4(ip_addr_t *dst, ip_addr_t *src)
+{
+ struct netif *netif;
+ const struct ip4_lomap *lomap;
+ unsigned int i;
+
+ if (ip4_addr1(src) != IP_LOOPBACKNET) {
+ ip_addr_set(dst, src);
+ return PXREMAP_ASIS;
+ }
+
+ if (g_proxy_options->lomap_desc == NULL) {
+ return PXREMAP_FAILED;
+ }
+
+#if 0 /* ?TODO: with multiple interfaces we need to consider fwspec::dst */
+ netif = ip_route(target);
+ if (netif == NULL) {
+ return PXREMAP_FAILED;
+ }
+#else
+ netif = netif_list; /* the asserts below require exactly one netif */
+ LWIP_ASSERT1(netif != NULL);
+ LWIP_ASSERT1(netif->next == NULL);
+#endif
+
+ lomap = g_proxy_options->lomap_desc->lomap;
+ for (i = 0; i < g_proxy_options->lomap_desc->num_lomap; ++i) {
+ if (ip_addr_cmp(src, &lomap[i].loaddr)) {
+ ip_addr_t net;
+
+ ip_addr_get_network(&net, &netif->ip_addr, &netif->netmask);
+ ip4_addr_set_u32(dst, /* network address plus the entry's host offset */
+ htonl(ntohl(ip4_addr_get_u32(&net))
+ + lomap[i].off));
+ return PXREMAP_MAPPED;
+ }
+ }
+
+ return PXREMAP_FAILED;
+}
+
+/**
+ * Check if "dst" is an IPv6 address that proxy remaps to host's
+ * loopback.
+ *
+ * For every preferred, unique-local address of "netif", "dst"
+ * matches when all but its last byte (sizeof(ip6_addr_t) - 1 bytes)
+ * equal the interface address and the low 8 bits of its last block
+ * are one greater than those of the interface address. Returns 1 on
+ * a match, 0 otherwise.
+ *
+ * NOTE(review): if the interface address ends in 0xff the right-hand
+ * side becomes 0x100 and can never match, whereas
+ * pxremap_inbound_ip6() increments the last byte with wrap-around -
+ * confirm this edge case is irrelevant in practice.
+ */
+static int
+proxy_ip6_is_mapped_loopback(struct netif *netif, ip6_addr_t *dst)
+{
+ int i;
+
+ /* XXX: TODO: check netif is a proxying netif! */
+
+ LWIP_ASSERT1(dst != NULL);
+
+ for (i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) {
+ if (ip6_addr_ispreferred(netif_ip6_addr_state(netif, i))
+ && ip6_addr_isuniquelocal(netif_ip6_addr(netif, i)))
+ {
+ ip6_addr_t *ifaddr = netif_ip6_addr(netif, i);
+ if (memcmp(dst, ifaddr, sizeof(ip6_addr_t) - 1) == 0
+ && ((IP6_ADDR_BLOCK8(dst) & 0xff)
+ == (IP6_ADDR_BLOCK8(ifaddr) & 0xff) + 1))
+ {
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+/**
+ * Proxy NA hook for nd6_input(): a non-zero result asks for a proxy
+ * neighbor advertisement to be generated for "dst".
+ */
+int
+pxremap_proxy_na(struct netif *netif, ip6_addr_t *dst)
+{
+    const int is_mapped = proxy_ip6_is_mapped_loopback(netif, dst);
+
+    return is_mapped;
+}
+
+
+/**
+ * Divert hook for ip6_forward(): a non-zero result sends packets for
+ * "dst" to the proxy rather than forwarding them via "netif" or
+ * dropping them.
+ */
+int
+pxremap_ip6_divert(struct netif *netif, ip6_addr_t *dst)
+{
+    const int is_mapped = proxy_ip6_is_mapped_loopback(netif, dst);
+
+    return is_mapped;
+}
+
+
+/**
+ * Mapping from local network to loopback for outbound connections.
+ *
+ * Copy "src" to "dst" with ip6_addr_set(dst, src), but if "src" is a
+ * local network address that maps host's loopback address, copy IPv6
+ * loopback address to "dst".
+ *
+ * Returns PXREMAP_MAPPED when "dst" was set to the loopback address
+ * and PXREMAP_ASIS when "src" was copied unchanged.
+ */
+int
+pxremap_outbound_ip6(ip6_addr_t *dst, ip6_addr_t *src)
+{
+    struct netif *netif;
+
+    LWIP_ASSERT1(dst != NULL);
+    LWIP_ASSERT1(src != NULL);
+
+    for (netif = netif_list; netif != NULL; netif = netif->next) {
+        if (!netif_is_up(netif) /* || this is not a proxy netif */) {
+            continue;
+        }
+
+        /*
+         * Same per-address test as the divert and proxy NA hooks use;
+         * call the shared helper instead of repeating its loop, like
+         * pxremap_outbound_ip4() does for IPv4.
+         */
+        if (proxy_ip6_is_mapped_loopback(netif, src)) {
+            ip6_addr_set_loopback(dst);
+            return PXREMAP_MAPPED;
+        }
+    }
+
+    /* not remapped, just copy src */
+    ip6_addr_set(dst, src);
+    return PXREMAP_ASIS;
+}
+
+
+/**
+ * Mapping from loopback to local network for inbound (port-forwarded)
+ * connections.
+ *
+ * Copy "src" to "dst" with ip6_addr_set(dst, src), but if "src" is a
+ * host's loopback address, copy local network address that maps it to
+ * "dst".
+ *
+ * Returns PXREMAP_ASIS if "src" is not the loopback address,
+ * PXREMAP_MAPPED if a preferred unique-local interface address was
+ * found to derive the mapping from, and PXREMAP_FAILED otherwise (in
+ * which case "dst" is not written).
+ */
+int
+pxremap_inbound_ip6(ip6_addr_t *dst, ip6_addr_t *src)
+{
+ ip6_addr_t loopback;
+ struct netif *netif;
+ int i;
+
+ ip6_addr_set_loopback(&loopback);
+ if (!ip6_addr_cmp(src, &loopback)) {
+ ip6_addr_set(dst, src);
+ return PXREMAP_ASIS;
+ }
+
+#if 0 /* ?TODO: with multiple interfaces we need to consider fwspec::dst */
+ netif = ip6_route_fwd(target);
+ if (netif == NULL) {
+ return PXREMAP_FAILED;
+ }
+#else
+ netif = netif_list; /* the asserts below require exactly one netif */
+ LWIP_ASSERT1(netif != NULL);
+ LWIP_ASSERT1(netif->next == NULL);
+#endif
+
+ for (i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) {
+ ip6_addr_t *ifaddr = netif_ip6_addr(netif, i);
+ if (ip6_addr_ispreferred(netif_ip6_addr_state(netif, i))
+ && ip6_addr_isuniquelocal(ifaddr))
+ {
+ ip6_addr_set(dst, ifaddr);
+ ++((u8_t *)&dst->addr[3])[3]; /* interface address with byte 3 of addr[3] bumped by one (wraps at 0xff) */
+ return PXREMAP_MAPPED;
+ }
+ }
+
+ return PXREMAP_FAILED;
+}
diff --git a/src/VBox/NetworkServices/NAT/pxremap.h b/src/VBox/NetworkServices/NAT/pxremap.h
new file mode 100644
index 00000000..9a2eecb6
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/pxremap.h
@@ -0,0 +1,52 @@
+/* $Id: pxremap.h $ */
+/** @file
+ * NAT Network - Loopback remapping, declarations and definitions.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_pxremap_h
+#define VBOX_INCLUDED_SRC_NAT_pxremap_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include "lwip/err.h"
+#include "lwip/ip_addr.h"
+
+struct netif;
+
+
+#define PXREMAP_FAILED (-1) /* address needs remapping but no mapping exists */
+#define PXREMAP_ASIS 0 /* address was copied unchanged */
+#define PXREMAP_MAPPED 1 /* address was replaced by its mapping */
+
+/* IPv4: lwIP hooks and outbound/inbound address remapping */
+#if ARP_PROXY
+int pxremap_proxy_arp(struct netif *netif, ip_addr_t *dst);
+#endif
+int pxremap_ip4_divert(struct netif *netif, ip_addr_t *dst);
+int pxremap_outbound_ip4(ip_addr_t *dst, ip_addr_t *src); /* returns PXREMAP_MAPPED or PXREMAP_ASIS */
+int pxremap_inbound_ip4(ip_addr_t *dst, ip_addr_t *src); /* may also return PXREMAP_FAILED */
+
+/* IPv6: lwIP hooks and outbound/inbound address remapping */
+int pxremap_proxy_na(struct netif *netif, ip6_addr_t *dst);
+int pxremap_ip6_divert(struct netif *netif, ip6_addr_t *dst);
+int pxremap_outbound_ip6(ip6_addr_t *dst, ip6_addr_t *src); /* returns PXREMAP_MAPPED or PXREMAP_ASIS */
+int pxremap_inbound_ip6(ip6_addr_t *dst, ip6_addr_t *src); /* may also return PXREMAP_FAILED */
+/*
+ * Dispatch to the IPv6 or IPv4 outbound remapping depending on
+ * "is_ipv6"; "dst" and "src" must point to objects that have both
+ * "ip4" and "ip6" members.
+ */
+#define pxremap_outbound_ipX(is_ipv6, dst, src) \
+ ((is_ipv6) ? pxremap_outbound_ip6(&(dst)->ip6, &(src)->ip6) \
+ : pxremap_outbound_ip4(&(dst)->ip4, &(src)->ip4))
+
+#endif /* !VBOX_INCLUDED_SRC_NAT_pxremap_h */
diff --git a/src/VBox/NetworkServices/NAT/pxtcp.c b/src/VBox/NetworkServices/NAT/pxtcp.c
new file mode 100644
index 00000000..bbfd1b2d
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/pxtcp.c
@@ -0,0 +1,2506 @@
+/* $Id: pxtcp.c $ */
+/** @file
+ * NAT Network - TCP proxy.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+
+#include "pxtcp.h"
+
+#include "proxy.h"
+#include "proxy_pollmgr.h"
+#include "pxremap.h"
+#include "portfwd.h" /* fwspec */
+
+#ifndef RT_OS_WINDOWS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#ifdef RT_OS_SOLARIS
+#include <sys/filio.h> /* FIONREAD is BSD'ism */
+#endif
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <poll.h>
+
+#include <err.h> /* BSD'ism */
+#else
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <iprt/stdint.h>
+#include "winpoll.h"
+#endif
+
+#include "lwip/opt.h"
+
+#include "lwip/sys.h"
+#include "lwip/tcpip.h"
+#include "lwip/netif.h"
+#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
+#include "lwip/icmp.h"
+#include "lwip/icmp6.h"
+
+/*
+ * Different OSes have different quirks in reporting POLLHUP for TCP
+ * sockets.
+ *
+ * Using shutdown(2) "how" values here would be more readable, but
+ * since SHUT_RD is 0, we can't use 0 for "none", unfortunately.
+ *
+ * The value is therefore a mask of poll event bits naming the
+ * directions that must be closed before the host reports POLLHUP; 0
+ * means POLLHUP is never reported. Code in this file tests it with
+ * expressions like (HAVE_TCP_POLLHUP & POLLOUT).
+ */
+#if defined(RT_OS_NETBSD) || defined(RT_OS_SOLARIS)
+# define HAVE_TCP_POLLHUP 0 /* not reported */
+#elif defined(RT_OS_DARWIN) || defined(RT_OS_WINDOWS)
+# define HAVE_TCP_POLLHUP POLLIN /* reported when remote closes */
+#else
+# define HAVE_TCP_POLLHUP (POLLIN|POLLOUT) /* reported when both directions are closed */
+#endif
+
+
+/**
+ * Ring buffer for inbound data. Filled with data from the host
+ * socket on poll manager thread. Data consumed by scheduling
+ * tcp_write() to the pcb on the lwip thread.
+ *
+ * NB: There is actually third party present, the lwip stack itself.
+ * Thus the buffer doesn't have dual free vs. data split, but rather
+ * three-way free / send and unACKed data / unsent data split.
+ *
+ * "vacant", "unacked" and "unsent" are offsets into "buf".
+ */
+struct ringbuf {
+ char *buf; /* backing storage, allocated in pxtcp_allocate() */
+ size_t bufsize; /* size of "buf" in bytes */
+
+ /*
+ * Start of free space, producer writes here (up till "unacked").
+ */
+ volatile size_t vacant;
+
+ /*
+ * Start of sent but unacknowledged data. The data are "owned" by
+ * the stack as it may need to retransmit. This is the free space
+ * limit for producer.
+ */
+ volatile size_t unacked;
+
+ /*
+ * Start of unsent data, consumer reads/sends from here (up till
+ * "vacant"). Not declared volatile since it's only accessed from
+ * the consumer thread.
+ */
+ size_t unsent;
+};
+
+
+/**
+ * State of one proxied TCP connection.
+ *
+ * Ties the lwIP pcb (guest side) to the host socket (external side)
+ * and carries the poll manager handler, the half-close bookkeeping
+ * for both directions, the ring buffer for inbound data and the
+ * static messages used to run callbacks on the lwIP thread.
+ */
+struct pxtcp {
+ /**
+ * Our poll manager handler. Must be first, strong/weak
+ * references depend on this "inheritance".
+ */
+ struct pollmgr_handler pmhdl;
+
+ /**
+ * lwIP (internal/guest) side of the proxied connection.
+ */
+ struct tcp_pcb *pcb;
+
+ /**
+ * Host (external) side of the proxied connection.
+ */
+ SOCKET sock;
+
+ /**
+ * Socket events we are currently polling for.
+ */
+ int events;
+
+ /**
+ * Socket error. Currently used to save connect(2) errors so that
+ * we can decide if we need to send ICMP error.
+ */
+ int sockerr;
+
+ /**
+ * Interface that we have got the SYN from. Needed to send ICMP
+ * with correct source address.
+ */
+ struct netif *netif;
+
+ /**
+ * For tentatively accepted connections for which we are in
+ * process of connecting to the real destination this is the
+ * initial pbuf that we might need to build ICMP error.
+ *
+ * When connection is established this is used to hold outbound
+ * pbuf chain received by pxtcp_pcb_recv() but not yet completely
+ * forwarded over the socket. We cannot "return" it to lwIP since
+ * the head of the chain is already sent and freed.
+ */
+ struct pbuf *unsent;
+
+ /**
+ * Guest has closed its side. Reported to pxtcp_pcb_recv() only
+ * once and we might not be able to forward it immediately if we
+ * have unsent pbuf.
+ */
+ int outbound_close;
+
+ /**
+ * Outbound half-close has been done on the socket.
+ */
+ int outbound_close_done;
+
+ /**
+ * External has closed its side. We might not be able to forward
+ * it immediately if we have unforwarded data.
+ */
+ int inbound_close;
+
+ /**
+ * Inbound half-close has been done on the pcb.
+ */
+ int inbound_close_done;
+
+ /**
+ * On systems that report POLLHUP as soon as the final FIN is
+ * received on a socket we cannot continue polling for the rest of
+ * input, so we have to read (pull) last data from the socket on
+ * the lwIP thread instead of polling/pushing it from the poll
+ * manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
+ */
+ int inbound_pull;
+
+
+ /**
+ * When poll manager schedules delete we may not be able to delete
+ * a pxtcp immediately if not all inbound data has been acked by
+ * the guest: lwIP may need to resend and the data are in pxtcp's
+ * inbuf::buf. We defer delete until all data are acked to
+ * pxtcp_pcb_sent().
+ */
+ int deferred_delete;
+
+ /**
+ * Ring-buffer for inbound data.
+ */
+ struct ringbuf inbuf;
+
+ /**
+ * lwIP thread's strong reference to us.
+ */
+ struct pollmgr_refptr *rp;
+
+
+ /*
+ * We use static messages to call functions on the lwIP thread to
+ * avoid malloc/free overhead.
+ */
+ struct tcpip_msg msg_delete; /* delete pxtcp */
+ struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
+ struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
+ struct tcpip_msg msg_outbound; /* trigger send of outbound data */
+ struct tcpip_msg msg_inbound; /* trigger send of inbound data */
+ struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
+};
+
+
+/* allocation and release of pxtcp objects */
+static struct pxtcp *pxtcp_allocate(void);
+static void pxtcp_free(struct pxtcp *);
+
+static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
+static void pxtcp_pcb_dissociate(struct pxtcp *);
+
+/* poll manager callbacks for pxtcp related channels */
+static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
+static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
+static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
+#if !(HAVE_TCP_POLLHUP & POLLOUT) /* host doesn't report POLLHUP for fully closed sockets */
+static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
+#endif
+static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);
+
+/* helper functions for sending/receiving pxtcp over poll manager channels */
+static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
+static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
+static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
+static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
+
+/* poll manager callbacks for individual sockets */
+static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
+static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
+
+/* get incoming traffic into ring buffer */
+static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
+static ssize_t pxtcp_sock_recv(struct pxtcp *, IOVEC *, size_t); /* default */
+
+/* convenience functions for poll manager callbacks */
+static int pxtcp_schedule_delete(struct pxtcp *);
+static int pxtcp_schedule_reset(struct pxtcp *);
+static int pxtcp_schedule_reject(struct pxtcp *);
+
+/* lwip thread callbacks called via proxy_lwip_post() */
+static void pxtcp_pcb_delete_pxtcp(void *);
+static void pxtcp_pcb_reset_pxtcp(void *);
+static void pxtcp_pcb_accept_refuse(void *);
+static void pxtcp_pcb_accept_confirm(void *);
+static void pxtcp_pcb_write_outbound(void *);
+static void pxtcp_pcb_write_inbound(void *);
+static void pxtcp_pcb_pull_inbound(void *);
+
+/* tcp pcb callbacks */
+static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, struct pbuf *); /* global */
+static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
+static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
+static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
+static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
+static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
+static void pxtcp_pcb_err(void *, err_t);
+
+static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
+static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
+
+static ssize_t pxtcp_sock_send(struct pxtcp *, IOVEC *, size_t);
+
+static void pxtcp_pcb_forward_inbound(struct pxtcp *);
+static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
+DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
+static void pxtcp_pcb_schedule_poll(struct pxtcp *);
+static void pxtcp_pcb_cancel_poll(struct pxtcp *);
+
+static void pxtcp_pcb_reject(struct tcp_pcb *, int, struct netif *, struct pbuf *);
+DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);
+
+/* poll manager handlers for pxtcp channels (filled in by pxtcp_init()) */
+static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
+static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
+static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
+#if !(HAVE_TCP_POLLHUP & POLLOUT)
+static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
+#endif
+static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
+
+
+/**
+ * Init PXTCP - must be run before either the lwIP tcpip thread or
+ * the poll manager threads have been created.
+ *
+ * Fills in the static channel handlers, registers each of them with
+ * the poll manager under its channel slot, and installs
+ * pxtcp_pcb_heard() via tcp_proxy_accept().
+ */
+void
+pxtcp_init(void)
+{
+ /*
+ * Create channels.
+ *
+ * CHANNEL(SLOT, NAME) points NAME##_hdl at the callback NAME, with
+ * no data and no slot yet, and adds it as channel SLOT.
+ */
+#define CHANNEL(SLOT, NAME) do { \
+ NAME##_hdl.callback = NAME; \
+ NAME##_hdl.data = NULL; \
+ NAME##_hdl.slot = -1; \
+ pollmgr_add_chan(SLOT, &NAME##_hdl); \
+ } while (0)
+
+ CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
+ CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
+ CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
+#if !(HAVE_TCP_POLLHUP & POLLOUT)
+ CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
+#endif
+ CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
+
+#undef CHANNEL
+
+ /*
+ * Listen to outgoing connection from guest(s).
+ */
+ tcp_proxy_accept(pxtcp_pcb_heard);
+}
+
+
+/**
+ * Send the pxtcp pointer itself over the poll manager channel
+ * "slot". Convenience wrapper for lwip thread functions; returns
+ * whatever pollmgr_chan_send() returns.
+ */
+static ssize_t
+pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
+{
+    ssize_t nsent;
+
+    /* the payload is the pointer value, not the object it points to */
+    nsent = pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
+    return nsent;
+}
+
+
+/**
+ * Send a weak reference to pxtcp over the poll manager channel
+ * "slot": takes a weak reference on pxtcp->rp and sends the refptr
+ * pointer. Convenience wrapper for lwip thread functions; returns
+ * whatever pollmgr_chan_send() returns.
+ */
+static ssize_t
+pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
+{
+    ssize_t nsent;
+
+    pollmgr_refptr_weak_ref(pxtcp->rp);
+
+    nsent = pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
+    return nsent;
+}
+
+
+/**
+ * Receiving end of pxtcp_chan_send(): read one pointer from the
+ * channel and return it as a pxtcp.
+ */
+static struct pxtcp *
+pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    return (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
+}
+
+
+/**
+ * Receiving end of pxtcp_chan_send_weak(): read a refptr from the
+ * channel and convert it to a pxtcp with pollmgr_refptr_get().
+ * Callers in this file treat a NULL result as "nothing to do".
+ */
+static struct pxtcp *
+pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pollmgr_refptr *weak;
+
+    weak = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
+
+    /* the handler is the first member of struct pxtcp, hence the cast */
+    return (struct pxtcp *)(struct pollmgr_handler *)pollmgr_refptr_get(weak);
+}
+
+
+/**
+ * Register pxtcp with poll manager.
+ *
+ * Shared by the POLLMGR_CHAN_PXTCP_ADD handler and port-forwarding.
+ * The two handle failure differently, so the pollmgr_add() status is
+ * simply returned for the caller to act on.
+ */
+int
+pxtcp_pmgr_add(struct pxtcp *pxtcp)
+{
+    LWIP_ASSERT1(pxtcp != NULL);
+#ifdef RT_OS_WINDOWS
+    LWIP_ASSERT1(pxtcp->sock != INVALID_SOCKET);
+#else
+    LWIP_ASSERT1(pxtcp->sock >= 0);
+#endif
+    LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
+    LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
+    LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
+
+    return pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
+}
+
+
+/**
+ * Unregister pxtcp with poll manager by deleting its handler's slot.
+ *
+ * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
+ * leg).
+ */
+void
+pxtcp_pmgr_del(struct pxtcp *pxtcp)
+{
+    struct pollmgr_handler *hdl;
+
+    LWIP_ASSERT1(pxtcp != NULL);
+
+    hdl = &pxtcp->pmhdl;
+    pollmgr_del_slot(hdl->slot);
+}
+
+
+/**
+ * POLLMGR_CHAN_PXTCP_ADD handler.
+ *
+ * Receives a new pxtcp from the lwip thread and registers its socket
+ * with the poll manager; if registration fails, a reset of the
+ * pxtcp is scheduled. Always returns POLLIN.
+ */
+static int
+pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxtcp *pxtcp = pxtcp_chan_recv(handler, fd, revents);
+
+    DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
+              (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
+
+    if (pxtcp_pmgr_add(pxtcp) < 0) {
+        (void) pxtcp_schedule_reset(pxtcp);
+    }
+
+    return POLLIN;
+}
+
+
+/**
+ * POLLMGR_CHAN_PXTCP_POLLOUT handler.
+ *
+ * The lwIP thread's pxtcp_pcb_forward_outbound() could not send its
+ * data and asks us to watch the socket for POLLOUT, so that
+ * forwarding can be rescheduled once the socket is writable again.
+ * Always returns POLLIN.
+ */
+static int
+pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxtcp *pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
+
+    DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
+
+    if (pxtcp != NULL) {
+        LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
+        LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
+
+        pxtcp->events |= POLLOUT;
+        pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
+    }
+
+    return POLLIN;
+}
+
+
+/**
+ * POLLMGR_CHAN_PXTCP_POLLIN handler.
+ *
+ * Adds POLLIN to the events polled for the pxtcp socket, unless
+ * inbound_close is already set. Always returns POLLIN.
+ */
+static int
+pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxtcp *pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
+
+    DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
+
+    if (pxtcp != NULL) {
+        LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
+        LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
+
+        if (!pxtcp->inbound_close) {
+            pxtcp->events |= POLLIN;
+            pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
+        }
+    }
+
+    return POLLIN;
+}
+
+
+#if !(HAVE_TCP_POLLHUP & POLLOUT)
+/**
+ * POLLMGR_CHAN_PXTCP_DEL handler.
+ *
+ * Schedule pxtcp deletion. We only need this if host system doesn't
+ * report POLLHUP for fully closed tcp sockets.
+ *
+ * Unregisters the pxtcp from the poll manager first and then posts
+ * the delete to the lwip thread. Always returns POLLIN.
+ */
+static int
+pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+ struct pxtcp *pxtcp;
+
+ pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
+ if (pxtcp == NULL) {
+ return POLLIN;
+ }
+
+ DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
+ (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
+
+ LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
+ LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
+
+ LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
+ LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
+
+ pxtcp_pmgr_del(pxtcp);
+ (void) pxtcp_schedule_delete(pxtcp); /* pxtcp must not be touched after this */
+
+ return POLLIN;
+}
+#endif /* !(HAVE_TCP_POLLHUP & POLLOUT) */
+
+
+/**
+ * POLLMGR_CHAN_PXTCP_RESET handler.
+ *
+ * Close the socket with RST and delete pxtcp.
+ *
+ * The pxtcp is unregistered from the poll manager before its socket
+ * is reset; the rest of the teardown is handed to
+ * pxtcp_schedule_reset(). Always returns POLLIN.
+ */
+static int
+pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+ struct pxtcp *pxtcp;
+
+ pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
+ if (pxtcp == NULL) {
+ return POLLIN;
+ }
+
+ DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
+ (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
+
+ LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
+ LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
+
+ pxtcp_pmgr_del(pxtcp);
+
+ proxy_reset_socket(pxtcp->sock);
+ pxtcp->sock = INVALID_SOCKET; /* presumably so the lwip-side reset does not touch the socket again - confirm */
+
+ (void) pxtcp_schedule_reset(pxtcp);
+
+ return POLLIN;
+}
+
+/**
+ * Allocate and initialize a pxtcp that is not yet attached to a pcb,
+ * a socket or a poll manager slot.
+ *
+ * Also allocates the 64K inbound ring buffer, creates the refptr for
+ * the poll manager handler and prepares the static lwIP callback
+ * messages. Returns NULL if any of the allocations fails; whatever
+ * was allocated up to that point is freed.
+ */
+static struct pxtcp *
+pxtcp_allocate(void)
+{
+ struct pxtcp *pxtcp;
+
+ pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
+ if (pxtcp == NULL) {
+ return NULL;
+ }
+
+ pxtcp->pmhdl.callback = NULL; /* set later by whoever creates the connection */
+ pxtcp->pmhdl.data = (void *)pxtcp;
+ pxtcp->pmhdl.slot = -1; /* not registered with the poll manager */
+
+ pxtcp->pcb = NULL;
+ pxtcp->sock = INVALID_SOCKET;
+ pxtcp->events = 0;
+ pxtcp->sockerr = 0;
+ pxtcp->netif = NULL;
+ pxtcp->unsent = NULL;
+ pxtcp->outbound_close = 0;
+ pxtcp->outbound_close_done = 0;
+ pxtcp->inbound_close = 0;
+ pxtcp->inbound_close_done = 0;
+ pxtcp->inbound_pull = 0;
+ pxtcp->deferred_delete = 0;
+
+ pxtcp->inbuf.bufsize = 64 * 1024;
+ pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
+ if (pxtcp->inbuf.buf == NULL) {
+ free(pxtcp);
+ return NULL;
+ }
+ pxtcp->inbuf.vacant = 0; /* all three offsets start at the beginning: buffer is empty */
+ pxtcp->inbuf.unacked = 0;
+ pxtcp->inbuf.unsent = 0;
+
+ pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
+ if (pxtcp->rp == NULL) {
+ free(pxtcp->inbuf.buf);
+ free(pxtcp);
+ return NULL;
+ }
+
+#define CALLBACK_MSG(MSG, FUNC) \
+ do { \
+ pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
+ pxtcp->MSG.sem = NULL; \
+ pxtcp->MSG.msg.cb.function = FUNC; \
+ pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
+ } while (0)
+
+ CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
+ CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
+ CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
+ CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
+ CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
+ CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
+
+#undef CALLBACK_MSG
+
+ return pxtcp;
+}
+
+
+/**
+ * Exported to fwtcp: create a pxtcp for an incoming port-forwarded
+ * connection on the already connected host socket "sock". The pcb
+ * side is filled in later, see pxtcp_pcb_connect().
+ *
+ * Returns NULL if the pxtcp cannot be allocated.
+ */
+struct pxtcp *
+pxtcp_create_forwarded(SOCKET sock)
+{
+    struct pxtcp *fwd = pxtcp_allocate();
+
+    if (fwd != NULL) {
+        fwd->sock = sock;
+        fwd->pmhdl.callback = pxtcp_pmgr_pump;
+        fwd->events = 0;
+    }
+
+    return fwd;
+}
+
+
+/**
+ * Bind "pcb" to "pxtcp": make pxtcp the pcb's callback argument,
+ * install the recv/sent/err callbacks (no poll callback yet) and
+ * remember the pcb in the pxtcp.
+ */
+static void
+pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
+{
+    LWIP_ASSERT1(pxtcp != NULL);
+    LWIP_ASSERT1(pcb != NULL);
+
+    /* pcb -> pxtcp direction */
+    tcp_arg(pcb, pxtcp);
+    tcp_recv(pcb, pxtcp_pcb_recv);
+    tcp_sent(pcb, pxtcp_pcb_sent);
+    tcp_poll(pcb, NULL, 255);
+    tcp_err(pcb, pxtcp_pcb_err);
+
+    /* pxtcp -> pcb direction */
+    pxtcp->pcb = pcb;
+}
+
+
+/**
+ * Release the memory held by "pxtcp": the pending outbound pbuf (if
+ * any), the inbound ring buffer storage (if any) and the structure
+ * itself. Does not touch the socket, the pcb or the refptr.
+ */
+static void
+pxtcp_free(struct pxtcp *pxtcp)
+{
+    struct pbuf *pending = pxtcp->unsent;
+    char *ring = pxtcp->inbuf.buf;
+
+    if (pending != NULL) {
+        pbuf_free(pending);
+    }
+    if (ring != NULL) {
+        free(ring);
+    }
+    free(pxtcp);
+}
+
+
+/**
+ * Undo pxtcp_create_forwarded() for a pxtcp that fwtcp could not
+ * register with the poll manager in order to post it to the lwip
+ * thread for connecting. The pxtcp must not have a pcb yet.
+ */
+void
+pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
+{
+    LWIP_ASSERT1(pxtcp->pcb == NULL);
+
+    /* with no pcb this resets the socket and frees the pxtcp */
+    pxtcp_pcb_reset_pxtcp((void *)pxtcp);
+}
+
+
+/**
+ * Break the link between "pxtcp" and its pcb: clear the pcb's
+ * callbacks and callback argument and forget the pcb in the pxtcp.
+ * Does nothing if pxtcp is NULL or has no pcb.
+ */
+static void
+pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
+{
+    struct tcp_pcb *pcb;
+
+    if (pxtcp == NULL) {
+        return;
+    }
+
+    pcb = pxtcp->pcb;
+    if (pcb == NULL) {
+        return;
+    }
+
+    DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
+             __func__, (void *)pxtcp, (void *)pcb));
+
+    /*
+     * lwip recycles fully closed pcbs, so we must already have
+     * dissociated from such a pcb at the time it was closed. A pcb
+     * still recorded here has to be ours; otherwise we would be
+     * tampering with someone else's pcb through a stale pointer.
+     */
+    LWIP_ASSERT1(pcb->callback_arg == pxtcp);
+
+    tcp_recv(pcb, NULL);
+    tcp_sent(pcb, NULL);
+    tcp_poll(pcb, NULL, 255);
+    tcp_err(pcb, NULL);
+    tcp_arg(pcb, NULL);
+
+    pxtcp->pcb = NULL;
+}
+
+
+/**
+ * Lwip thread callback invoked via pxtcp::msg_delete
+ *
+ * Since we use static messages to communicate to the lwip thread, we
+ * cannot delete pxtcp without making sure there are no unprocessed
+ * messages in the lwip thread mailbox.
+ *
+ * The easiest way to ensure that is to send this "delete" message as
+ * the last one and when it's processed we know there are no more and
+ * it's safe to delete pxtcp.
+ *
+ * Poll manager handlers should use pxtcp_schedule_delete()
+ * convenience function.
+ *
+ * "ctx" is the pxtcp to delete. If inbound data still has to be
+ * forwarded to or acknowledged by the guest, the pxtcp is not freed
+ * yet but marked with deferred_delete instead.
+ */
+static void
+pxtcp_pcb_delete_pxtcp(void *ctx)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)ctx;
+
+    /* check the pointer before the trace below dereferences it */
+    LWIP_ASSERT1(pxtcp != NULL);
+
+    DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
+             __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
+             (pxtcp->deferred_delete && !pxtcp->inbound_pull
+              ? " (was deferred)" : "")));
+
+    LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
+    LWIP_ASSERT1(pxtcp->outbound_close_done);
+    LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
+
+
+    /*
+     * pxtcp is no longer registered with poll manager, so it's safe
+     * to close the socket.
+     */
+    if (pxtcp->sock != INVALID_SOCKET) {
+        closesocket(pxtcp->sock);
+        pxtcp->sock = INVALID_SOCKET;
+    }
+
+    /*
+     * We might have already dissociated from a fully closed pcb, or
+     * guest might have sent us a reset while msg_delete was in
+     * transit. If there's no pcb, we are done.
+     */
+    if (pxtcp->pcb == NULL) {
+        pollmgr_refptr_unref(pxtcp->rp);
+        pxtcp_free(pxtcp);
+        return;
+    }
+
+    /*
+     * Have we completely forwarded all inbound traffic to the guest?
+     *
+     * We may still be waiting for ACKs. We may have failed to send
+     * some of the data (tcp_write() failed with ERR_MEM). We may
+     * have failed to send the FIN (tcp_shutdown() failed with
+     * ERR_MEM).
+     */
+    if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
+        pxtcp_pcb_dissociate(pxtcp);
+        pollmgr_refptr_unref(pxtcp->rp);
+        pxtcp_free(pxtcp);
+    }
+    else {
+        DPRINTF2(("delete: pxtcp %p; pcb %p:"
+                  " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
+                  (void *)pxtcp, (void *)pxtcp->pcb,
+                  (int)pxtcp->inbuf.unacked,
+                  (int)pxtcp->inbuf.unsent,
+                  (int)pxtcp->inbuf.vacant,
+                  pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
+
+        /* the deferred path only re-enters once forwarding is done */
+        LWIP_ASSERT1(!pxtcp->deferred_delete);
+        pxtcp->deferred_delete = 1;
+    }
+}
+
+
+/**
+ * Retry a delete that pxtcp_pcb_delete_pxtcp() had to defer.
+ *
+ * Meant to be called at the end of the relevant pcb callbacks: if
+ * the pxtcp is marked deferred_delete and all inbound data has been
+ * forwarded by now, the delete is carried out.
+ */
+DECLINLINE(void)
+pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
+{
+    if (!pxtcp->deferred_delete) {
+        return;
+    }
+
+    if (!pxtcp_pcb_forward_inbound_done(pxtcp)) {
+        return;
+    }
+
+    pxtcp_pcb_delete_pxtcp(pxtcp);
+}
+
+
+/**
+ * Convenience wrapper for poll manager callbacks: schedule deletion
+ * of pxtcp on the lwip thread and deregister from the poll manager.
+ *
+ * Returns -1, which the caller is expected to return to the poll
+ * manager to get the handler deregistered.
+ */
+static int
+pxtcp_schedule_delete(struct pxtcp *pxtcp)
+{
+    const int deregister = -1;
+
+    /*
+     * Mark the handler as gone, so that if pollmgr_refptr_get() is
+     * called by any channel before the scheduled deletion happens,
+     * they know we are no longer there.
+     */
+    pxtcp->pmhdl.slot = -1;
+
+    /*
+     * NB: pxtcp must not be touched after the message is posted.
+     * The poll manager thread may be pre-empted right after the
+     * post and the lwip thread may delete pxtcp before we return.
+     */
+    proxy_lwip_post(&pxtcp->msg_delete);
+
+    return deregister;
+}
+
+
+/**
+ * Lwip thread callback invoked via pxtcp::msg_reset
+ *
+ * Counterpart of pxtcp_pcb_delete_pxtcp() for abortive termination:
+ * resets the host socket, aborts the pcb (if any) so that the guest
+ * gets RST, and then frees this pxtcp.
+ */
+static void
+pxtcp_pcb_reset_pxtcp(void *ctx)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)ctx;
+    struct tcp_pcb *doomed;
+
+    LWIP_ASSERT1(pxtcp != NULL);
+
+    DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
+              __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
+
+    /* host side */
+    if (pxtcp->sock != INVALID_SOCKET) {
+        proxy_reset_socket(pxtcp->sock);
+        pxtcp->sock = INVALID_SOCKET;
+    }
+
+    /* guest side: dissociate first, then abort the saved pcb */
+    doomed = pxtcp->pcb;
+    if (doomed != NULL) {
+        pxtcp_pcb_dissociate(pxtcp);
+        tcp_abort(doomed);
+    }
+
+    pollmgr_refptr_unref(pxtcp->rp);
+    pxtcp_free(pxtcp);
+}
+
+
+
+/**
+ * Convenience wrapper for poll manager callbacks: schedule reset and
+ * deletion of pxtcp on the lwip thread (pxtcp::msg_reset) and
+ * deregister from the poll manager.
+ *
+ * Returns -1 for the caller to pass on to the poll manager.
+ *
+ * See pxtcp_schedule_delete() for additional comments.
+ */
+static int
+pxtcp_schedule_reset(struct pxtcp *pxtcp)
+{
+    const int deregister = -1;
+
+    pxtcp->pmhdl.slot = -1;     /* we are gone */
+    proxy_lwip_post(&pxtcp->msg_reset);
+
+    return deregister;
+}
+
+
+/**
+ * Reject proxy connection attempt.
+ *
+ * What the guest sees depends on the cause (sockerr):
+ *  - ECONNREFUSED: the pcb is abandoned with TCP reset;
+ *  - host/network down or unreachable, when the original datagram p
+ *    is available: ICMP destination unreachable is generated in reply
+ *    to p and the pcb is dropped without reset;
+ *  - anything else: the pcb is dropped silently.
+ *
+ * netif is the interface p was received on; it must not be NULL when
+ * p is given.
+ */
+static void
+pxtcp_pcb_reject(struct tcp_pcb *pcb, int sockerr,
+                 struct netif *netif, struct pbuf *p)
+{
+    struct netif *saved_netif;
+    int net_unreachable;
+
+    if (sockerr == ECONNREFUSED) {
+        tcp_abandon(pcb, 1);    /* with reset */
+        return;
+    }
+
+    if (p == NULL) {
+        tcp_abandon(pcb, 0);    /* nothing to reply to */
+        return;
+    }
+
+    LWIP_ASSERT1(netif != NULL);
+
+    /* ICMP code uses current netif, so switch it temporarily */
+    saved_netif = ip_current_netif();
+    ip_current_netif() = netif;
+
+    net_unreachable = (sockerr == EHOSTUNREACH
+                       || sockerr == ENETDOWN
+                       || sockerr == ENETUNREACH);
+
+    if (PCB_ISIPV6(pcb)) {
+        if (sockerr == EHOSTDOWN) {
+            icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
+        }
+        else if (net_unreachable) {
+            icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
+        }
+    }
+    else if (sockerr == EHOSTDOWN || net_unreachable) {
+        icmp_dest_unreach(p, ICMP_DUR_HOST);
+    }
+
+    ip_current_netif() = saved_netif;
+
+    tcp_abandon(pcb, 0);        /* without reset */
+}
+
+
+/**
+ * Lwip thread callback invoked via pxtcp::msg_accept (see
+ * pxtcp_schedule_reject()) when proxy failed to connect to the
+ * destination.  Also called directly when we failed to register
+ * pxtcp with poll manager.
+ *
+ * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
+ * how this unestablished connection is terminated: the cause saved
+ * in pxtcp::sockerr is passed to pxtcp_pcb_reject().
+ *
+ * The socket must be already closed by the caller.
+ */
+static void
+pxtcp_pcb_accept_refuse(void *ctx)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)ctx;
+
+    /* check the context before the debug trace dereferences it */
+    LWIP_ASSERT1(pxtcp != NULL);
+
+    DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: %R[sockerr]\n",
+              __func__, (void *)pxtcp, (void *)pxtcp->pcb,
+              pxtcp->sock, pxtcp->sockerr));
+
+    LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
+
+    if (pxtcp->pcb != NULL) {
+        struct tcp_pcb *pcb = pxtcp->pcb;
+        pxtcp_pcb_dissociate(pxtcp);
+        /* pxtcp::unsent is the initial datagram saved for ICMP reply */
+        pxtcp_pcb_reject(pcb, pxtcp->sockerr, pxtcp->netif, pxtcp->unsent);
+    }
+
+    pollmgr_refptr_unref(pxtcp->rp);
+    pxtcp_free(pxtcp);
+}
+
+
+/**
+ * Convenience wrapper for poll manager connect callback to reject
+ * connection attempt.
+ *
+ * Works like pxtcp_schedule_reset(), except that pxtcp::msg_accept
+ * is redirected to pxtcp_pcb_accept_refuse(), which is more
+ * discriminate in how this unestablished connection is terminated.
+ *
+ * Returns -1 for the caller to pass on to the poll manager.
+ */
+static int
+pxtcp_schedule_reject(struct pxtcp *pxtcp)
+{
+    const int deregister = -1;
+
+    /* reuse msg_accept, but refuse instead of confirming */
+    pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
+
+    pxtcp->pmhdl.slot = -1;     /* we are gone */
+    proxy_lwip_post(&pxtcp->msg_accept);
+
+    return deregister;
+}
+
+
+/**
+ * Global tcp_proxy_accept() callback for proxied outgoing TCP
+ * connections from guest(s).
+ *
+ * Thin adapter: the destination is the local address and port of the
+ * new pcb, the rest is done by pxtcp_pcb_accept_outbound().
+ */
+static err_t
+pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, struct pbuf *syn)
+{
+    err_t status;
+
+    LWIP_UNUSED_ARG(arg);
+
+    status = pxtcp_pcb_accept_outbound(newpcb, syn,
+                                       PCB_ISIPV6(newpcb),
+                                       &newpcb->local_ip,
+                                       newpcb->local_port);
+    return status;
+}
+
+
+/**
+ * Start proxying a new outgoing TCP connection from the guest.
+ *
+ * Creates a host socket connecting to the (remapped) destination,
+ * allocates pxtcp for it, associates it with newpcb and registers it
+ * with the poll manager to wait for the connect to complete.
+ *
+ * p, if not NULL, is the initial datagram; a reference is kept in
+ * pxtcp::unsent in case we need to reply with ICMP.
+ *
+ * Returns ERR_OK if the attempt is in progress, or ERR_ABRT if the
+ * pcb has been rejected.
+ */
+err_t
+pxtcp_pcb_accept_outbound(struct tcp_pcb *newpcb, struct pbuf *p,
+                          int is_ipv6, ipX_addr_t *dst_addr, u16_t dst_port)
+{
+    struct pxtcp *pxtcp;
+    ipX_addr_t host_dst_addr;
+    SOCKET sock;
+    ssize_t rc;
+    int domain;
+    int sockerr = 0;
+
+    /*
+     * TCP first calls accept callback when it receives the first SYN
+     * and "tentatively accepts" new proxied connection attempt.  When
+     * proxy "confirms" the SYN and sends SYN|ACK and the guest
+     * replies with ACK the accept callback is called again, this time
+     * with the established connection.
+     */
+    LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
+    tcp_accept(newpcb, pxtcp_pcb_accept);
+    tcp_arg(newpcb, NULL);
+
+    tcp_setprio(newpcb, TCP_PRIO_MAX);
+
+    pxremap_outbound_ipX(is_ipv6, &host_dst_addr, dst_addr);
+
+    if (is_ipv6) {
+        domain = PF_INET6;
+    }
+    else {
+        domain = PF_INET;
+    }
+
+    sock = proxy_connected_socket(domain, SOCK_STREAM,
+                                  &host_dst_addr, dst_port);
+    if (sock == INVALID_SOCKET) {
+        sockerr = SOCKERRNO();
+        goto reject;
+    }
+
+    pxtcp = pxtcp_allocate();
+    if (pxtcp == NULL) {
+        proxy_reset_socket(sock);
+        goto reject;            /* NB: sockerr is still 0 */
+    }
+
+    /* save initial datagram in case we need to reply with ICMP */
+    if (p != NULL) {
+        pbuf_ref(p);
+        pxtcp->unsent = p;
+        pxtcp->netif = ip_current_netif();
+    }
+
+    pxtcp_pcb_associate(pxtcp, newpcb);
+    pxtcp->sock = sock;
+
+    /* wait for the connect to complete on the poll manager thread */
+    pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
+    pxtcp->events = POLLOUT;
+
+    rc = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
+    if (rc >= 0) {
+        return ERR_OK;
+    }
+
+    /* failed to register with poll manager */
+    pxtcp->sock = INVALID_SOCKET;
+    proxy_reset_socket(sock);
+    pxtcp_pcb_accept_refuse(pxtcp);
+    return ERR_ABRT;
+
+  reject:
+    DPRINTF0(("%s: pcb %p, sock %d: %R[sockerr]\n",
+              __func__, (void *)newpcb, sock, sockerr));
+    pxtcp_pcb_reject(newpcb, sockerr, ip_current_netif(), p);
+    return ERR_ABRT;
+}
+
+
+/**
+ * tcp_proxy_accept() callback for accepted proxied outgoing TCP
+ * connections from guest(s).  This is "real" accept with three-way
+ * handshake completed.
+ *
+ * arg is the pxtcp associated with pcb.  Forwards inbound data that
+ * may have been queued while the handshake was in progress.
+ *
+ * Always returns ERR_OK.
+ */
+static err_t
+pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)arg;
+
+    LWIP_UNUSED_ARG(pcb);       /* used only in asserts */
+    LWIP_UNUSED_ARG(error);     /* always ERR_OK */
+
+    LWIP_ASSERT1(pxtcp != NULL);
+    /* NB: comparison; this must not modify pxtcp::pcb */
+    LWIP_ASSERT1(pxtcp->pcb == pcb);
+    LWIP_ASSERT1(pcb->callback_arg == pxtcp);
+
+    /* send any inbound data that are already queued */
+    pxtcp_pcb_forward_inbound(pxtcp);
+    return ERR_OK;
+}
+
+
+/**
+ * Initial poll manager callback for proxied outgoing TCP connections.
+ * pxtcp_pcb_accept_outbound() sets pxtcp::pmhdl::callback to this.
+ *
+ * Waits for connect(2) to the destination to complete. On success
+ * posts pxtcp::msg_accept to confirm the accept to the guest and
+ * replaces itself with pxtcp_pmgr_pump() callback common to all
+ * established TCP connections. On failure saves the cause in
+ * pxtcp::sockerr, closes the socket and schedules rejection of the
+ * connection attempt.
+ *
+ * Returns the events to poll for from now on (POLLIN) on success, or
+ * -1 (from the pxtcp_schedule_*() wrappers) to deregister from the
+ * poll manager.
+ */
+static int
+pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+ struct pxtcp *pxtcp;
+ RT_NOREF(fd);
+
+ pxtcp = (struct pxtcp *)handler->data;
+ LWIP_ASSERT1(handler == &pxtcp->pmhdl);
+ LWIP_ASSERT1(fd == pxtcp->sock);
+ LWIP_ASSERT1(pxtcp->sockerr == 0);
+
+ if (revents & POLLNVAL) {
+ pxtcp->sock = INVALID_SOCKET;
+ pxtcp->sockerr = ETIMEDOUT;
+ return pxtcp_schedule_reject(pxtcp);
+ }
+
+ /*
+ * Solaris and NetBSD don't report either POLLERR or POLLHUP when
+ * connect(2) fails, just POLLOUT. In that case we always need to
+ * check SO_ERROR.
+ */
+#if defined(RT_OS_SOLARIS) || defined(RT_OS_NETBSD)
+# define CONNECT_CHECK_ERROR POLLOUT
+#else
+# define CONNECT_CHECK_ERROR (POLLERR | POLLHUP)
+#endif
+
+ /*
+ * Check the cause of the failure so that pxtcp_pcb_reject() may
+ * behave accordingly.
+ */
+ if (revents & CONNECT_CHECK_ERROR) {
+ socklen_t optlen = (socklen_t)sizeof(pxtcp->sockerr);
+ int status;
+ SOCKET s;
+
+ status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
+ (char *)&pxtcp->sockerr, &optlen);
+ if (RT_UNLIKELY(status == SOCKET_ERROR)) { /* should not happen */
+ DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
+ __func__, fd, SOCKERRNO()));
+ pxtcp->sockerr = ETIMEDOUT;
+ }
+ else {
+ /* don't spam this log on successful connect(2) */
+ if ((revents & (POLLERR | POLLHUP)) /* we were told it's failed */
+ || pxtcp->sockerr != 0) /* we determined it's failed */
+ {
+ DPRINTF(("%s: sock %d: connect: %R[sockerr]\n",
+ __func__, fd, pxtcp->sockerr));
+ }
+
+ if ((revents & (POLLERR | POLLHUP))
+ && RT_UNLIKELY(pxtcp->sockerr == 0))
+ {
+ /* if we're told it's failed, make sure it's marked as such */
+ pxtcp->sockerr = ETIMEDOUT;
+ }
+ }
+
+ if (pxtcp->sockerr != 0) {
+ s = pxtcp->sock;
+ pxtcp->sock = INVALID_SOCKET;
+ closesocket(s);
+ return pxtcp_schedule_reject(pxtcp);
+ }
+ }
+
+ if (revents & POLLOUT) { /* connect is successful */
+ /* confirm accept to the guest */
+ proxy_lwip_post(&pxtcp->msg_accept);
+
+ /*
+ * Switch to common callback used for all established proxied
+ * connections.
+ */
+ pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
+
+ /*
+ * Initially we poll for incoming traffic only. Outgoing
+ * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
+ * it will ask us to poll for POLLOUT too.
+ */
+ pxtcp->events = POLLIN;
+ return pxtcp->events;
+ }
+
+ /* should never get here: neither an error nor POLLOUT was reported */
+ DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
+ __func__, (void *)pxtcp, fd, revents));
+ return pxtcp_schedule_reset(pxtcp);
+}
+
+
+/**
+ * Lwip thread callback posted by the poll manager thread via
+ * pxtcp::msg_accept when proxy connected to the destination.
+ * Finalize accept by sending SYN|ACK to the guest.
+ */
+static void
+pxtcp_pcb_accept_confirm(void *ctx)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)ctx;
+    err_t status;
+
+    LWIP_ASSERT1(pxtcp != NULL);
+
+    if (pxtcp->pcb == NULL) {
+        return;                 /* nothing to confirm */
+    }
+
+    /* we are not going to reply with ICMP, so we can drop initial pbuf */
+    if (pxtcp->unsent != NULL) {
+        pbuf_free(pxtcp->unsent);
+        pxtcp->unsent = NULL;
+    }
+
+    status = tcp_proxy_accept_confirm(pxtcp->pcb);
+
+    /*
+     * Any error other than ERR_ABRT is ignored: even if tcp_output()
+     * failed with ERR_MEM - don't give up, that SYN|ACK is enqueued
+     * and will be retransmitted eventually.
+     */
+    if (status != ERR_ABRT) {
+        return;
+    }
+
+    /*
+     * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
+     * abandons the pcb.  Retrying that is not very easy, since it
+     * would require keeping "fractional state".  From guest's point
+     * of view there is no reply to its SYN so it will either resend
+     * the SYN (effectively triggering full connection retry for us),
+     * or it will eventually time out.
+     */
+    pxtcp->pcb = NULL;          /* pcb is gone */
+    pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
+}
+
+
+/**
+ * Entry point for port-forwarding.
+ *
+ * fwtcp accepts new incoming connection, creates pxtcp for the socket
+ * (with no pcb yet) and adds it to the poll manager (polling for
+ * errors only).  Then it calls this function to construct the pcb and
+ * perform connection to the guest.
+ *
+ * The pcb is bound to the address of the socket's peer and connected
+ * to the destination from fwspec.  On any failure a reset of this
+ * pxtcp is requested from the poll manager.
+ */
+void
+pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
+{
+    struct sockaddr_storage peer;
+    socklen_t peerlen;
+    struct tcp_pcb *pcb;
+    ipX_addr_t src_addr, dst_addr;
+    u16_t src_port, dst_port;
+    int rc;
+    err_t err;
+
+    LWIP_ASSERT1(pxtcp != NULL);
+    LWIP_ASSERT1(pxtcp->pcb == NULL);
+    LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
+
+    pcb = tcp_new();
+    if (pcb == NULL) {
+        goto fail;
+    }
+
+    tcp_setprio(pcb, TCP_PRIO_MAX);
+    pxtcp_pcb_associate(pxtcp, pcb);
+
+    /* who is connecting to us? */
+    peerlen = sizeof(peer);
+    rc = getpeername(pxtcp->sock, (struct sockaddr *)&peer, &peerlen);
+    if (rc == SOCKET_ERROR) {
+        goto fail;
+    }
+
+    /* nit: compares PF and AF, but they are the same everywhere */
+    LWIP_ASSERT1(peer.ss_family == fwspec->sdom);
+
+    rc = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&peer);
+    if (rc == PXREMAP_FAILED) {
+        goto fail;
+    }
+
+    /* lwip port arguments are in host order, hence ntohs() */
+    if (peer.ss_family == PF_INET) {
+        const struct sockaddr_in *sin = (const struct sockaddr_in *)&peer;
+
+        src_port = ntohs(sin->sin_port);
+
+        memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
+        dst_port = ntohs(fwspec->dst.sin.sin_port);
+    }
+    else { /* PF_INET6 */
+        const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)&peer;
+        ip_set_v6(pcb, 1);
+
+        src_port = ntohs(sin6->sin6_port);
+
+        memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
+        dst_port = ntohs(fwspec->dst.sin6.sin6_port);
+    }
+
+    err = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
+    if (err != ERR_OK) {
+        goto fail;
+    }
+
+    err = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
+                      /* callback: */ pxtcp_pcb_connected);
+    if (err != ERR_OK) {
+        goto fail;
+    }
+
+    return;
+
+  fail:
+    pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
+}
+
+
+/**
+ * tcp_connect() callback for port-forwarded connections (see
+ * pxtcp_pcb_connect()): connection to guest is successful, pump
+ * data.
+ *
+ * Always returns ERR_OK.
+ */
+static err_t
+pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)arg;
+
+    /* these two are used only in asserts */
+    LWIP_UNUSED_ARG(error);
+    LWIP_UNUSED_ARG(pcb);
+
+    LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
+    LWIP_ASSERT1(pxtcp != NULL);
+    LWIP_ASSERT1(pxtcp->pcb == pcb);
+    LWIP_ASSERT1(pcb->callback_arg == pxtcp);
+
+    DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
+              __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
+
+    /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
+    pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
+
+    return ERR_OK;
+}
+
+
+/**
+ * tcp_recv() callback.
+ *
+ * p is new outbound data from the guest, or NULL when the guest
+ * closed its side of the connection.
+ *
+ * Returns ERR_WOULDBLOCK to make TCP hold onto p if previous data are
+ * not yet completely forwarded, otherwise ERR_OK or the result of
+ * pxtcp_pcb_forward_outbound().
+ */
+static err_t
+pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)arg;
+    int busy;
+
+    LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
+    LWIP_UNUSED_ARG(error);
+
+    LWIP_ASSERT1(pxtcp != NULL);
+    LWIP_ASSERT1(pxtcp->pcb == pcb);
+    LWIP_ASSERT1(pcb->callback_arg == pxtcp);
+    LWIP_UNUSED_ARG(pcb);
+
+    /* are we still sending the previous batch? */
+    busy = (pxtcp->unsent != NULL);
+
+    if (p != NULL) {
+        /*
+         * Got data.
+         */
+        if (busy) {
+            /*
+             * Return an error to tell TCP to hold onto that pbuf.
+             * It will be presented to us later from tcp_fasttmr().
+             */
+            return ERR_WOULDBLOCK;
+        }
+
+        /* send what we can without blocking */
+        return pxtcp_pcb_forward_outbound(pxtcp, p);
+    }
+
+    /*
+     * Guest closed.  Unlike data, p == NULL indicating orderly
+     * shutdown is NOT presented to us again, so always record it.
+     * If we are busy, pxtcp_pcb_forward_outbound() will see the flag
+     * when it finishes with the unsent data.
+     */
+    pxtcp->outbound_close = 1;
+    if (!busy) {
+        pxtcp_pcb_forward_outbound_close(pxtcp);
+    }
+    return ERR_OK;
+}
+
+
+/**
+ * Guest half-closed its TX side of the connection.
+ *
+ * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
+ * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
+ * previously unsent data and sees pxtcp::outbound_close flag saved by
+ * pxtcp_pcb_recv().
+ *
+ * Marks the outbound close as done, half-closes the socket, stops
+ * delivery of outbound data from the pcb and, if inbound forwarding
+ * is complete too, dissociates from the pcb.
+ *
+ * Must be called only once, with pxtcp::outbound_close set and with
+ * the pcb still associated (see the asserts).
+ */
+static void
+pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
+{
+ struct tcp_pcb *pcb;
+
+ LWIP_ASSERT1(pxtcp != NULL);
+ LWIP_ASSERT1(pxtcp->outbound_close);
+ LWIP_ASSERT1(!pxtcp->outbound_close_done);
+
+ pcb = pxtcp->pcb;
+ LWIP_ASSERT1(pcb != NULL);
+
+ DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
+ (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
+
+
+ /* set the flag first, since shutdown() may trigger POLLHUP */
+ pxtcp->outbound_close_done = 1;
+ shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
+
+#if !(HAVE_TCP_POLLHUP & POLLOUT)
+ /*
+ * We need to nudge poll manager manually, since OS will not
+ * report POLLHUP.
+ */
+ if (pxtcp->inbound_close) {
+ pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
+ }
+#endif
+
+
+ /* no more outbound data coming to us */
+ tcp_recv(pcb, NULL);
+
+ /*
+ * If we have already done inbound close previously (active close
+ * on the pcb), then we must not hold onto a pcb in TIME_WAIT
+ * state since those will be recycled by lwip when it runs out of
+ * free pcbs in the pool.
+ *
+ * The test is true also for a pcb in CLOSING state that waits
+ * just for the ACK of its FIN (to transition to TIME_WAIT).
+ */
+ if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
+ pxtcp_pcb_dissociate(pxtcp);
+ }
+}
+
+
+/**
+ * Forward outbound data from pcb to socket.
+ *
+ * Called by pxtcp_pcb_recv() to forward new data and by callout
+ * triggered by POLLOUT on the socket to send previously unsent data.
+ *
+ * p is the pbuf chain to forward: either new data (pxtcp::unsent
+ * must be NULL then) or pxtcp::unsent itself. The chain is sent in
+ * fragments of up to 8 pbufs per pxtcp_sock_send() call. Forwarded
+ * pbufs are freed; whatever could not be sent is kept in
+ * pxtcp::unsent.
+ *
+ * (Re)schedules one-time callout if not all data are sent.
+ *
+ * Returns ERR_OK, or ERR_ABRT if the pcb has been aborted because of
+ * an unexpected socket error.
+ */
+static err_t
+pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
+{
+ struct pbuf *qs, *q;
+ size_t qoff;
+ size_t forwarded;
+ int sockerr;
+
+ LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
+
+ forwarded = 0;
+ sockerr = 0;
+
+ q = NULL;
+ qoff = 0;
+
+ qs = p;
+ while (qs != NULL) {
+ IOVEC iov[8];
+ const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
+ size_t fwd1;
+ ssize_t nsent;
+ size_t i;
+
+ fwd1 = 0;
+ for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
+ LWIP_ASSERT1(q->len > 0);
+ IOVEC_SET_BASE(iov[i], q->payload);
+ IOVEC_SET_LEN(iov[i], q->len);
+ fwd1 += q->len;
+ }
+
+ /*
+ * TODO: This is where application-level proxy can hook into
+ * to process outbound traffic.
+ */
+ nsent = pxtcp_sock_send(pxtcp, iov, i);
+
+ if (nsent == (ssize_t)fwd1) {
+ /* successfully sent this chain fragment completely */
+ forwarded += nsent;
+ qs = q;
+ }
+ else if (nsent >= 0) {
+ /* successfully sent only some data */
+ forwarded += nsent;
+
+ /* find the first pbuf that was not completely forwarded */
+ qoff = nsent;
+ for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
+ if (qoff < q->len) {
+ break;
+ }
+ qoff -= q->len;
+ }
+ LWIP_ASSERT1(q != NULL);
+ LWIP_ASSERT1(qoff < q->len);
+ break;
+ }
+ else {
+ sockerr = -nsent;
+
+ /*
+ * Some errors are really not errors - if we get them,
+ * it's not different from getting nsent == 0, so filter
+ * them out here.
+ */
+ if (proxy_error_is_transient(sockerr)) {
+ sockerr = 0;
+ }
+ q = qs;
+ qoff = 0;
+ break;
+ }
+ }
+
+ if (forwarded > 0) {
+ DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: sent %d bytes\n",
+ (void *)pxtcp, (void *)pxtcp->pcb, (int)forwarded));
+ tcp_recved(pxtcp->pcb, (u16_t)forwarded);
+ }
+
+ if (q == NULL) { /* everything is forwarded? */
+ LWIP_ASSERT1(sockerr == 0);
+ LWIP_ASSERT1(forwarded == p->tot_len);
+
+ pxtcp->unsent = NULL;
+ pbuf_free(p);
+ if (pxtcp->outbound_close) {
+ pxtcp_pcb_forward_outbound_close(pxtcp);
+ }
+ }
+ else {
+ if (q != p) {
+ /* free forwarded pbufs at the beginning of the chain */
+ pbuf_ref(q);
+ pbuf_free(p);
+ }
+ if (qoff > 0) {
+ /* advance payload pointer past the forwarded part */
+ pbuf_header(q, -(s16_t)qoff);
+ }
+ pxtcp->unsent = q;
+ DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: kept %d bytes\n",
+ (void *)pxtcp, (void *)pxtcp->pcb, (int)q->tot_len));
+
+ /*
+ * Has sendmsg() failed?
+ *
+ * Connection reset will be detected by poll and
+ * pxtcp_schedule_reset() will be called.
+ *
+ * Otherwise something *really* unexpected must have happened,
+ * so we'd better abort.
+ */
+ if (sockerr != 0 && sockerr != ECONNRESET) {
+ struct tcp_pcb *pcb = pxtcp->pcb;
+ DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: %R[sockerr]\n",
+ (void *)pxtcp, (void *)pcb, sockerr));
+
+ pxtcp_pcb_dissociate(pxtcp);
+
+ tcp_abort(pcb);
+
+ /* call error callback manually since we've already dissociated */
+ pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
+ return ERR_ABRT;
+ }
+
+ /* schedule one-shot POLLOUT on the socket */
+ pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
+ }
+ return ERR_OK;
+}
+
+
+#if !defined(RT_OS_WINDOWS)
+/**
+ * Send the buffers described by iov[0..iovlen) to the socket with a
+ * single sendmsg(2), using MSG_NOSIGNAL where it is available.
+ *
+ * Returns the number of bytes sent, or -errno on failure.
+ */
+static ssize_t
+pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
+{
+    struct msghdr mh;
+    ssize_t rc;
+    int flags = 0;
+
+#ifdef MSG_NOSIGNAL
+    flags |= MSG_NOSIGNAL;
+#endif
+
+    memset(&mh, 0, sizeof(mh));
+    mh.msg_iov = iov;
+    mh.msg_iovlen = iovlen;
+
+    rc = sendmsg(pxtcp->sock, &mh, flags);
+    if (rc >= 0) {
+        return rc;
+    }
+
+    return -SOCKERRNO();
+}
+#else /* RT_OS_WINDOWS */
+/**
+ * Send the buffers described by iov[0..iovlen) to the socket with a
+ * single WSASend().
+ *
+ * Returns the number of bytes sent, or negated socket error on
+ * failure.
+ */
+static ssize_t
+pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
+{
+    DWORD cbsent;
+    int rc;
+
+    rc = WSASend(pxtcp->sock, iov, (DWORD)iovlen, &cbsent,
+                 0, NULL, NULL);
+    if (rc != SOCKET_ERROR) {
+        return cbsent;
+    }
+
+    return -SOCKERRNO();
+}
+#endif /* RT_OS_WINDOWS */
+
+
+/**
+ * Lwip thread callback posted by the poll manager (on POLLOUT) to
+ * retry sending data from pxtcp::unsent pbuf to socket.
+ *
+ * Does nothing if the pcb is already gone.
+ */
+static void
+pxtcp_pcb_write_outbound(void *ctx)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)ctx;
+
+    LWIP_ASSERT1(pxtcp != NULL);
+
+    if (pxtcp->pcb != NULL) {
+        pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
+    }
+}
+
+
+/**
+ * Common poll manager callback used by both outgoing and incoming
+ * (port-forwarded) connections that have a connected socket.
+ *
+ * Handles the events reported for the socket:
+ * - POLLNVAL, POLLERR and read errors schedule reset of the pxtcp;
+ * - POLLOUT is one-shot: it is removed from pxtcp::events and
+ * pxtcp::msg_outbound is posted to the lwip thread;
+ * - POLLIN reads data into the ring buffer and posts
+ * pxtcp::msg_inbound if anything was read;
+ * - POLLHUP (on hosts that report it for TCP sockets) is treated as
+ * a half or full close depending on pxtcp::outbound_close_done.
+ *
+ * Returns the events to poll for from now on (pxtcp::events), or -1
+ * to deregister from the poll manager when reset or deletion of the
+ * pxtcp has been scheduled.
+ */
+static int
+pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+ struct pxtcp *pxtcp;
+ int status;
+ int sockerr;
+ RT_NOREF(fd);
+
+ pxtcp = (struct pxtcp *)handler->data;
+ LWIP_ASSERT1(handler == &pxtcp->pmhdl);
+ LWIP_ASSERT1(fd == pxtcp->sock);
+
+ if (revents & POLLNVAL) {
+ pxtcp->sock = INVALID_SOCKET;
+ return pxtcp_schedule_reset(pxtcp);
+ }
+
+ if (revents & POLLERR) {
+ socklen_t optlen = (socklen_t)sizeof(sockerr);
+
+ status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
+ (char *)&sockerr, &optlen);
+ if (status == SOCKET_ERROR) { /* should not happen */
+ DPRINTF(("sock %d: POLLERR: SO_ERROR failed: %R[sockerr]\n",
+ fd, SOCKERRNO()));
+ }
+ else {
+ DPRINTF0(("sock %d: POLLERR: %R[sockerr]\n", fd, sockerr));
+ }
+ return pxtcp_schedule_reset(pxtcp);
+ }
+
+ if (revents & POLLOUT) {
+ pxtcp->events &= ~POLLOUT;
+ proxy_lwip_post(&pxtcp->msg_outbound);
+ }
+
+ if (revents & POLLIN) {
+ ssize_t nread;
+ int stop_pollin;
+
+ nread = pxtcp_sock_read(pxtcp, &stop_pollin);
+ if (nread < 0) {
+ sockerr = -(int)nread;
+ DPRINTF0(("sock %d: POLLIN: %R[sockerr]\n", fd, sockerr));
+ return pxtcp_schedule_reset(pxtcp);
+ }
+
+ if (stop_pollin) {
+ pxtcp->events &= ~POLLIN;
+ }
+
+ if (nread > 0) {
+ proxy_lwip_post(&pxtcp->msg_inbound);
+#if !HAVE_TCP_POLLHUP
+ /*
+ * If host does not report POLLHUP for closed sockets
+ * (e.g. NetBSD) we should check for full close manually.
+ */
+ if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
+ LWIP_ASSERT1((revents & POLLHUP) == 0);
+ return pxtcp_schedule_delete(pxtcp);
+ }
+#endif
+ }
+ }
+
+#if !HAVE_TCP_POLLHUP
+ LWIP_ASSERT1((revents & POLLHUP) == 0);
+#else
+ if (revents & POLLHUP) {
+ DPRINTF(("sock %d: HUP\n", fd));
+
+#if HAVE_TCP_POLLHUP == POLLIN
+ /*
+ * XXX: OSX reports POLLHUP once more when inbound is already
+ * half-closed (which has already been reported as a "normal"
+ * POLLHUP, handled below), the socket is polled for POLLOUT
+ * (guest sends a lot of data that we can't push out fast
+ * enough), and remote sends a reset - e.g. an http client
+ * that half-closes after request and then aborts the transfer.
+ *
+ * It really should have been reported as POLLERR, but it
+ * seems OSX never reports POLLERR for sockets.
+ */
+#if defined(RT_OS_DARWIN)
+ {
+ socklen_t optlen = (socklen_t)sizeof(sockerr);
+
+ status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
+ (char *)&sockerr, &optlen);
+ if (status == SOCKET_ERROR) { /* should not happen */
+ DPRINTF(("sock %d: POLLHUP: SO_ERROR failed: %R[sockerr]\n",
+ fd, SOCKERRNO()));
+ sockerr = ECONNRESET;
+ }
+ else if (sockerr != 0) {
+ DPRINTF0(("sock %d: POLLHUP: %R[sockerr]\n", fd, sockerr));
+ }
+
+ if (sockerr != 0) { /* XXX: should have been POLLERR */
+ return pxtcp_schedule_reset(pxtcp);
+ }
+ }
+#endif /* RT_OS_DARWIN */
+
+ /*
+ * Remote closed inbound.
+ */
+ if (!pxtcp->outbound_close_done) {
+ /*
+ * We might still need to poll for POLLOUT, but we can not
+ * poll for POLLIN anymore (even if not all data are read)
+ * because we will be spammed by POLLHUP.
+ */
+ pxtcp->events &= ~POLLIN;
+ if (!pxtcp->inbound_close) {
+ /* the rest of the input has to be pulled */
+ proxy_lwip_post(&pxtcp->msg_inpull);
+ }
+ }
+ else
+#endif
+ /*
+ * Both directions are closed.
+ */
+ {
+ LWIP_ASSERT1(pxtcp->outbound_close_done);
+
+ if (pxtcp->inbound_close) {
+ /* there's no unread data, we are done */
+ return pxtcp_schedule_delete(pxtcp);
+ }
+ else {
+ /* pull the rest of the input first (deferred_delete) */
+ pxtcp->pmhdl.slot = -1;
+ proxy_lwip_post(&pxtcp->msg_inpull);
+ return -1;
+ }
+ /* NOTREACHED */
+ }
+
+ }
+#endif /* HAVE_TCP_POLLHUP */
+
+ return pxtcp->events;
+}
+
+
+/**
+ * Read data from socket to ringbuf. This may be used both on lwip
+ * and poll manager threads.
+ *
+ * New data are written starting at pxtcp::inbuf::vacant; the write
+ * is limited by pxtcp::inbuf::unacked so that one slot of the buffer
+ * is always left empty.
+ *
+ * Flag pointed to by pstop is set when further reading is impossible,
+ * either temporarily when buffer is full, or permanently when EOF is
+ * received.
+ *
+ * Returns number of bytes read. NB: EOF is reported as 1!
+ * (pxtcp::inbound_close is set in that case.)
+ *
+ * Returns zero if nothing was read, either because buffer is full, or
+ * if no data is available (EWOULDBLOCK, EINTR &c).
+ *
+ * Returns -errno on real socket errors.
+ */
+static ssize_t
+pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
+{
+ IOVEC iov[2];
+ size_t iovlen;
+ ssize_t nread;
+
+ const size_t sz = pxtcp->inbuf.bufsize;
+ size_t beg, lim, wrnew;
+
+ *pstop = 0;
+
+ beg = pxtcp->inbuf.vacant;
+ IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
+
+ /* lim is the index we can NOT write to */
+ lim = pxtcp->inbuf.unacked;
+ if (lim == 0) {
+ lim = sz - 1; /* empty slot at the end */
+ }
+ else if (lim == 1 && beg != 0) {
+ lim = sz; /* empty slot at the beginning */
+ }
+ else {
+ --lim;
+ }
+
+ if (beg == lim) {
+ /*
+ * Buffer is full, stop polling for POLLIN.
+ *
+ * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
+ * data, freeing space in the ring buffer.
+ */
+ *pstop = 1;
+ return 0;
+ }
+
+ if (beg < lim) {
+ /* free space in one chunk */
+ iovlen = 1;
+ IOVEC_SET_LEN(iov[0], lim - beg);
+ }
+ else {
+ /* free space in two chunks: up to the end, then from the start */
+ iovlen = 2;
+ IOVEC_SET_LEN(iov[0], sz - beg);
+ IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
+ IOVEC_SET_LEN(iov[1], lim);
+ }
+
+ /*
+ * TODO: This is where application-level proxy can hook into to
+ * process inbound traffic.
+ */
+ nread = pxtcp_sock_recv(pxtcp, iov, iovlen);
+
+ if (nread > 0) {
+ wrnew = beg + nread;
+ if (wrnew >= sz) {
+ wrnew -= sz;
+ }
+ pxtcp->inbuf.vacant = wrnew;
+ DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
+ (void *)pxtcp, pxtcp->sock, (int)nread));
+ return nread;
+ }
+ else if (nread == 0) {
+ *pstop = 1;
+ pxtcp->inbound_close = 1;
+ DPRINTF2(("pxtcp %p: sock %d read EOF\n",
+ (void *)pxtcp, pxtcp->sock));
+ return 1;
+ }
+ else {
+ int sockerr = -nread;
+
+ if (proxy_error_is_transient(sockerr)) {
+ /* haven't read anything, just return */
+ DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
+ (void *)pxtcp, pxtcp->sock));
+ return 0;
+ }
+ else {
+ /* socket error! */
+ DPRINTF0(("pxtcp %p: sock %d read: %R[sockerr]\n",
+ (void *)pxtcp, pxtcp->sock, sockerr));
+ return -sockerr;
+ }
+ }
+}
+
+
+#if !defined(RT_OS_WINDOWS)
+/**
+ * Receive data from the socket into the buffers described by
+ * iov[0..iovlen) with a single recvmsg(2).
+ *
+ * Returns the number of bytes received (zero on EOF), or -errno on
+ * failure.
+ */
+static ssize_t
+pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
+{
+    struct msghdr mh;
+    ssize_t rc;
+
+    memset(&mh, 0, sizeof(mh));
+    mh.msg_iov = iov;
+    mh.msg_iovlen = iovlen;
+
+    rc = recvmsg(pxtcp->sock, &mh, 0);
+    if (rc >= 0) {
+        return rc;
+    }
+
+    return -SOCKERRNO();
+}
+#else /* RT_OS_WINDOWS */
+/**
+ * Receive data from the socket into the buffers described by
+ * iov[0..iovlen) with a single WSARecv().
+ *
+ * Returns the number of bytes received, or negated socket error on
+ * failure.
+ */
+static ssize_t
+pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
+{
+    DWORD flags = 0;
+    DWORD cbread;
+    int rc;
+
+    rc = WSARecv(pxtcp->sock, iov, (DWORD)iovlen, &cbread,
+                 &flags, NULL, NULL);
+    if (rc != SOCKET_ERROR) {
+        return (ssize_t)cbread;
+    }
+
+    return -SOCKERRNO();
+}
+#endif /* RT_OS_WINDOWS */
+
+
+/**
+ * Lwip thread callback posted by the poll manager
+ * (pxtcp::msg_inbound) to trigger output from ringbuf to guest.
+ *
+ * Does nothing if the pcb is already gone.
+ */
+static void
+pxtcp_pcb_write_inbound(void *ctx)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)ctx;
+
+    LWIP_ASSERT1(pxtcp != NULL);
+
+    if (pxtcp->pcb != NULL) {
+        pxtcp_pcb_forward_inbound(pxtcp);
+    }
+}
+
+
+/**
+ * tcp_poll() callback
+ *
+ * We switch it on when tcp_write() or tcp_shutdown() fail with
+ * ERR_MEM to prevent connection from stalling.  Normally ACKs or
+ * more inbound data trigger pxtcp_pcb_forward_inbound() again, but
+ * if neither happens, tcp_poll() comes to the rescue.
+ *
+ * Always returns ERR_OK.
+ */
+static err_t
+pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)arg;
+
+    LWIP_UNUSED_ARG(pcb);
+
+    DPRINTF2(("%s: pxtcp %p; pcb %p\n",
+              __func__, (void *)pxtcp, (void *)pxtcp->pcb));
+
+    /* retry what has failed before */
+    pxtcp_pcb_forward_inbound(pxtcp);
+
+    /*
+     * A failed tcp_shutdown() might have been the last thing that
+     * held up deletion of the pxtcp.  If it has succeeded now, we
+     * may be the last callback, so check for deferred deletion.
+     */
+    pxtcp_pcb_maybe_deferred_delete(pxtcp);
+
+    return ERR_OK;
+}
+
+
+/**
+ * Switch on the tcp_poll() callback (pxtcp_pcb_poll()) for the pcb
+ * of this pxtcp.
+ */
+static void
+pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
+{
+    struct tcp_pcb *pcb = pxtcp->pcb;
+
+    tcp_poll(pcb, pxtcp_pcb_poll, 0);
+}
+
+
+/**
+ * Switch off the tcp_poll() callback for the pcb of this pxtcp by
+ * replacing it with NULL.
+ */
+static void
+pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
+{
+    struct tcp_pcb *pcb = pxtcp->pcb;
+
+    tcp_poll(pcb, NULL, 255);
+}
+
+
+/**
+ * Forward inbound data from ring buffer to the guest.
+ *
+ * Scheduled by poll manager thread after it receives more data into
+ * the ring buffer (we have more data to send).
+
+ * Also called from tcp_sent() callback when guest ACKs some data,
+ * increasing pcb->snd_buf (we are permitted to send more data).
+ *
+ * Also called from tcp_poll() callback if previous attempt to forward
+ * inbound data failed with ERR_MEM (we need to try again).
+ */
+static void
+pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
+{
+ struct tcp_pcb *pcb;
+ size_t sndbuf;
+ size_t beg, lim, sndlim;
+ size_t toeob, tolim;
+ size_t nsent;
+ err_t error;
+
+ LWIP_ASSERT1(pxtcp != NULL);
+ pcb = pxtcp->pcb;
+ if (pcb == NULL) {
+ return;
+ }
+
+ if (/* __predict_false */ pcb->state < ESTABLISHED) {
+ /*
+ * If we have just confirmed accept of this connection, the
+ * pcb is in SYN_RCVD state and we still haven't received the
+ * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
+ * transition that lwip decrements pcb->acked so that that ACK
+ * is not reported to pxtcp_pcb_sent(). If we send something
+ * now and immediately close (think "daytime", e.g.) while
+ * still in SYN_RCVD state, we will move directly to
+ * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
+ * report it to pxtcp_pcb_sent().
+ */
+ DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
+ (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
+ return;
+ }
+
+
+ /* [beg, lim) is the not yet sent part of the ring buffer */
+ beg = pxtcp->inbuf.unsent; /* private to lwip thread */
+ lim = pxtcp->inbuf.vacant;
+
+ if (beg == lim) {
+ if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
+ pxtcp_pcb_forward_inbound_close(pxtcp);
+ tcp_output(pcb);
+ return;
+ }
+
+ /*
+ * Else, there's no data to send.
+ *
+ * If there is free space in the buffer, producer will
+ * reschedule us as it receives more data and vacant (lim)
+ * advances.
+ *
+ * If buffer is full when all data have been passed to
+ * tcp_write() but not yet acknowledged, we will advance
+ * unacked on ACK, freeing some space for producer to write to
+ * (then see above).
+ */
+ return;
+ }
+
+ sndbuf = tcp_sndbuf(pcb);
+ if (sndbuf == 0) {
+ /*
+ * Can't send anything now. As guest ACKs some data, TCP will
+ * call pxtcp_pcb_sent() callback and we will come here again.
+ */
+ return;
+ }
+
+ nsent = 0;
+
+ /*
+ * We have three limits to consider:
+ * - how much data we have in the ringbuf
+ * - how much data we are allowed to send
+ * - ringbuf size
+ */
+ toeob = pxtcp->inbuf.bufsize - beg;
+ if (lim < beg) { /* lim wrapped */
+ if (sndbuf < toeob) { /* but we are limited by sndbuf */
+ /* so beg is not going to wrap, treat sndbuf as lim */
+ lim = beg + sndbuf; /* ... and proceed to the simple case */
+ }
+ else { /* we are limited by the end of the buffer, beg will wrap */
+ u8_t maybemore;
+ /*
+ * No second chunk will follow if the tail chunk uses up
+ * all of sndbuf or if the wrapped part [0, lim) is empty.
+ */
+ if (toeob == sndbuf || lim == 0) {
+ maybemore = 0;
+ }
+ else {
+ maybemore = TCP_WRITE_FLAG_MORE;
+ }
+
+ Assert(toeob == (u16_t)toeob);
+ error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)toeob, maybemore);
+ if (error != ERR_OK) {
+ goto writeerr;
+ }
+ nsent += toeob;
+ pxtcp->inbuf.unsent = 0; /* wrap */
+
+ if (maybemore) {
+ /* continue with the wrapped part using the remaining sndbuf */
+ beg = 0;
+ sndbuf -= toeob;
+ }
+ else {
+ /* we are done sending, but ... */
+ goto check_inbound_close;
+ }
+ }
+ }
+
+ /* simple case: a single contiguous chunk [beg, lim), capped by sndbuf */
+ LWIP_ASSERT1(beg < lim);
+ sndlim = beg + sndbuf;
+ if (lim > sndlim) {
+ lim = sndlim;
+ }
+ tolim = lim - beg;
+ if (tolim > 0) {
+ error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
+ if (error != ERR_OK) {
+ goto writeerr;
+ }
+ nsent += tolim;
+ pxtcp->inbuf.unsent = lim;
+ }
+
+ check_inbound_close:
+ /* everything buffered has been passed to tcp_write(): forward the close */
+ if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
+ pxtcp_pcb_forward_inbound_close(pxtcp);
+ }
+
+ DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
+ (void *)pxtcp, (void *)pcb, (int)nsent));
+ tcp_output(pcb);
+ pxtcp_pcb_cancel_poll(pxtcp);
+ return;
+
+ writeerr:
+ if (error == ERR_MEM) {
+ /* push out whatever the first write queued, then schedule a poll */
+ if (nsent > 0) { /* first write succeeded, second failed */
+ DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
+ (void *)pxtcp, (void *)pcb, (int)nsent));
+ tcp_output(pcb);
+ }
+ DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
+ (void *)pxtcp, (void *)pcb));
+ pxtcp_pcb_schedule_poll(pxtcp);
+ }
+ else {
+ DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
+ (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
+
+ /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
+ /* NB: this assertion always fails in this branch (error != ERR_MEM) */
+ LWIP_ASSERT1(error == ERR_MEM);
+ }
+}
+
+
+/**
+ * Forward the inbound half-close to the guest by shutting down the TX
+ * side of the pcb.
+ *
+ * Preconditions (asserted below): inbound_close is set, the close has
+ * not been forwarded yet, and the ring buffer holds no unsent data.
+ *
+ * If tcp_shutdown() fails a poll is scheduled (presumably so that the
+ * close is retried later) and inbound_close_done stays clear.
+ */
+static void
+pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
+{
+ struct tcp_pcb *pcb;
+ err_t error;
+
+ LWIP_ASSERT1(pxtcp != NULL);
+ LWIP_ASSERT1(pxtcp->inbound_close);
+ LWIP_ASSERT1(!pxtcp->inbound_close_done);
+ LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
+
+ pcb = pxtcp->pcb;
+ LWIP_ASSERT1(pcb != NULL);
+
+ DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
+ (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
+
+ /* close only our sending direction, keep receiving from the guest */
+ error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
+ if (error != ERR_OK) {
+ DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
+ " tcp_shutdown: error=%s\n",
+ (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
+ pxtcp_pcb_schedule_poll(pxtcp);
+ return;
+ }
+
+ pxtcp_pcb_cancel_poll(pxtcp);
+ pxtcp->inbound_close_done = 1;
+
+
+ /*
+ * If we have already done outbound close previously (passive
+ * close on the pcb), then we must not hold onto a pcb in LAST_ACK
+ * state since those will be deleted by lwip when that last ack
+ * comes from the guest.
+ *
+ * NB: We do NOT check for deferred delete here, even though we
+ * have just set one of its conditions, inbound_close_done. We
+ * let pcb callbacks that called us do that. It's simpler and
+ * cleaner that way.
+ */
+ if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
+ pxtcp_pcb_dissociate(pxtcp);
+ }
+}
+
+
+/**
+ * Tell whether the inbound direction is completely finished: the
+ * inbound close has been forwarded (which also implies that all data
+ * has been forwarded) and everything we sent has been acked.  We are
+ * not called back when the close itself is acked.
+ *
+ * @returns non-zero when done, zero otherwise.
+ */
+DECLINLINE(int)
+pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
+{
+    if (!pxtcp->inbound_close_done) {
+        return 0;
+    }
+
+    /* nothing handed to the stack is still waiting for an ACK */
+    return pxtcp->inbuf.unacked == pxtcp->inbuf.unsent;
+}
+
+
+/**
+ * tcp_sent() callback - guest acknowledged len bytes.
+ *
+ * We can advance inbuf::unacked index, making more free space in the
+ * ringbuf and wake up producer on poll manager thread.
+ *
+ * We can also try to send more data if we have any since pcb->snd_buf
+ * was increased and we are now permitted to send more.
+ *
+ * Besides being invoked by lwIP, this function is called directly
+ * with len == 0 by pxtcp_pcb_pull_inbound() to start pulling input,
+ * and by pxtcp_pcb_err() on ERR_CLSD with the amount of data that was
+ * acked together with our FIN.
+ *
+ * @returns ERR_OK, or ERR_ABRT when reading the socket failed and the
+ *          pxtcp was reset directly (non-POLLHUP-as-POLLIN builds).
+ */
+static err_t
+pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)arg;
+    size_t unacked;
+
+    LWIP_ASSERT1(pxtcp != NULL);
+    LWIP_ASSERT1(pxtcp->pcb == pcb);
+    LWIP_ASSERT1(pcb->callback_arg == pxtcp);
+    LWIP_UNUSED_ARG(pcb);       /* only in assert */
+
+    DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
+              " unacked %d, unsent %d, vacant %d\n",
+              __func__, (void *)pxtcp, (void *)pcb, (int)len,
+              (int)pxtcp->inbuf.unacked,
+              (int)pxtcp->inbuf.unsent,
+              (int)pxtcp->inbuf.vacant));
+
+    if (/* __predict_false */ len == 0) {
+        /* we are notified to start pulling */
+        LWIP_ASSERT1(!pxtcp->inbound_close);
+        LWIP_ASSERT1(pxtcp->inbound_pull);
+
+        unacked = pxtcp->inbuf.unacked;
+    }
+    else {
+        /*
+         * Advance unacked index.  Guest acknowledged the data, so it
+         * won't be needed again for potential retransmits.
+         *
+         * Wrap as soon as the index reaches bufsize (>=, not >):
+         * unsent wraps to 0 at the end of the buffer, and the
+         * "everything is acked" tests compare unacked and unsent for
+         * equality, so the two must use the same representation.
+         */
+        unacked = pxtcp->inbuf.unacked + len;
+        if (unacked >= pxtcp->inbuf.bufsize) {
+            unacked -= pxtcp->inbuf.bufsize;
+        }
+        pxtcp->inbuf.unacked = unacked;
+    }
+
+    /* arrange for more inbound data */
+    if (!pxtcp->inbound_close) {
+        if (!pxtcp->inbound_pull) {
+            /* wake up producer, in case it has stopped polling for POLLIN */
+            pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
+#ifdef RT_OS_WINDOWS
+            /*
+             * We haven't got enough room in the ring buffer to read
+             * at the moment, but we don't want to lose the
+             * notification from WSAW4ME when space becomes available,
+             * so we reset the event with an empty recv.
+             */
+            recv(pxtcp->sock, NULL, 0, 0);
+#endif
+        }
+        else {
+            ssize_t nread;
+            int stop_pollin;    /* ignored */
+
+            nread = pxtcp_sock_read(pxtcp, &stop_pollin);
+
+            if (nread < 0) {
+                int sockerr = -(int)nread;
+                LWIP_UNUSED_ARG(sockerr);
+                DPRINTF0(("%s: sock %d: %R[sockerr]\n",
+                          __func__, pxtcp->sock, sockerr));
+
+#if HAVE_TCP_POLLHUP == POLLIN /* see counterpart in pxtcp_pmgr_pump() */
+                /*
+                 * It may still be registered with poll manager for POLLOUT.
+                 */
+                pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
+                return ERR_OK;
+#else
+                /*
+                 * It is no longer registered with poll manager so we
+                 * can kill it directly.
+                 */
+                pxtcp_pcb_reset_pxtcp(pxtcp);
+                return ERR_ABRT;
+#endif
+            }
+        }
+    }
+
+    /* forward more data if we can */
+    if (!pxtcp->inbound_close_done) {
+        pxtcp_pcb_forward_inbound(pxtcp);
+
+        /*
+         * NB: we might have dissociated from a pcb that transitioned
+         * to LAST_ACK state, so don't refer to pcb below.
+         */
+    }
+
+
+    /* have we got all the acks? */
+    if (pxtcp->inbound_close                           /* no more new data */
+        && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant  /* all data is sent */
+        && unacked == pxtcp->inbuf.unsent)             /* ... and is acked */
+    {
+        char *buf;
+
+        DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
+                 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
+
+        /* no more retransmits, so buf is not needed */
+        buf = pxtcp->inbuf.buf;
+        pxtcp->inbuf.buf = NULL;
+        free(buf);
+
+        /* no more acks, so no more callbacks */
+        if (pxtcp->pcb != NULL) {
+            tcp_sent(pxtcp->pcb, NULL);
+        }
+
+        /*
+         * We may be the last callback for this pcb if we have also
+         * successfully forwarded inbound_close.
+         */
+        pxtcp_pcb_maybe_deferred_delete(pxtcp);
+    }
+
+    return ERR_OK;
+}
+
+
+/**
+ * lwip thread callback posted by the poll manager (pxtcp::msg_inpull).
+ *
+ * Switches pxtcp_pcb_sent() into the mode where it actively pulls the
+ * last bits of input itself.  See the POLLHUP comment in
+ * pxtcp_pmgr_pump().
+ *
+ * pxtcp::sock is deregistered from poll manager after this callback
+ * is scheduled.
+ */
+static void
+pxtcp_pcb_pull_inbound(void *ctx)
+{
+    struct pxtcp *pxtcp = (struct pxtcp *)ctx;
+    LWIP_ASSERT1(pxtcp != NULL);
+
+    /* without a pcb there is nobody to pull for */
+    if (pxtcp->pcb == NULL) {
+        DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
+        pxtcp_pcb_reset_pxtcp(pxtcp);
+        return;
+    }
+
+    pxtcp->inbound_pull = 1;
+
+    if (pxtcp->pmhdl.slot >= 0) {
+        DPRINTF(("%s: pxtcp %p: pcb %p\n",
+                 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
+    }
+    else {
+        /* no poll manager slot any more: mark for deferred delete */
+        DPRINTF(("%s: pxtcp %p: pcb %p (deferred delete)\n",
+                 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
+        pxtcp->deferred_delete = 1;
+    }
+
+    /* len == 0 tells pxtcp_pcb_sent() to start pulling */
+    pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
+}
+
+
+/**
+ * tcp_err() callback.
+ *
+ * pcb is not passed to this callback since it may be already
+ * deallocated by the stack, but we can't do anything useful with it
+ * anyway since connection is gone.
+ */
+static void
+pxtcp_pcb_err(void *arg, err_t error)
+{
+ struct pxtcp *pxtcp = (struct pxtcp *)arg;
+ LWIP_ASSERT1(pxtcp != NULL);
+
+ /*
+ * ERR_CLSD is special - it is reported here when:
+ *
+ * . guest has already half-closed
+ * . we send FIN to guest when external half-closes
+ * . guest acks that FIN
+ *
+ * Since connection is closed but receive has been already closed
+ * lwip can only report this via tcp_err. At this point the pcb
+ * is still alive, so we can peek at it if need be.
+ *
+ * The interesting twist is when the ACK from guest that acks our
+ * FIN also acks some data. In this scenario lwip will NOT call
+ * tcp_sent() callback with the ACK for that last bit of data but
+ * instead will call tcp_err with ERR_CLSD right away. Since that
+ * ACK also acknowledges all the data, we should run some of
+ * pxtcp_pcb_sent() logic here.
+ */
+ if (error == ERR_CLSD) {
+ struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
+
+ DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
+ " pcb->acked %d;"
+ " unacked %d, unsent %d, vacant %d\n",
+ (void *)pxtcp, (void *)pcb,
+ pcb->acked,
+ (int)pxtcp->inbuf.unacked,
+ (int)pxtcp->inbuf.unsent,
+ (int)pxtcp->inbuf.vacant));
+
+ LWIP_ASSERT1(pxtcp->pcb == pcb);
+ LWIP_ASSERT1(pcb->callback_arg == pxtcp);
+
+ /* account for the data acked together with the FIN */
+ if (pcb->acked > 0) {
+ pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
+ }
+ return;
+ }
+
+ DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
+ (void *)pxtcp, proxy_lwip_strerr(error)));
+
+ pxtcp->pcb = NULL; /* pcb is gone */
+ /* reset directly if delete was deferred, else go through the poll manager */
+ if (pxtcp->deferred_delete) {
+ pxtcp_pcb_reset_pxtcp(pxtcp);
+ }
+ else {
+ pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
+ }
+}
diff --git a/src/VBox/NetworkServices/NAT/pxtcp.h b/src/VBox/NetworkServices/NAT/pxtcp.h
new file mode 100644
index 00000000..d9aae748
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/pxtcp.h
@@ -0,0 +1,42 @@
+/* $Id: pxtcp.h $ */
+/** @file
+ * NAT Network - TCP proxy, internal interface declarations.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_pxtcp_h
+#define VBOX_INCLUDED_SRC_NAT_pxtcp_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include "lwip/err.h"
+#include "lwip/ip_addr.h"
+
+struct pbuf;
+struct tcp_pcb;
+struct pxtcp;
+struct fwspec;
+
+err_t pxtcp_pcb_accept_outbound(struct tcp_pcb *, struct pbuf *, int, ipX_addr_t *, u16_t);
+
+struct pxtcp *pxtcp_create_forwarded(SOCKET);
+void pxtcp_cancel_forwarded(struct pxtcp *);
+
+void pxtcp_pcb_connect(struct pxtcp *, const struct fwspec *);
+
+int pxtcp_pmgr_add(struct pxtcp *);
+void pxtcp_pmgr_del(struct pxtcp *);
+
+#endif /* !VBOX_INCLUDED_SRC_NAT_pxtcp_h */
diff --git a/src/VBox/NetworkServices/NAT/pxudp.c b/src/VBox/NetworkServices/NAT/pxudp.c
new file mode 100644
index 00000000..0629a903
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/pxudp.c
@@ -0,0 +1,848 @@
+/* $Id: pxudp.c $ */
+/** @file
+ * NAT Network - UDP proxy.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "winutils.h"
+#include "proxy.h"
+#include "proxy_pollmgr.h"
+#include "pxremap.h"
+
+#ifndef RT_OS_WINDOWS
+#include <sys/types.h>
+#include <sys/socket.h>
+#ifdef RT_OS_DARWIN
+# define __APPLE_USE_RFC_3542
+#endif
+#include <netinet/in.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <poll.h>
+
+#include <err.h> /* BSD'ism */
+#else
+#include <stdlib.h>
+#include <iprt/stdint.h>
+#include <stdio.h>
+#include "winpoll.h"
+#endif
+
+#include "lwip/opt.h"
+
+#include "lwip/sys.h"
+#include "lwip/tcpip.h"
+#include "lwip/udp.h"
+#include "lwip/icmp.h"
+
+/**
+ * State of one proxied UDP conversation: the lwIP pcb on the guest
+ * side, the host socket on the external side, and the plumbing that
+ * connects the lwIP thread with the poll manager thread.
+ */
+struct pxudp {
+ /**
+ * Our poll manager handler.
+ */
+ struct pollmgr_handler pmhdl;
+
+ /**
+ * lwIP ("internal") side of the proxied connection.
+ */
+ struct udp_pcb *pcb;
+
+ /**
+ * Host ("external") side of the proxied connection.
+ */
+ SOCKET sock;
+
+ /**
+ * Is this pcb a mapped host loopback?
+ */
+ int is_mapped;
+
+ /**
+ * Cached value of TTL socket option.
+ */
+ int ttl;
+
+ /**
+ * Cached value of TOS socket option.
+ */
+ int tos;
+
+ /**
+ * Cached value of "don't fragment" socket option.
+ */
+ int df;
+
+ /**
+ * For some protocols (notably: DNS) we know we are getting just
+ * one reply, so we don't want the pcb and the socket to sit there
+ * waiting to be g/c'ed by timeout. This field counts requests and
+ * replies for them.
+ */
+ int count;
+
+ /**
+ * Mailbox for inbound pbufs.
+ *
+ * XXX: since we have single producer and single consumer we can
+ * use lockless ringbuf like for pxtcp.
+ */
+ sys_mbox_t inmbox;
+
+ /**
+ * lwIP thread's strong reference to us.
+ */
+ struct pollmgr_refptr *rp;
+
+ /*
+ * We use static messages to avoid malloc/free overhead.
+ */
+ struct tcpip_msg msg_delete; /* delete pxudp */
+ struct tcpip_msg msg_inbound; /* trigger send of inbound data */
+};
+
+
+static struct pxudp *pxudp_allocate(void);
+static void pxudp_drain_inmbox(struct pxudp *);
+static void pxudp_free(struct pxudp *);
+
+static struct udp_pcb *pxudp_pcb_dissociate(struct pxudp *);
+
+/* poll manager callbacks for pxudp related channels */
+static int pxudp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
+static int pxudp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
+
+/* helper functions for sending/receiving pxudp over poll manager channels */
+static ssize_t pxudp_chan_send(enum pollmgr_slot_t, struct pxudp *);
+static ssize_t pxudp_chan_send_weak(enum pollmgr_slot_t, struct pxudp *);
+static struct pxudp *pxudp_chan_recv(struct pollmgr_handler *, SOCKET, int);
+static struct pxudp *pxudp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
+
+/* poll manager callbacks for individual sockets */
+static int pxudp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
+
+/* convenience function for poll manager callback */
+static int pxudp_schedule_delete(struct pxudp *);
+
+/* lwip thread callbacks called via proxy_lwip_post() */
+static void pxudp_pcb_delete_pxudp(void *);
+
+/* outbound ttl check */
+static int pxudp_ttl_expired(struct pbuf *);
+
+/* udp pcb callbacks &c */
+static void pxudp_pcb_accept(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t);
+static void pxudp_pcb_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t);
+static void pxudp_pcb_forward_outbound(struct pxudp *, struct pbuf *, ip_addr_t *, u16_t);
+static void pxudp_pcb_expired(struct pxudp *);
+static void pxudp_pcb_write_inbound(void *);
+static void pxudp_pcb_forward_inbound(struct pxudp *);
+
+/* poll manager handlers for pxudp channels */
+static struct pollmgr_handler pxudp_pmgr_chan_add_hdl;
+static struct pollmgr_handler pxudp_pmgr_chan_del_hdl;
+
+
+/**
+ * Set up the UDP proxy: register the poll manager channels used to
+ * add and delete pxudp instances and install the accept callback for
+ * new proxied UDP conversations.
+ */
+void
+pxudp_init(void)
+{
+    struct pollmgr_handler * const add_hdl = &pxudp_pmgr_chan_add_hdl;
+    struct pollmgr_handler * const del_hdl = &pxudp_pmgr_chan_del_hdl;
+
+    /* channel over which the lwip thread hands over new pxudp */
+    add_hdl->slot = -1;
+    add_hdl->data = NULL;
+    add_hdl->callback = pxudp_pmgr_chan_add;
+    pollmgr_add_chan(POLLMGR_CHAN_PXUDP_ADD, add_hdl);
+
+    /* channel over which the lwip thread requests deletion */
+    del_hdl->slot = -1;
+    del_hdl->data = NULL;
+    del_hdl->callback = pxudp_pmgr_chan_del;
+    pollmgr_add_chan(POLLMGR_CHAN_PXUDP_DEL, del_hdl);
+
+    udp_proxy_accept(pxudp_pcb_accept);
+}
+
+
+/**
+ * Helper for lwip thread functions: pass the pxudp pointer itself
+ * over the given poll manager channel.
+ */
+static ssize_t
+pxudp_chan_send(enum pollmgr_slot_t chan, struct pxudp *pxudp)
+{
+    ssize_t nsent;
+
+    /* the pointer value is the payload */
+    nsent = pollmgr_chan_send(chan, &pxudp, sizeof(pxudp));
+    return nsent;
+}
+
+
+/**
+ * Helper for lwip thread functions: take a weak reference on the
+ * pxudp's refptr and pass that refptr (not the pxudp itself) over the
+ * given poll manager channel.
+ */
+static ssize_t
+pxudp_chan_send_weak(enum pollmgr_slot_t chan, struct pxudp *pxudp)
+{
+    ssize_t nsent;
+
+    pollmgr_refptr_weak_ref(pxudp->rp);
+
+    nsent = pollmgr_chan_send(chan, &pxudp->rp, sizeof(pxudp->rp));
+    return nsent;
+}
+
+
+/**
+ * Receiving end of pxudp_chan_send(): fetch the pxudp pointer that
+ * was passed over the channel.
+ */
+static struct pxudp *
+pxudp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    return (struct pxudp *)pollmgr_chan_recv_ptr(handler, fd, revents);
+}
+
+
+/**
+ * Counterpart of pxudp_chan_send_weak().
+ *
+ * Receives the refptr passed over the channel and resolves it with
+ * pollmgr_refptr_get().  The handler is the first member of struct
+ * pxudp, so the handler pointer is also the pxudp pointer.
+ *
+ * @returns the pxudp, or NULL if pollmgr_refptr_get() returned NULL
+ *          (callers check for that).
+ */
+static struct pxudp *
+pxudp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pollmgr_refptr *rp;
+    struct pollmgr_handler *base;
+    struct pxudp *pxudp;
+
+    rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
+    base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
+    pxudp = (struct pxudp *)base;
+
+    return pxudp;
+}
+
+
+/**
+ * POLLMGR_CHAN_PXUDP_ADD handler.
+ *
+ * Get new pxudp from lwip thread and start polling its socket.  If
+ * the socket cannot be registered with the poll manager, deletion of
+ * the pxudp is scheduled on the lwip thread.
+ *
+ * @returns POLLIN to keep listening on the channel.
+ */
+static int
+pxudp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxudp *pxudp;
+    int status;
+
+    pxudp = pxudp_chan_recv(handler, fd, revents);
+
+    /* sanity-check the new pxudp before it is dereferenced below */
+    LWIP_ASSERT1(pxudp != NULL);
+    LWIP_ASSERT1(pxudp->pmhdl.callback != NULL);
+    LWIP_ASSERT1(pxudp->pmhdl.data == (void *)pxudp); /* compare, don't assign */
+    LWIP_ASSERT1(pxudp->pmhdl.slot < 0);
+
+    DPRINTF(("pxudp_add: new pxudp %p; pcb %p\n",
+             (void *)pxudp, (void *)pxudp->pcb));
+
+    status = pollmgr_add(&pxudp->pmhdl, pxudp->sock, POLLIN);
+    if (status < 0) {
+        pxudp_schedule_delete(pxudp);
+    }
+
+    return POLLIN;
+}
+
+
+/**
+ * POLLMGR_CHAN_PXUDP_DEL handler.
+ *
+ * Stops polling the socket of the pxudp named by the message and
+ * sends the pxudp back to the lwip thread for deletion.
+ *
+ * @returns POLLIN to keep listening on the channel.
+ */
+static int
+pxudp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+    struct pxudp *pxudp = pxudp_chan_recv_strong(handler, fd, revents);
+
+    if (pxudp != NULL) {
+        DPRINTF(("pxudp_del: pxudp %p; socket %d\n", (void *)pxudp, pxudp->sock));
+
+        pollmgr_del_slot(pxudp->pmhdl.slot);
+
+        /*
+         * Go back to lwip thread to delete after any pending callbacks
+         * for unprocessed inbound traffic are drained.
+         */
+        pxudp_schedule_delete(pxudp);
+    }
+
+    return POLLIN;
+}
+
+
+/**
+ * Allocate and initialize a pxudp that is not yet attached to a pcb
+ * or a socket.
+ *
+ * Creates the refptr and the inbound mailbox and prepares the static
+ * lwip messages.  Cached socket option values start at -1, so the
+ * first datagram always sets them.
+ *
+ * @returns new pxudp, or NULL if any of the allocations failed (in
+ *          which case nothing is left allocated).
+ */
+static struct pxudp *
+pxudp_allocate(void)
+{
+    struct pxudp *pxudp;
+    err_t error;
+
+    pxudp = (struct pxudp *)malloc(sizeof(*pxudp));
+    if (pxudp == NULL) {
+        return NULL;
+    }
+
+    pxudp->pmhdl.callback = NULL;
+    pxudp->pmhdl.data = (void *)pxudp;
+    pxudp->pmhdl.slot = -1;
+
+    pxudp->pcb = NULL;
+    pxudp->sock = INVALID_SOCKET;
+    pxudp->is_mapped = 0;       /* don't leave it indeterminate */
+    pxudp->df = -1;
+    pxudp->ttl = -1;
+    pxudp->tos = -1;
+    pxudp->count = 0;
+
+    pxudp->rp = pollmgr_refptr_create(&pxudp->pmhdl);
+    if (pxudp->rp == NULL) {
+        free(pxudp);
+        return NULL;
+    }
+
+    error = sys_mbox_new(&pxudp->inmbox, 16);
+    if (error != ERR_OK) {
+        pollmgr_refptr_unref(pxudp->rp);
+        free(pxudp);
+        return NULL;
+    }
+
+    /* fill in a static callback message that carries this pxudp */
+#define CALLBACK_MSG(MSG, FUNC)                         \
+    do {                                                \
+        pxudp->MSG.type = TCPIP_MSG_CALLBACK_STATIC;    \
+        pxudp->MSG.sem = NULL;                          \
+        pxudp->MSG.msg.cb.function = FUNC;              \
+        pxudp->MSG.msg.cb.ctx = (void *)pxudp;          \
+    } while (0)
+
+    CALLBACK_MSG(msg_delete, pxudp_pcb_delete_pxudp);
+    CALLBACK_MSG(msg_inbound, pxudp_pcb_write_inbound);
+
+#undef CALLBACK_MSG
+
+    return pxudp;
+}
+
+
+/**
+ * Free all pbufs still queued in the inbound mailbox, then destroy
+ * the mailbox and mark it invalid.  Does nothing if the mailbox is
+ * already invalid.
+ */
+static void
+pxudp_drain_inmbox(struct pxudp *pxudp)
+{
+    void *item;
+
+    if (sys_mbox_valid(&pxudp->inmbox)) {
+        for (;;) {
+            if (sys_mbox_tryfetch(&pxudp->inmbox, &item) == SYS_MBOX_EMPTY) {
+                break;
+            }
+            pbuf_free((struct pbuf *)item);
+        }
+
+        sys_mbox_free(&pxudp->inmbox);
+        sys_mbox_set_invalid(&pxudp->inmbox);
+    }
+}
+
+
+/**
+ * Release the memory of a pxudp together with its inbound mailbox and
+ * any pbufs still queued there.  The pcb, the socket and the refptr
+ * are not touched here.
+ */
+static void
+pxudp_free(struct pxudp *pxudp)
+{
+ pxudp_drain_inmbox(pxudp);
+ free(pxudp);
+}
+
+
+/**
+ * Break the link between a pxudp and its udp_pcb.
+ *
+ * The pcb's receive callback is removed and pxudp::pcb is cleared.
+ * Unlike the TCP version, the pcb is handed back to the caller: UDP
+ * pcbs have to be removed explicitly, and this way callers need not
+ * save a copy beforehand.
+ *
+ * @returns the former pcb, or NULL if there was none (or pxudp is NULL).
+ */
+static struct udp_pcb *
+pxudp_pcb_dissociate(struct pxudp *pxudp)
+{
+    struct udp_pcb *former = NULL;
+
+    if (pxudp != NULL && pxudp->pcb != NULL) {
+        former = pxudp->pcb;
+
+        udp_recv(former, NULL, NULL);
+        pxudp->pcb = NULL;
+    }
+
+    return former;
+}
+
+
+/**
+ * Lwip thread callback invoked via pxudp::msg_delete
+ *
+ * Since we use static messages to communicate to the lwip thread, we
+ * cannot delete pxudp without making sure there are no unprocessed
+ * messages in the lwip thread mailbox.
+ *
+ * The easiest way to ensure that is to send this "delete" message as
+ * the last one and when it's processed we know there are no more and
+ * it's safe to delete pxudp.
+ *
+ * Channel callback should use pxudp_schedule_delete() convenience
+ * function defined below.
+ */
+static void
+pxudp_pcb_delete_pxudp(void *arg)
+{
+ struct pxudp *pxudp = (struct pxudp *)arg;
+ struct udp_pcb *pcb;
+
+ LWIP_ASSERT1(pxudp != NULL);
+
+ /* close the host side */
+ if (pxudp->sock != INVALID_SOCKET) {
+ closesocket(pxudp->sock);
+ pxudp->sock = INVALID_SOCKET;
+ }
+
+ /* remove the guest side, unless it was already removed on expiry */
+ pcb = pxudp_pcb_dissociate(pxudp);
+ if (pcb != NULL) {
+ udp_remove(pcb);
+ }
+
+ /* drop the lwip thread's reference (pxudp::rp), then the pxudp itself */
+ pollmgr_refptr_unref(pxudp->rp);
+ pxudp_free(pxudp);
+}
+
+
+/**
+ * Poll manager callback should use this convenience wrapper to
+ * schedule pxudp deletion on the lwip thread and to deregister from
+ * the poll manager.
+ *
+ * Always returns -1, so a socket callback can return the result
+ * straight to the poll manager (as pxudp_pmgr_pump() does). The
+ * channel handlers in this file ignore the return value.
+ */
+static int
+pxudp_schedule_delete(struct pxudp *pxudp)
+{
+ /*
+ * If pollmgr_refptr_get() is called by any channel before
+ * scheduled deletion happens, let them know we are gone.
+ */
+ pxudp->pmhdl.slot = -1;
+
+ /*
+ * Schedule deletion. Since poll manager thread may be pre-empted
+ * right after we send the message, the deletion may actually
+ * happen on the lwip thread before we return from this function,
+ * so it's not safe to refer to pxudp after this call.
+ */
+ proxy_lwip_post(&pxudp->msg_delete);
+
+ /* tell poll manager to deregister us */
+ return -1;
+}
+
+
+/**
+ * Outbound TTL/HOPL check for the datagram lwIP is currently
+ * processing.
+ *
+ * If the TTL (IPv4) or hop limit (IPv6) is 1 or less, an ICMP "time
+ * exceeded" error is generated (provided the headers can be restored
+ * on the pbuf) and the pbuf is freed.
+ *
+ * @returns 1 if the datagram expired and p was consumed, 0 otherwise.
+ */
+static int
+pxudp_ttl_expired(struct pbuf *p)
+{
+    int status;
+    int ttl = ip_current_is_v6() ? IP6H_HOPLIM(ip6_current_header())
+                                 : IPH_TTL(ip_current_header());
+
+    if (RT_LIKELY(ttl > 1)) {
+        return 0;
+    }
+
+    /* move the payload pointer back over the UDP and IP headers */
+    status = pbuf_header(p, ip_current_header_tot_len() + UDP_HLEN);
+    if (RT_LIKELY(status == 0)) {
+        if (ip_current_is_v6()) {
+            icmp6_time_exceeded(p, ICMP6_TE_HL);
+        }
+        else {
+            icmp_time_exceeded(p, ICMP_TE_TTL);
+        }
+    }
+
+    pbuf_free(p);
+    return 1;
+}
+
+
+/**
+ * New proxied UDP conversation created.
+ * Global callback for udp_proxy_accept().
+ *
+ * Allocates a pxudp for the new pcb, connects a host socket to the
+ * (possibly remapped) destination, hands the pxudp over to the poll
+ * manager and forwards the first datagram.
+ *
+ * On any failure the new pcb is removed and the datagram is dropped;
+ * everything allocated up to that point is released.
+ */
+static void
+pxudp_pcb_accept(void *arg, struct udp_pcb *newpcb, struct pbuf *p,
+                 ip_addr_t *addr, u16_t port)
+{
+    struct pxudp *pxudp;
+    ipX_addr_t dst_addr;
+    int mapping;
+    int sdom;
+    SOCKET sock;
+
+    LWIP_ASSERT1(newpcb != NULL);
+    LWIP_ASSERT1(p != NULL);
+    LWIP_UNUSED_ARG(arg);
+
+    mapping = pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);
+    if (mapping != PXREMAP_MAPPED && pxudp_ttl_expired(p)) {
+        /* p has been consumed by pxudp_ttl_expired() */
+        udp_remove(newpcb);
+        return;
+    }
+
+    pxudp = pxudp_allocate();
+    if (pxudp == NULL) {
+        DPRINTF(("pxudp_allocate: failed\n"));
+        udp_remove(newpcb);
+        pbuf_free(p);
+        return;
+    }
+
+    sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
+    pxudp->is_mapped = (mapping == PXREMAP_MAPPED);
+
+#if 0 /* XXX: DNS IPv6->IPv4 remapping hack */
+    if (pxudp->is_mapped
+        && newpcb->local_port == 53
+        && PCB_ISIPV6(newpcb))
+    {
+        /*
+         * "Remap" DNS over IPv6 to IPv4 since Ubuntu dnsmasq does not
+         * listen on IPv6.
+         */
+        sdom = PF_INET;
+        ipX_addr_set_loopback(0, &dst_addr);
+    }
+#endif  /* DNS IPv6->IPv4 remapping hack */
+
+    sock = proxy_connected_socket(sdom, SOCK_DGRAM,
+                                  &dst_addr, newpcb->local_port);
+    if (sock == INVALID_SOCKET) {
+        /*
+         * The pxudp has not been published to the pcb or to the poll
+         * manager yet, so release it here or it is leaked together
+         * with its refptr and mailbox.
+         */
+        pollmgr_refptr_unref(pxudp->rp);
+        pxudp_free(pxudp);
+
+        udp_remove(newpcb);
+        pbuf_free(p);
+        return;
+    }
+
+    pxudp->sock = sock;
+    pxudp->pcb = newpcb;
+    udp_recv(newpcb, pxudp_pcb_recv, pxudp);
+
+    pxudp->pmhdl.callback = pxudp_pmgr_pump;
+    pxudp_chan_send(POLLMGR_CHAN_PXUDP_ADD, pxudp);
+
+    /* dispatch directly instead of calling pxudp_pcb_recv() */
+    pxudp_pcb_forward_outbound(pxudp, p, addr, port);
+}
+
+
+/**
+ * udp_recv() callback.
+ *
+ * A real pbuf is a datagram from the guest to be forwarded; a NULL
+ * pbuf signals that the pcb has expired.
+ */
+static void
+pxudp_pcb_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p,
+               ip_addr_t *addr, u16_t port)
+{
+    struct pxudp *pxudp = (struct pxudp *)arg;
+
+    LWIP_ASSERT1(pxudp != NULL);
+    LWIP_ASSERT1(pcb == pxudp->pcb);
+    LWIP_UNUSED_ARG(pcb);
+
+    if (p == NULL) {
+        pxudp_pcb_expired(pxudp);
+        return;
+    }
+
+    pxudp_pcb_forward_outbound(pxudp, p, addr, port);
+}
+
+
+/**
+ * Send a datagram from the guest out of the host socket.
+ *
+ * Before sending, the socket's TTL/hop limit, TOS and "don't
+ * fragment" options are brought in line with the IP header of the
+ * datagram lwIP is currently processing. The last value set is
+ * cached in pxudp, so setsockopt() is only called on change.
+ *
+ * The pbuf is always consumed. addr and port are unused.
+ */
+static void
+pxudp_pcb_forward_outbound(struct pxudp *pxudp, struct pbuf *p,
+ ip_addr_t *addr, u16_t port)
+{
+ int status;
+
+ LWIP_UNUSED_ARG(addr);
+ LWIP_UNUSED_ARG(port);
+
+ /* pxudp_ttl_expired() frees p when it returns true */
+ if (!pxudp->is_mapped && pxudp_ttl_expired(p)) {
+ return;
+ }
+
+ if (!ip_current_is_v6()) { /* IPv4 */
+ const struct ip_hdr *iph = ip_current_header();
+ int ttl, tos, df;
+
+ /*
+ * Different OSes have different socket options for DF.
+ * Unlike pxping.c, we can't use IP_HDRINCL here as it's only
+ * valid for SOCK_RAW.
+ */
+ /* declares locals dfopt (option number, 0 if none) and dfoptname */
+# define USE_DF_OPTION(_Optname) \
+ const int dfopt = _Optname; \
+ const char * const dfoptname = #_Optname; \
+ RT_NOREF_PV(dfoptname)
+#if defined(IP_MTU_DISCOVER) /* Linux */
+ USE_DF_OPTION(IP_MTU_DISCOVER);
+#elif defined(IP_DONTFRAG) /* Solaris 11+, FreeBSD */
+ USE_DF_OPTION(IP_DONTFRAG);
+#elif defined(IP_DONTFRAGMENT) /* Windows */
+ USE_DF_OPTION(IP_DONTFRAGMENT);
+#else
+ USE_DF_OPTION(0);
+#endif
+
+ ttl = IPH_TTL(iph);
+ if (!pxudp->is_mapped) {
+ /* we count as a hop unless the destination is mapped */
+ LWIP_ASSERT1(ttl > 1);
+ --ttl;
+ }
+
+ if (ttl != pxudp->ttl) {
+ status = setsockopt(pxudp->sock, IPPROTO_IP, IP_TTL,
+ (char *)&ttl, sizeof(ttl));
+ if (RT_LIKELY(status == 0)) {
+ pxudp->ttl = ttl;
+ }
+ else {
+ DPRINTF(("IP_TTL: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+
+ tos = IPH_TOS(iph);
+ if (tos != pxudp->tos) {
+ status = setsockopt(pxudp->sock, IPPROTO_IP, IP_TOS,
+ (char *)&tos, sizeof(tos));
+ if (RT_LIKELY(status == 0)) {
+ pxudp->tos = tos;
+ }
+ else {
+ DPRINTF(("IP_TOS: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+
+ if (dfopt) {
+ df = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0;
+#if defined(IP_MTU_DISCOVER)
+ /* this option takes a PMTU discovery mode rather than a boolean */
+ df = df ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
+#endif
+ if (df != pxudp->df) {
+ status = setsockopt(pxudp->sock, IPPROTO_IP, dfopt,
+ (char *)&df, sizeof(df));
+ if (RT_LIKELY(status == 0)) {
+ pxudp->df = df;
+ }
+ else {
+ DPRINTF(("%s: %R[sockerr]\n", dfoptname, SOCKERRNO()));
+ }
+ }
+ }
+ }
+ else { /* IPv6 */
+ const struct ip6_hdr *iph = ip6_current_header();
+ int ttl;
+
+ ttl = IP6H_HOPLIM(iph);
+ if (!pxudp->is_mapped) {
+ LWIP_ASSERT1(ttl > 1);
+ --ttl;
+ }
+
+ if (ttl != pxudp->ttl) {
+ status = setsockopt(pxudp->sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
+ (char *)&ttl, sizeof(ttl));
+ if (RT_LIKELY(status == 0)) {
+ pxudp->ttl = ttl;
+ }
+ else {
+ DPRINTF(("IPV6_UNICAST_HOPS: %R[sockerr]\n", SOCKERRNO()));
+ }
+ }
+ }
+
+ /* count requests to port 53 (DNS), see pxudp::count */
+ if (pxudp->pcb->local_port == 53) {
+ ++pxudp->count;
+ }
+
+ /* the result of the send is not checked */
+ proxy_sendto(pxudp->sock, p, NULL, 0);
+ pbuf_free(p);
+}
+
+
+/**
+ * Handle expiration of a proxy udp_pcb.
+ *
+ * Proxy udp_pcbs are expired by timer, which is signaled by passing
+ * NULL pbuf to the udp_recv() callback.  At that point the pcb is
+ * removed from the list of proxy udp pcbs so no new datagrams will be
+ * delivered.
+ *
+ * The pcb is detached and removed here; the rest of the teardown is
+ * requested from the poll manager over the DEL channel.
+ */
+static void
+pxudp_pcb_expired(struct pxudp *pxudp)
+{
+    struct udp_pcb *expired;
+
+    DPRINTF2(("%s: pxudp %p, pcb %p, sock %d: expired\n",
+              __func__, (void *)pxudp, (void *)pxudp->pcb, pxudp->sock));
+
+    if ((expired = pxudp_pcb_dissociate(pxudp)) != NULL) {
+        udp_remove(expired);
+    }
+
+    pxudp_chan_send_weak(POLLMGR_CHAN_PXUDP_DEL, pxudp);
+}
+
+
+/**
+ * Poll manager callback for the host socket of a pxudp.
+ *
+ * Any event other than POLLIN/POLLERR schedules deletion of the
+ * pxudp. POLLERR is only logged. On POLLIN one datagram is read
+ * into pollmgr_udpbuf, copied into a newly allocated pbuf, queued on
+ * pxudp::inmbox, and the lwip thread is notified via
+ * pxudp::msg_inbound. If any of these steps fails the datagram is
+ * dropped.
+ *
+ * Returns POLLIN to keep polling, or the result of
+ * pxudp_schedule_delete() to get deregistered.
+ */
+static int
+pxudp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
+{
+ struct pxudp *pxudp;
+ struct pbuf *p;
+ ssize_t nread;
+ err_t error;
+
+ pxudp = (struct pxudp *)handler->data;
+ LWIP_ASSERT1(handler == &pxudp->pmhdl);
+ LWIP_ASSERT1(fd == pxudp->sock);
+ LWIP_UNUSED_ARG(fd);
+
+
+ if (revents & ~(POLLIN|POLLERR)) {
+ DPRINTF(("%s: unexpected revents 0x%x\n", __func__, revents));
+ return pxudp_schedule_delete(pxudp);
+ }
+
+ /*
+ * XXX: AFAICS, there's no way to match the error with the
+ * outgoing datagram that triggered it, since we do non-blocking
+ * sends from lwip thread.
+ */
+ if (revents & POLLERR) {
+ int sockerr = -1;
+ socklen_t optlen = (socklen_t)sizeof(sockerr);
+ int status;
+
+ /* fetch the pending error just to log it */
+ status = getsockopt(pxudp->sock, SOL_SOCKET,
+ SO_ERROR, (char *)&sockerr, &optlen);
+ if (status < 0) {
+ DPRINTF(("%s: sock %d: SO_ERROR failed:%R[sockerr]\n",
+ __func__, pxudp->sock, SOCKERRNO()));
+ }
+ else {
+ DPRINTF(("%s: sock %d: %R[sockerr]\n",
+ __func__, pxudp->sock, sockerr));
+ }
+ }
+
+ if ((revents & POLLIN) == 0) {
+ return POLLIN;
+ }
+
+#ifdef RT_OS_WINDOWS
+ nread = recv(pxudp->sock, (char *)pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
+#else
+ nread = recv(pxudp->sock, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
+#endif
+ if (nread == SOCKET_ERROR) {
+ DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
+ return POLLIN;
+ }
+
+ /* copy the datagram into a pbuf of its own */
+ p = pbuf_alloc(PBUF_RAW, (u16_t)nread, PBUF_RAM);
+ if (p == NULL) {
+ DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)nread));
+ return POLLIN;
+ }
+
+ error = pbuf_take(p, pollmgr_udpbuf, (u16_t)nread);
+ if (error != ERR_OK) {
+ DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)nread));
+ pbuf_free(p);
+ return POLLIN;
+ }
+
+ /* queue it for the lwip thread; drop it if the mailbox won't take it */
+ error = sys_mbox_trypost(&pxudp->inmbox, p);
+ if (error != ERR_OK) {
+ pbuf_free(p);
+ return POLLIN;
+ }
+
+ proxy_lwip_post(&pxudp->msg_inbound);
+
+ return POLLIN;
+}
+
+
+/**
+ * Lwip thread callback (pxudp::msg_inbound) posted by the poll
+ * manager to trigger sending of queued inbound data to the guest.
+ * Nothing is done if the pcb is already gone.
+ */
+static void
+pxudp_pcb_write_inbound(void *ctx)
+{
+    struct pxudp *pxudp = (struct pxudp *)ctx;
+    LWIP_ASSERT1(pxudp != NULL);
+
+    if (pxudp->pcb != NULL) {
+        pxudp_pcb_forward_inbound(pxudp);
+    }
+}
+
+
+/**
+ * Take one pbuf from the inbound mailbox and send it to the guest.
+ *
+ * Does nothing if the mailbox is invalid or empty.  The pbuf is freed
+ * whether or not udp_send() succeeds.  If reply counting is active
+ * (see pxudp::count) and this was the last expected reply, the pcb is
+ * expired right away instead of waiting for the timer.
+ */
+static void
+pxudp_pcb_forward_inbound(struct pxudp *pxudp)
+{
+    struct pbuf *p;
+    void *ptr;
+    u32_t timo;
+    err_t error;
+
+    if (!sys_mbox_valid(&pxudp->inmbox)) {
+        return;
+    }
+
+    /*
+     * Fetch through a real void * (as pxudp_drain_inmbox() does)
+     * rather than casting the address of a struct pbuf pointer.
+     */
+    ptr = NULL;
+    timo = sys_mbox_tryfetch(&pxudp->inmbox, &ptr);
+    if (timo == SYS_MBOX_EMPTY) {
+        return;
+    }
+    p = (struct pbuf *)ptr;
+
+    error = udp_send(pxudp->pcb, p);
+    if (error != ERR_OK) {
+        /* print the pcb, as the message says */
+        DPRINTF(("%s: udp_send(pcb %p) err %d\n",
+                 __func__, (void *)pxudp->pcb, error));
+    }
+
+    pbuf_free(p);
+
+    /*
+     * If we enabled counting in pxudp_pcb_forward_outbound() check
+     * that we have (all) the reply(s).
+     */
+    if (pxudp->count > 0) {
+        --pxudp->count;
+        if (pxudp->count == 0) {
+            pxudp_pcb_expired(pxudp);
+        }
+    }
+}
diff --git a/src/VBox/NetworkServices/NAT/rtmon_bsd.c b/src/VBox/NetworkServices/NAT/rtmon_bsd.c
new file mode 100644
index 00000000..cc3166a1
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/rtmon_bsd.c
@@ -0,0 +1,116 @@
+/* $Id: rtmon_bsd.c $ */
+/** @file
+ * NAT Network - IPv6 default route monitor for BSD routing sockets.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "proxy.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+
+/**
+ * Query IPv6 routing table - BSD routing sockets version.
+ *
+ * We don't actually monitor the routing socket for updates, and
+ * instead query the kernel each time.
+ *
+ * We take a shortcut and don't read the reply to our RTM_GET - if
+ * there's no default IPv6 route, write(2) will fail with ESRCH
+ * synchronously.  In theory it may fail asynchronously and we should
+ * wait for the RTM_GET reply and check rt_msghdr::rtm_errno.
+ *
+ * KAME code in *BSD maintains internally a list of default routers
+ * that it learned from RAs, and installs only one of them into the
+ * routing table (actually, I'm not sure if BSD routing table can
+ * handle multiple routes to the same destination).  One side-effect
+ * of this is that when manually configured route (e.g. teredo) is
+ * deleted, the system will lose its default route even when KAME IPv6
+ * has default router(s) in its internal list.  Next RA will force the
+ * update, though.
+ *
+ * Solaris does expose multiple routes in the routing table and
+ * replies to RTM_GET with "default default".
+ *
+ * The routing socket is opened for this single request and is always
+ * closed again before returning.
+ *
+ * Returns 1 if the RTM_GET request was written successfully, 0 if
+ * write(2) failed with ESRCH (no default route), and -1 if the
+ * socket could not be created or the write failed for another reason.
+ */
+int
+rtmon_get_defaults(void)
+{
+    int rtsock;
+    struct req {
+        struct rt_msghdr rtm;
+        struct sockaddr_in6 dst;
+        struct sockaddr_in6 mask;
+        struct sockaddr_dl ifp;
+    } req;
+    ssize_t nsent;
+    int write_errno;
+
+    rtsock = socket(PF_ROUTE, SOCK_RAW, AF_INET6);
+    if (rtsock < 0) {
+        DPRINTF0(("rtmon: failed to create routing socket\n"));
+        return -1;
+    }
+
+    /* all-zeroes dst and mask, i.e. ask about the default route */
+    memset(&req, 0, sizeof(req));
+
+    req.rtm.rtm_type = RTM_GET;
+    req.rtm.rtm_version = RTM_VERSION;
+    req.rtm.rtm_msglen = sizeof(req);
+    req.rtm.rtm_seq = 0x12345;
+
+    req.rtm.rtm_flags = RTF_UP;
+    req.rtm.rtm_addrs = RTA_DST | RTA_NETMASK | RTA_IFP;
+
+    req.dst.sin6_family = AF_INET6;
+#if HAVE_SA_LEN
+    req.dst.sin6_len = sizeof(req.dst);
+#endif
+
+    req.mask.sin6_family = AF_INET6;
+#if HAVE_SA_LEN
+    req.mask.sin6_len = sizeof(req.mask);
+#endif
+
+    req.ifp.sdl_family = AF_LINK;
+#if HAVE_SA_LEN
+    req.ifp.sdl_len = sizeof(req.ifp);
+#endif
+
+    nsent = write(rtsock, &req, req.rtm.rtm_msglen);
+    write_errno = errno;    /* save it: close() may overwrite errno */
+
+    /* we never read the reply, so we are done with the socket */
+    close(rtsock);
+
+    if (nsent < 0) {
+        if (write_errno == ESRCH) {
+            /* there's no default route */
+            return 0;
+        }
+        else {
+            DPRINTF0(("rtmon: failed to send RTM_GET\n"));
+            return -1;
+        }
+    }
+
+    return 1;
+}
diff --git a/src/VBox/NetworkServices/NAT/rtmon_linux.c b/src/VBox/NetworkServices/NAT/rtmon_linux.c
new file mode 100644
index 00000000..a364b433
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/rtmon_linux.c
@@ -0,0 +1,249 @@
+/* $Id: rtmon_linux.c $ */
+/** @file
+ * NAT Network - IPv6 default route monitor for Linux netlink.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+#define LOG_GROUP LOG_GROUP_NAT_SERVICE
+
+#include "proxy.h"
+
+#include <sys/types.h> /* must come before linux/netlink */
+#include <sys/socket.h>
+
+#include <asm/types.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+
+static int rtmon_check_defaults(const void *buf, size_t len);
+
+
+/**
+ * Read IPv6 routing table - Linux rtnetlink version.
+ *
+ * XXX: TODO: To avoid re-reading the table we should subscribe to
+ * updates by binding a monitoring NETLINK_ROUTE socket to
+ * sockaddr_nl::nl_groups = RTMGRP_IPV6_ROUTE.
+ *
+ * But that will provide updates only.  Documentation is scarce, but
+ * from what I've seen it seems that to get accurate routing info the
+ * monitoring socket needs to be created first, then full routing
+ * table requested (easier to do via separate socket), then monitoring
+ * socket polled for input.  The first update(s) of the monitoring
+ * socket may happen before full table is returned, so we can't just
+ * count the defaults, we need to keep track of their { oif, gw } to
+ * correctly ignore updates that are reported via monitoring socket,
+ * but that are already reflected in the full routing table returned
+ * in response to our request.
+ *
+ * Returns the result of rtmon_check_defaults() on the received data,
+ * or -1 if the buffer can't be allocated or a socket call fails.
+ */
+int
+rtmon_get_defaults(void)
+{
+    int rtsock;
+    ssize_t nsent, ssize;
+    int ndefrts;
+
+    char *buf = NULL;
+    size_t bufsize;
+
+    struct {
+        struct nlmsghdr nh;
+        struct rtmsg rtm;
+        char attrbuf[512];
+    } rtreq;
+
+    memset(&rtreq, 0, sizeof(rtreq));
+    rtreq.nh.nlmsg_type = RTM_GETROUTE;
+    rtreq.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+    rtreq.rtm.rtm_family = AF_INET6;
+    rtreq.rtm.rtm_table = RT_TABLE_MAIN;
+    rtreq.rtm.rtm_protocol = RTPROT_UNSPEC;
+
+    rtreq.nh.nlmsg_len = NLMSG_SPACE(sizeof(rtreq.rtm));
+
+    /*
+     * Start with a 1K buffer.  When the reply turns out to be larger,
+     * ssize holds the size recv() reported and the next iteration
+     * reallocates to that size and repeats the whole request.
+     */
+    bufsize = 1024;
+    ssize = bufsize;
+    for (;;) {
+        char *newbuf;
+        int recverr;
+
+        newbuf = (char *)realloc(buf, ssize);
+        if (newbuf == NULL) {
+            DPRINTF0(("rtmon: failed to %sallocate buffer\n",
+                      buf == NULL ? "" : "re"));
+            free(buf);
+            return -1;
+        }
+
+        buf = newbuf;
+        bufsize = ssize;
+
+        /* it's easier to reopen than to flush */
+        rtsock = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+        if (rtsock < 0) {
+            DPRINTF0(("rtmon: failed to create netlink socket: %s\n", strerror(errno)));
+            free(buf);
+            return -1;
+        }
+
+        nsent = send(rtsock, &rtreq, rtreq.nh.nlmsg_len, 0);
+        if (nsent < 0) {
+            DPRINTF0(("rtmon: RTM_GETROUTE failed: %s\n", strerror(errno)));
+            close (rtsock);
+            free(buf);
+            return -1;
+        }
+
+        /* MSG_TRUNC is passed so that an oversized reply can be detected below */
+        ssize = recv(rtsock, buf, bufsize, MSG_TRUNC);
+        recverr = errno;    /* save it: close() may overwrite errno */
+        close (rtsock);
+
+        if (ssize < 0) {
+            DPRINTF(("rtmon: failed to read RTM_GETROUTE response: %s\n",
+                     strerror(recverr)));
+            free(buf);
+            return -1;
+        }
+
+        if ((size_t)ssize <= bufsize) {
+            DPRINTF2(("rtmon: RTM_GETROUTE: %lu bytes\n",
+                      (unsigned long)ssize));
+            break;
+        }
+
+        DPRINTF2(("rtmon: RTM_GETROUTE: truncated %lu to %lu bytes, retrying\n",
+                  (unsigned long)ssize, (unsigned long)bufsize));
+        /* try again with larger buffer */
+    }
+
+    ndefrts = rtmon_check_defaults(buf, (size_t)ssize);
+    free(buf);
+
+    if (ndefrts == 0) {
+        DPRINTF(("rtmon: no IPv6 default routes found\n"));
+    }
+    else {
+        DPRINTF(("rtmon: %d IPv6 default route%s found\n",
+                 ndefrts,
+                 ndefrts == 1 || ndefrts == -1 ? "" : "s"));
+    }
+
+    return ndefrts;
+}
+
+
+/**
+ * Scan netlink message in the buffer for IPv6 default route changes.
+ *
+ * Walks the chain of netlink messages in buf (len bytes) and stops at
+ * the first NLMSG_DONE or NLMSG_ERROR message.  An RTM_NEWROUTE
+ * message counts as +1 and an RTM_DELROUTE message as -1, but only
+ * when the route is AF_INET6, has rtm_dst_len == 0 and belongs to
+ * RT_TABLE_MAIN.
+ *
+ * Returns the sum of those +1/-1 contributions.
+ */
+static int
+rtmon_check_defaults(const void *buf, size_t len)
+{
+ struct nlmsghdr *nh;
+ int dfltdiff = 0; /* running sum of deltas; this is the return value */
+
+ for (nh = (struct nlmsghdr *)buf;
+ NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len))
+ {
+ struct rtmsg *rtm;
+ struct rtattr *rta;
+ int attrlen;
+ int delta = 0;
+ const void *gwbuf;
+ size_t gwlen;
+ int oif;
+
+ DPRINTF2(("nlmsg seq %d type %d flags 0x%x\n",
+ nh->nlmsg_seq, nh->nlmsg_type, nh->nlmsg_flags));
+
+ if (nh->nlmsg_type == NLMSG_DONE) {
+ break;
+ }
+
+ if (nh->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *ne = (struct nlmsgerr *)NLMSG_DATA(nh);
+ DPRINTF2(("> error %d\n", ne->error));
+ LWIP_UNUSED_ARG(ne); /* ne is only referenced by the debug print above */
+ break;
+ }
+
+ if (nh->nlmsg_type < RTM_BASE || RTM_MAX <= nh->nlmsg_type) {
+ /* shouldn't happen */
+ DPRINTF2(("> not an RTM message!\n"));
+ continue;
+ }
+
+
+ rtm = (struct rtmsg *)NLMSG_DATA(nh);
+ attrlen = RTM_PAYLOAD(nh);
+
+ if (nh->nlmsg_type == RTM_NEWROUTE) {
+ delta = +1;
+ }
+ else if (nh->nlmsg_type == RTM_DELROUTE) {
+ delta = -1;
+ }
+ else {
+ /* shouldn't happen */
+ continue;
+ }
+
+ /*
+ * Is this an IPv6 default route in the main table? (Local
+ * table always has ::/0 reject route, hence the last check).
+ */
+ if (rtm->rtm_family == AF_INET6 /* should always be true */
+ && rtm->rtm_dst_len == 0
+ && rtm->rtm_table == RT_TABLE_MAIN)
+ {
+ dfltdiff += delta;
+ }
+ else {
+ /* some other route change */
+ continue;
+ }
+
+
+ gwbuf = NULL;
+ gwlen = 0;
+ oif = -1;
+
+ for (rta = RTM_RTA(rtm); /* pick the gateway and output interface out of the route attributes */
+ RTA_OK(rta, attrlen);
+ rta = RTA_NEXT(rta, attrlen))
+ {
+ if (rta->rta_type == RTA_GATEWAY) {
+ gwbuf = RTA_DATA(rta);
+ gwlen = RTA_PAYLOAD(rta);
+ }
+ else if (rta->rta_type == RTA_OIF) {
+ /* assert RTA_PAYLOAD(rta) == 4 */
+ memcpy(&oif, RTA_DATA(rta), sizeof(oif));
+ }
+ }
+
+ /* XXX: TODO: note that { oif, gw } was added/removed */
+ LWIP_UNUSED_ARG(gwbuf); /* collected above but not used for anything yet */
+ LWIP_UNUSED_ARG(gwlen);
+ LWIP_UNUSED_ARG(oif);
+ }
+
+ return dfltdiff;
+}
diff --git a/src/VBox/NetworkServices/NAT/rtmon_win.c b/src/VBox/NetworkServices/NAT/rtmon_win.c
new file mode 100644
index 00000000..f70f6717
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/rtmon_win.c
@@ -0,0 +1,21 @@
+/* $Id: rtmon_win.c $ */
+/** @file
+ * NAT Network - IPv6 default route monitor for Windows.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+int
+rtmon_get_defaults(void) { /* Windows stub: no routing table is queried here, the result is always 0. */
+ return 0;
+}
diff --git a/src/VBox/NetworkServices/NAT/tftp.h b/src/VBox/NetworkServices/NAT/tftp.h
new file mode 100644
index 00000000..57c8608a
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/tftp.h
@@ -0,0 +1,49 @@
+/* $Id: tftp.h $ */
+/** @file
+ * NAT Network - Definitions for TFTP protocol.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_tftp_h
+#define VBOX_INCLUDED_SRC_NAT_tftp_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#define TFTP_SERVER_PORT 69 /* well-known TFTP server port */
+
+/* opcodes (1-5 are from RFC 1350) */
+#define TFTP_RRQ 1 /* Read request. */
+#define TFTP_WRQ 2 /* Write request. */
+#define TFTP_DATA 3 /* Data. */
+#define TFTP_ACK 4 /* Acknowledgment. */
+#define TFTP_ERROR 5 /* Error. */
+/* RFC 2347 */
+#define TFTP_OACK 6 /* Option acknowledgment. */
+
+
+/* error codes */
+#define TFTP_EUNDEF 0 /* Not defined, see error message (if any). */
+#define TFTP_ENOENT 1 /* File not found. */
+#define TFTP_EACCESS 2 /* Access violation. */
+#define TFTP_EFBIG 3 /* Disk full or allocation exceeded. */
+#define TFTP_ENOSYS 4 /* Illegal TFTP operation. */
+#define TFTP_ESRCH 5 /* Unknown transfer ID. */
+#define TFTP_EEXIST 6 /* File already exists. */
+#define TFTP_EUSER 7 /* No such user. */
+/* RFC 2347 */
+#define TFTP_EONAK 8 /* Option refused. */
+
+
+#endif /* !VBOX_INCLUDED_SRC_NAT_tftp_h */
diff --git a/src/VBox/NetworkServices/NAT/winpoll.h b/src/VBox/NetworkServices/NAT/winpoll.h
new file mode 100644
index 00000000..5016f432
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/winpoll.h
@@ -0,0 +1,53 @@
+/* $Id: winpoll.h $ */
+/** @file
+ * NAT Network - poll(2) for winsock, definitions and declarations.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_winpoll_h
+#define VBOX_INCLUDED_SRC_NAT_winpoll_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+# include <iprt/cdefs.h>
+/**
+ * WinSock2 has definitions for POLL* and pollfd, but they are only available
+ * for _WIN32_WINNT >= 0x0600, where they are used by WSAPoll (which has a very
+ * unclear history).  Provide our own definitions for older targets.
+ */
+# if(_WIN32_WINNT < 0x0600)
+# define POLLRDNORM 0x0100
+# define POLLRDBAND 0x0200
+# define POLLIN (POLLRDNORM | POLLRDBAND)
+# define POLLPRI 0x0400
+
+# define POLLWRNORM 0x0010
+# define POLLOUT (POLLWRNORM)
+# define POLLWRBAND 0x0020
+
+# define POLLERR 0x0001
+# define POLLHUP 0x0002
+# define POLLNVAL 0x0004
+
+struct pollfd { /* local definition for targets where WinSock2 does not provide one; element type of RTWinPoll()'s pFds */
+
+ SOCKET fd; /* the socket this entry refers to */
+ SHORT events; /* presumably the requested POLL* bits, as in poll(2) - confirm against RTWinPoll */
+ SHORT revents; /* presumably the reported POLL* bits, as in poll(2) - confirm against RTWinPoll */
+
+};
+#endif
+RT_C_DECLS_BEGIN
+int RTWinPoll(struct pollfd *pFds, unsigned int nfds, int timeout, int *pNready);
+RT_C_DECLS_END
+#endif /* !VBOX_INCLUDED_SRC_NAT_winpoll_h */
diff --git a/src/VBox/NetworkServices/NAT/winutils.h b/src/VBox/NetworkServices/NAT/winutils.h
new file mode 100644
index 00000000..cc39d463
--- /dev/null
+++ b/src/VBox/NetworkServices/NAT/winutils.h
@@ -0,0 +1,210 @@
+/* $Id: winutils.h $ */
+/** @file
+ * NAT Network - winsock compatibility shim.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NAT_winutils_h
+#define VBOX_INCLUDED_SRC_NAT_winutils_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+# include <iprt/cdefs.h>
+
+# ifdef RT_OS_WINDOWS
+# include <iprt/win/winsock2.h>
+# include <iprt/win/ws2tcpip.h>
+# include <mswsock.h>
+# include <iprt/win/windows.h>
+# include <iprt/err.h>
+# include <iprt/net.h>
+# include <iprt/log.h>
+/**
+ * Inclusion of lwip/def.h was added here to avoid conflicting definitions
+ * of the hton-family functions in LWIP and winsock's headers.
+ */
+# include <lwip/def.h>
+
+# ifndef PF_LOCAL
+# define PF_LOCAL AF_INET
+# endif
+
+# ifdef DEBUG
+# define err(code,...) do { \
+ AssertMsgFailed((__VA_ARGS__)); \
+ }while(0)
+#else
+# define err(code,...) do { \
+ DPRINTF0((__VA_ARGS__)); \
+ ExitProcess(code); \
+ }while(0)
+#endif
+# define errx err
+# define __func__ __FUNCTION__
+# define __attribute__(x) /* IGNORE */
+
+# define SOCKERRNO() (WSAGetLastError())
+# define SET_SOCKERRNO(error) do { WSASetLastError(error); } while (0)
+
+/**
+ * "Windows Sockets Error Codes" obtained with WSAGetLastError().
+ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740668(v=vs.85).aspx
+ *
+ * This block of error codes from <winsock2.h> conflicts with "POSIX
+ * supplement" error codes from <errno.h>, but we don't expect to ever
+ * encounter the latter in the proxy code, so redefine them to their
+ * unixy names.
+ */
+# undef EWOULDBLOCK
+# define EWOULDBLOCK WSAEWOULDBLOCK
+# undef EINPROGRESS
+# define EINPROGRESS WSAEINPROGRESS
+# undef EALREADY
+# define EALREADY WSAEALREADY
+# undef ENOTSOCK
+# define ENOTSOCK WSAENOTSOCK
+# undef EDESTADDRREQ
+# define EDESTADDRREQ WSAEDESTADDRREQ
+# undef EMSGSIZE
+# define EMSGSIZE WSAEMSGSIZE
+# undef EPROTOTYPE
+# define EPROTOTYPE WSAEPROTOTYPE
+# undef ENOPROTOOPT
+# define ENOPROTOOPT WSAENOPROTOOPT
+# undef EPROTONOSUPPORT
+# define EPROTONOSUPPORT WSAEPROTONOSUPPORT
+# undef ESOCKTNOSUPPORT
+# define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT
+# undef EOPNOTSUPP
+# define EOPNOTSUPP WSAEOPNOTSUPP
+# undef EPFNOSUPPORT
+# define EPFNOSUPPORT WSAEPFNOSUPPORT
+# undef EAFNOSUPPORT
+# define EAFNOSUPPORT WSAEAFNOSUPPORT
+# undef EADDRINUSE
+# define EADDRINUSE WSAEADDRINUSE
+# undef EADDRNOTAVAIL
+# define EADDRNOTAVAIL WSAEADDRNOTAVAIL
+# undef ENETDOWN
+# define ENETDOWN WSAENETDOWN
+# undef ENETUNREACH
+# define ENETUNREACH WSAENETUNREACH
+# undef ENETRESET
+# define ENETRESET WSAENETRESET
+# undef ECONNABORTED
+# define ECONNABORTED WSAECONNABORTED
+# undef ECONNRESET
+# define ECONNRESET WSAECONNRESET
+# undef ENOBUFS
+# define ENOBUFS WSAENOBUFS
+# undef EISCONN
+# define EISCONN WSAEISCONN
+# undef ENOTCONN
+# define ENOTCONN WSAENOTCONN
+# undef ESHUTDOWN
+# define ESHUTDOWN WSAESHUTDOWN
+# undef ETOOMANYREFS
+# define ETOOMANYREFS WSAETOOMANYREFS
+# undef ETIMEDOUT
+# define ETIMEDOUT WSAETIMEDOUT
+# undef ECONNREFUSED
+# define ECONNREFUSED WSAECONNREFUSED
+# undef ELOOP
+# define ELOOP WSAELOOP
+# undef ENAMETOOLONG
+# define ENAMETOOLONG WSAENAMETOOLONG
+# undef EHOSTDOWN
+# define EHOSTDOWN WSAEHOSTDOWN
+# undef EHOSTUNREACH
+# define EHOSTUNREACH WSAEHOSTUNREACH
+
+/**
+ * parameters to shutdown (2) with Winsock2
+ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740481(v=vs.85).aspx
+ */
+# define SHUT_RD SD_RECEIVE
+# define SHUT_WR SD_SEND
+# define SHUT_RDWR SD_BOTH
+
+typedef ULONG nfds_t;
+
+typedef WSABUF IOVEC;
+
+# define IOVEC_GET_BASE(iov) ((iov).buf)
+# define IOVEC_SET_BASE(iov, b) ((iov).buf = (b))
+
+# define IOVEC_GET_LEN(iov) ((iov).len)
+# define IOVEC_SET_LEN(iov, l) ((iov).len = (ULONG)(l))
+
+#if _WIN32_WINNT < 0x0600
+/* otherwise defined the other way around in ws2def.h */
+#define cmsghdr _WSACMSGHDR
+
+#undef CMSG_DATA /* wincrypt.h can byte my shiny metal #undef */
+#define CMSG_DATA WSA_CMSG_DATA
+#define CMSG_LEN WSA_CMSG_LEN
+#define CMSG_SPACE WSA_CMSG_SPACE
+
+#define CMSG_FIRSTHDR WSA_CMSG_FIRSTHDR
+#define CMSG_NXTHDR WSA_CMSG_NXTHDR
+#endif /* _WIN32_WINNT < 0x0600 - provide unuglified CMSG names */
+
+RT_C_DECLS_BEGIN
+int RTWinSocketPair(int domain, int type, int protocol, SOCKET socket_vector[2]);
+RT_C_DECLS_END
+
+# else /* !RT_OS_WINDOWS */
+
+# include <errno.h>
+# include <unistd.h>
+
+# define SOCKET int
+# define INVALID_SOCKET (-1)
+# define SOCKET_ERROR (-1)
+
+# define SOCKERRNO() (errno)
+# define SET_SOCKERRNO(error) do { errno = (error); } while (0)
+
+# define closesocket(s) close(s)
+# define ioctlsocket(s, req, arg) ioctl((s), (req), (arg))
+
+typedef struct iovec IOVEC;
+
+# define IOVEC_GET_BASE(iov) ((iov).iov_base)
+# define IOVEC_SET_BASE(iov, b) ((iov).iov_base = (b))
+
+# define IOVEC_GET_LEN(iov) ((iov).iov_len)
+# define IOVEC_SET_LEN(iov, l) ((iov).iov_len = (l))
+# endif
+
+/**
+ * Check whether a socket error code is one of those treated as
+ * transient: "would block" (and EAGAIN where it is a distinct value),
+ * "interrupted", "no buffers" and, on non-Windows hosts, "no memory".
+ *
+ * Returns 1 for those codes, 0 for everything else.
+ */
+DECLINLINE(int)
+proxy_error_is_transient(int error)
+{
+# if defined(RT_OS_WINDOWS)
+    /* NB: we don't redefine EINTR above, so use the WSA* names here */
+    if (error == WSAEWOULDBLOCK) {
+        return 1;
+    }
+    if (error == WSAEINTR) {
+        return 1;
+    }
+    if (error == WSAENOBUFS) {
+        return 1;
+    }
+    return 0;
+# else
+    if (error == EWOULDBLOCK) {
+        return 1;
+    }
+#  if EAGAIN != EWOULDBLOCK
+    if (error == EAGAIN) {
+        return 1;
+    }
+#  endif
+    if (error == EINTR) {
+        return 1;
+    }
+    if (error == ENOBUFS) {
+        return 1;
+    }
+    if (error == ENOMEM) {
+        return 1;
+    }
+    return 0;
+# endif
+}
+
+#endif /* !VBOX_INCLUDED_SRC_NAT_winutils_h */
diff --git a/src/VBox/NetworkServices/NetLib/ComHostUtils.cpp b/src/VBox/NetworkServices/NetLib/ComHostUtils.cpp
new file mode 100644
index 00000000..8ee2e19a
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/ComHostUtils.cpp
@@ -0,0 +1,230 @@
+/* $Id: ComHostUtils.cpp $ */
+/** @file
+ * ComHostUtils.cpp
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#if defined(RT_OS_WINDOWS) && !defined(VBOX_COM_OUTOFPROC_MODULE)
+# define VBOX_COM_OUTOFPROC_MODULE
+#endif
+#include <VBox/com/com.h>
+#include <VBox/com/listeners.h>
+#include <VBox/com/string.h>
+#include <VBox/com/Guid.h>
+#include <VBox/com/array.h>
+#include <VBox/com/ErrorInfo.h>
+#include <VBox/com/errorprint.h>
+#include <VBox/com/EventQueue.h>
+#include <VBox/com/VirtualBox.h>
+
+#include <iprt/alloca.h>
+#include <iprt/buildconfig.h>
+#include <iprt/errcore.h>
+#include <iprt/net.h> /* must come before getopt */
+#include <iprt/getopt.h>
+#include <iprt/initterm.h>
+#include <iprt/message.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/stream.h>
+#include <iprt/time.h>
+#include <iprt/string.h>
+
+
+#include "../NetLib/VBoxNetLib.h"
+#include "../NetLib/shared_ptr.h"
+
+#include <vector>
+#include <list>
+#include <string>
+#include <map>
+
+#include "../NetLib/VBoxNetBaseService.h"
+
+#ifdef RT_OS_WINDOWS /* WinMain */
+# include <iprt/win/windows.h>
+# include <stdlib.h>
+# ifdef INET_ADDRSTRLEN
+/* On Windows INET_ADDRSTRLEN is defined as 22 in Ws2ipdef.h because it includes the port number */
+# undef INET_ADDRSTRLEN
+# endif
+# define INET_ADDRSTRLEN 16
+#else
+# include <netinet/in.h>
+#endif
+
+#include "utils.h"
+
+
+VBOX_LISTENER_DECLARE(NATNetworkListenerImpl)
+
+
+/**
+ * Read the LocalMappings attribute of the NAT network and parse its
+ * "<IPv4 address>=<offset>" strings into @a mapping.
+ *
+ * Strings without '=', with an address part longer than
+ * INET_ADDRSTRLEN, with an unparsable address, or with an offset that
+ * converts to 0 are silently skipped.
+ *
+ * @returns VINF_SUCCESS, or VERR_NOT_FOUND when the attribute can't
+ *          be read or the returned list is empty.
+ * @param   nat      The NAT network to query.
+ * @param   mapping  Receives the parsed entries; cleared first.
+ */
+int localMappings(const ComNatPtr& nat, AddressToOffsetMapping& mapping)
+{
+    mapping.clear();
+
+    ComBstrArray strs;
+    HRESULT hrc = nat->COMGETTER(LocalMappings)(ComSafeArrayAsOutParam(strs));
+    if (!SUCCEEDED(hrc))
+        return VERR_NOT_FOUND;
+
+    const size_t cStrs = strs.size();
+    if (cStrs == 0)
+        return VERR_NOT_FOUND;
+
+    for (size_t i = 0; i < cStrs; ++i)
+    {
+        com::Utf8Str strLo2Off(strs[i]);
+        const char *pszLo2Off = strLo2Off.c_str();
+
+        /* split at the '=' separating the address from the offset */
+        char *pszTerm = RTStrStr(pszLo2Off, "=");
+        if (!pszTerm)
+            continue;
+        if ((pszTerm - pszLo2Off) > INET_ADDRSTRLEN)
+            continue;
+
+        /* zeroed buffer, so the copied address part is terminated */
+        char szAddr[17];
+        RT_ZERO(szAddr);
+        memcpy(szAddr, pszLo2Off, (pszTerm - pszLo2Off));
+
+        RTNETADDRIPV4 ip4addr;
+        int rc = RTNetStrToIPv4Addr(szAddr, &ip4addr);
+        if (!RT_SUCCESS(rc))
+            continue;
+
+        uint32_t u32Off = RTStrToUInt32(pszTerm + 1);
+        if (u32Off != 0)
+            mapping.insert(AddressToOffsetMapping::value_type(ip4addr, u32Off));
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Copy the host's SearchStrings attribute into @a strings as UTF-8
+ * std::string objects.
+ *
+ * @returns VINF_SUCCESS, or VERR_NOT_FOUND if the attribute can't be read.
+ * @param   host     The host object to query.
+ * @param   strings  Receives the strings; cleared first.
+ */
+int hostDnsSearchList(const ComHostPtr& host, std::vector<std::string>& strings)
+{
+    strings.clear();
+
+    ComBstrArray strs;
+    HRESULT hrc = host->COMGETTER(SearchStrings)(ComSafeArrayAsOutParam(strs));
+    if (!SUCCEEDED(hrc))
+        return VERR_NOT_FOUND;
+
+    const size_t cStrs = strs.size();
+    for (size_t i = 0; i < cStrs; ++i)
+        strings.push_back(com::Utf8Str(strs[i]).c_str());
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Fetch the host's DomainName attribute as a UTF-8 std::string.
+ *
+ * @returns VINF_SUCCESS, or VERR_NOT_FOUND if the attribute can't be
+ *          read (in which case @a domainStr is left untouched).
+ * @param   host       The host object to query.
+ * @param   domainStr  Receives the domain name.
+ */
+int hostDnsDomain(const ComHostPtr& host, std::string& domainStr)
+{
+    com::Bstr domain;
+    HRESULT hrc = host->COMGETTER(DomainName)(domain.asOutParam());
+    if (!SUCCEEDED(hrc))
+        return VERR_NOT_FOUND;
+
+    domainStr = com::Utf8Str(domain).c_str();
+    return VINF_SUCCESS;
+}
+
+
+int createNatListener(ComNatListenerPtr& listener, const ComVirtualBoxPtr& vboxptr,
+ NATNetworkEventAdapter *adapter, /* const */ ComEventTypeArray& events)
+{ /*
+ * Creates a NATNetworkListenerImpl object, initialises it with a new
+ * NATNetworkListener and the given adapter, stores it in listener and
+ * registers it with the event source of vboxptr for the given events.
+ * Every failing COM call asserts and returns VERR_INTERNAL_ERROR;
+ * VINF_SUCCESS is returned when all steps succeed.
+ */
+ ComObjPtr<NATNetworkListenerImpl> obj;
+ HRESULT hrc = obj.createObject();
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ hrc = obj->init(new NATNetworkListener(), adapter);
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ ComPtr<IEventSource> esVBox;
+ hrc = vboxptr->COMGETTER(EventSource)(esVBox.asOutParam());
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ listener = obj; /* assigned before registration, so it stays set even if RegisterListener fails below */
+
+ hrc = esVBox->RegisterListener(listener, ComSafeArrayAsInParam(events), true);
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ return VINF_SUCCESS;
+}
+
+int destroyNatListener(ComNatListenerPtr& listener, const ComVirtualBoxPtr& vboxptr)
+{ /*
+ * Unregisters listener from the event source of vboxptr and resets it
+ * to null.  Does nothing (and returns VINF_SUCCESS) when listener is
+ * not set.  A failing COM call asserts and returns VERR_INTERNAL_ERROR
+ * without resetting listener.
+ */
+ if (listener)
+ {
+ ComPtr<IEventSource> esVBox;
+ HRESULT hrc = vboxptr->COMGETTER(EventSource)(esVBox.asOutParam());
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+ if (!esVBox.isNull())
+ {
+ hrc = esVBox->UnregisterListener(listener);
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+ }
+ listener.setNull();
+ }
+ return VINF_SUCCESS;
+}
+
+int createClientListener(ComNatListenerPtr& listener, const ComVirtualBoxClientPtr& vboxclientptr,
+ NATNetworkEventAdapter *adapter, /* const */ ComEventTypeArray& events)
+{ /*
+ * Same sequence as createNatListener(), except that the listener is
+ * registered with the event source of vboxclientptr.  Every failing
+ * COM call asserts and returns VERR_INTERNAL_ERROR; VINF_SUCCESS is
+ * returned when all steps succeed.
+ */
+ ComObjPtr<NATNetworkListenerImpl> obj;
+ HRESULT hrc = obj.createObject();
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ hrc = obj->init(new NATNetworkListener(), adapter);
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ ComPtr<IEventSource> esVBox;
+ hrc = vboxclientptr->COMGETTER(EventSource)(esVBox.asOutParam());
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ listener = obj; /* assigned before registration, so it stays set even if RegisterListener fails below */
+
+ hrc = esVBox->RegisterListener(listener, ComSafeArrayAsInParam(events), true);
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ return VINF_SUCCESS;
+}
+
+int destroyClientListener(ComNatListenerPtr& listener, const ComVirtualBoxClientPtr& vboxclientptr)
+{ /*
+ * Unregisters listener from the event source of vboxclientptr and
+ * resets it to null.  Does nothing (and returns VINF_SUCCESS) when
+ * listener is not set.  A failing COM call asserts and returns
+ * VERR_INTERNAL_ERROR without resetting listener.
+ */
+ if (listener)
+ {
+ ComPtr<IEventSource> esVBox;
+ HRESULT hrc = vboxclientptr->COMGETTER(EventSource)(esVBox.asOutParam());
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+ if (!esVBox.isNull())
+ {
+ hrc = esVBox->UnregisterListener(listener);
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+ }
+ listener.setNull();
+ }
+ return VINF_SUCCESS;
+}
diff --git a/src/VBox/NetworkServices/NetLib/Makefile.kup b/src/VBox/NetworkServices/NetLib/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/Makefile.kup
diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp
new file mode 100644
index 00000000..c3a7782f
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp
@@ -0,0 +1,155 @@
+/* $Id: VBoxNetARP.cpp $ */
+/** @file
+ * VBoxNetARP - IntNet ARP Client Routines.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_DEFAULT
+#include "VBoxNetLib.h"
+#include <iprt/string.h>
+#include <VBox/intnetinline.h>
+#include <VBox/log.h>
+
+
+/**
+ * Deal with ARP queries.
+ *
+ * Inspects the next frame in the receive ring and, if it is an
+ * Ethernet/IPv4 ARP request for our IPv4 address (or a reverse ARP
+ * request for our MAC address), sends the corresponding reply padded
+ * to 60 bytes.  This function does not itself advance the receive ring.
+ *
+ * @returns true if ARP (whether or not a reply was sent), false if there is
+ *          no plain frame to look at, the frame is too short, is not ARP, or
+ *          is not addressed to us or to the broadcast address.
+ *
+ * @param   pSession    The support driver session.
+ * @param   hIf         The internal network interface handle.
+ * @param   pBuf        The internal network interface buffer.
+ * @param   pMacAddr    Our MAC address.
+ * @param   IPv4Addr    Our IPv4 address.
+ */
+bool VBoxNetArpHandleIt(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf, PCRTMAC pMacAddr, RTNETADDRIPV4 IPv4Addr)
+{
+    /*
+     * Valid IntNet Ethernet frame? Skip GSO, no ARP in there.
+     */
+    PCINTNETHDR pHdr = IntNetRingGetNextFrameToRead(&pBuf->Recv);
+    if (   !pHdr
+        || pHdr->u8Type != INTNETHDR_TYPE_FRAME)
+        return false;
+
+    /*
+     * Make sure the frame is big enough to hold an Ethernet header plus an
+     * IPv4 ARP packet before looking at any header field.  Anything
+     * shorter cannot be an ARP packet we handle.
+     */
+    size_t cbFrame = pHdr->cbFrame;
+    if (cbFrame < sizeof(RTNETARPIPV4) + sizeof(RTNETETHERHDR))
+        return false;
+
+    const void *pvFrame = IntNetHdrGetFramePtr(pHdr, pBuf);
+    PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvFrame;
+
+    /*
+     * Arp frame?  It must be sent to the broadcast address or to us.
+     */
+    if (pEthHdr->EtherType != RT_H2N_U16_C(RTNET_ETHERTYPE_ARP))
+        return false;
+    if (   (   pEthHdr->DstMac.au16[0] != 0xffff
+            || pEthHdr->DstMac.au16[1] != 0xffff
+            || pEthHdr->DstMac.au16[2] != 0xffff)
+        && (   pEthHdr->DstMac.au16[0] != pMacAddr->au16[0]
+            || pEthHdr->DstMac.au16[1] != pMacAddr->au16[1]
+            || pEthHdr->DstMac.au16[2] != pMacAddr->au16[2])
+       )
+        return false;
+
+    /* Only Ethernet hardware addresses and IPv4 protocol addresses. */
+    PCRTNETARPHDR pArpHdr = (PCRTNETARPHDR)(pEthHdr + 1);
+    if (pArpHdr->ar_htype != RT_H2N_U16_C(RTNET_ARP_ETHER))
+        return false;
+    if (pArpHdr->ar_hlen != sizeof(RTMAC))
+        return false;
+    if (pArpHdr->ar_ptype != RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4))
+        return false;
+    if (pArpHdr->ar_plen != sizeof(RTNETADDRIPV4))
+        return false;
+
+    /* It's ARP, alright. Anything we need to do something about. */
+    PCRTNETARPIPV4 pArp = (PCRTNETARPIPV4)pArpHdr;
+    switch (pArp->Hdr.ar_oper)
+    {
+        case RT_H2N_U16_C(RTNET_ARPOP_REQUEST):
+        case RT_H2N_U16_C(RTNET_ARPOP_REVREQUEST):
+        case RT_H2N_U16_C(RTNET_ARPOP_INVREQUEST):
+            break;
+        default:
+            return true;
+    }
+
+    /*
+     * Deal with the queries.
+     */
+    RTNETARPIPV4 Reply;
+    switch (pArp->Hdr.ar_oper)
+    {
+        /* 'Who has ar_tpa? Tell ar_spa.' */
+        case RT_H2N_U16_C(RTNET_ARPOP_REQUEST):
+            if (pArp->ar_tpa.u != IPv4Addr.u)
+                return true;
+            Reply.Hdr.ar_oper = RT_H2N_U16_C(RTNET_ARPOP_REPLY);
+            break;
+
+        /* Reverse request: only answered when it asks about our MAC address. */
+        case RT_H2N_U16_C(RTNET_ARPOP_REVREQUEST):
+            if (    pArp->ar_tha.au16[0] != pMacAddr->au16[0]
+                ||  pArp->ar_tha.au16[1] != pMacAddr->au16[1]
+                ||  pArp->ar_tha.au16[2] != pMacAddr->au16[2])
+                return true;
+            Reply.Hdr.ar_oper = RT_H2N_U16_C(RTNET_ARPOP_REVREPLY);
+            break;
+
+        case RT_H2N_U16_C(RTNET_ARPOP_INVREQUEST):
+            /** @todo RTNET_ARPOP_INVREQUEST */
+            return true;
+            //Reply.Hdr.ar_oper = RT_H2N_U16_C(RTNET_ARPOP_INVREPLY);
+            //break;
+    }
+
+    /*
+     * Complete the reply and send it.
+     */
+    Reply.Hdr.ar_htype = RT_H2N_U16_C(RTNET_ARP_ETHER);
+    Reply.Hdr.ar_ptype = RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4);
+    Reply.Hdr.ar_hlen  = sizeof(RTMAC);
+    Reply.Hdr.ar_plen  = sizeof(RTNETADDRIPV4);
+    Reply.ar_sha = *pMacAddr;
+    Reply.ar_spa = IPv4Addr;
+    Reply.ar_tha = pArp->ar_sha;
+    Reply.ar_tpa = pArp->ar_spa;
+
+
+    RTNETETHERHDR EthHdr;
+    EthHdr.DstMac    = pArp->ar_sha;
+    EthHdr.SrcMac    = *pMacAddr;
+    EthHdr.EtherType = RT_H2N_U16_C(RTNET_ETHERTYPE_ARP);
+
+    /* Zero padding that brings the frame up to 60 bytes. */
+    uint8_t abTrailer[60 - sizeof(Reply) - sizeof(EthHdr)];
+    RT_ZERO(abTrailer);
+
+    INTNETSEG aSegs[3];
+    aSegs[0].cb = sizeof(EthHdr);
+    aSegs[0].pv = &EthHdr;
+
+    aSegs[1].pv = &Reply;
+    aSegs[1].cb = sizeof(Reply);
+
+    aSegs[2].pv = &abTrailer[0];
+    aSegs[2].cb = sizeof(abTrailer);
+
+    /* The send status is not checked; a lost reply is not reported to the caller. */
+    VBoxNetIntIfSend(pSession, hIf, pBuf, RT_ELEMENTS(aSegs), &aSegs[0], true /* fFlush */);
+
+    return true;
+}
+
+
diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp
new file mode 100644
index 00000000..a4118fa8
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp
@@ -0,0 +1,849 @@
+/* $Id: VBoxNetBaseService.cpp $ */
+/** @file
+ * VBoxNetBaseService - common services for VBoxNetDHCP and VBoxNetNAT.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_NET_SERVICE
+
+#include <VBox/com/com.h>
+#include <VBox/com/listeners.h>
+#include <VBox/com/string.h>
+#include <VBox/com/Guid.h>
+#include <VBox/com/array.h>
+#include <VBox/com/ErrorInfo.h>
+#include <VBox/com/errorprint.h>
+#include <VBox/com/VirtualBox.h>
+#include <VBox/com/NativeEventQueue.h>
+
+#include <iprt/alloca.h>
+#include <iprt/buildconfig.h>
+#include <iprt/err.h>
+#include <iprt/net.h> /* must come before getopt.h. */
+#include <iprt/getopt.h>
+#include <iprt/initterm.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/process.h>
+#include <iprt/stream.h>
+#include <iprt/string.h>
+#include <iprt/time.h>
+#include <iprt/thread.h>
+#include <iprt/mem.h>
+#include <iprt/message.h>
+
+#include <VBox/sup.h>
+#include <VBox/intnet.h>
+#include <VBox/intnetinline.h>
+#include <VBox/vmm/vmm.h>
+#include <VBox/version.h>
+
+#include <vector>
+#include <string>
+
+#include <VBox/err.h>
+#include <VBox/log.h>
+
+#include "VBoxNetLib.h"
+#include "VBoxNetBaseService.h"
+
+#ifdef RT_OS_WINDOWS /* WinMain */
+# include <iprt/win/windows.h>
+# include <stdlib.h>
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/** Private instance data of VBoxNetBaseService; the header only forward-declares it. */
+struct VBoxNetBaseService::Data
+{
+    Data(const std::string& aServiceName, const std::string& aNetworkName):
+        m_ServiceName(aServiceName),
+        m_NetworkName(aNetworkName),
+        m_enmTrunkType(kIntNetTrunkType_WhateverNone),
+        m_pSession(NIL_RTR0PTR),
+        m_cbSendBuf(128 * _1K),
+        m_cbRecvBuf(256 * _1K),
+        m_hIf(INTNET_HANDLE_INVALID),
+        m_pIfBuf(NULL),
+        m_cVerbosity(0),
+        m_fNeedMain(false),
+        m_EventQ(NULL),
+        m_hThrRecv(NIL_RTTHREAD),
+        fShutdown(false)
+    {
+        /* The address members are plain data without constructors: zero them so that the
+           getters and the receive loop never read indeterminate bytes when the
+           corresponding command line options are not given. */
+        RT_ZERO(m_MacAddress);
+        RT_ZERO(m_Ipv4Address);
+        RT_ZERO(m_Ipv4Netmask);
+
+        int rc = RTCritSectInit(&m_csThis);
+        AssertRC(rc);
+    }
+
+    std::string m_ServiceName;
+    std::string m_NetworkName;
+    std::string m_TrunkName;
+    INTNETTRUNKTYPE m_enmTrunkType;
+
+    RTMAC m_MacAddress;
+    RTNETADDRIPV4 m_Ipv4Address;
+    RTNETADDRIPV4 m_Ipv4Netmask;
+
+    PSUPDRVSESSION m_pSession;
+    uint32_t m_cbSendBuf;
+    uint32_t m_cbRecvBuf;
+    INTNETIFHANDLE m_hIf; /**< The handle to the network interface. */
+    PINTNETBUF m_pIfBuf; /**< Interface buffer. */
+
+    std::vector<PRTGETOPTDEF> m_vecOptionDefs;
+    int32_t m_cVerbosity;
+
+    RTCRITSECT m_csThis; /**< cs for syncing */
+    bool m_fNeedMain; /**< Controls whether service will connect SVC for runtime needs */
+    com::NativeEventQueue *m_EventQ; /**< Event Queue */
+    RTTHREAD m_hThrRecv; /**< receiving thread, used only if main is used */
+    bool fShutdown; /**< Set by shutdown(); ends the event loop in startReceiveThreadAndEnterEventLoop(). */
+    static DECLCALLBACK(int) recvLoop(RTTHREAD, void *);
+};
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/* Commonly used options for network configuration */
+static RTGETOPTDEF g_aGetOptDef[] =
+{
+ { "--name", 'N', RTGETOPT_REQ_STRING },
+ { "--network", 'n', RTGETOPT_REQ_STRING },
+ { "--trunk-name", 't', RTGETOPT_REQ_STRING },
+ { "--trunk-type", 'T', RTGETOPT_REQ_STRING },
+ { "--mac-address", 'a', RTGETOPT_REQ_MACADDR },
+ { "--ip-address", 'i', RTGETOPT_REQ_IPV4ADDR },
+ { "--netmask", 'm', RTGETOPT_REQ_IPV4ADDR },
+ { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
+ { "--need-main", 'M', RTGETOPT_REQ_BOOL },
+};
+
+
+DECLCALLBACK(int) VBoxNetBaseService::Data::recvLoop(RTTHREAD, void *pvUser) /* entry point of the "RECV" thread */
+{
+ VBoxNetBaseService *pThis = static_cast<VBoxNetBaseService *>(pvUser); /* pvUser is the service instance handed to RTThreadCreate */
+
+ HRESULT hrc = com::Initialize(); /* NOTE(review): no matching com::Shutdown() on this thread - confirm this is intended */
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ pThis->doReceiveLoop(); /* runs until the receive loop breaks out or returns */
+
+ return VINF_SUCCESS;
+}
+
+
+VBoxNetBaseService::VBoxNetBaseService(const std::string& aName, const std::string& aNetworkName)
+    : m(new VBoxNetBaseService::Data(aName, aNetworkName))
+{
+    /* Seed the per-instance option list with the options common to all network services. */
+    for (size_t idxOpt = 0; idxOpt < RT_ELEMENTS(g_aGetOptDef); ++idxOpt)
+        m->m_vecOptionDefs.push_back(&g_aGetOptDef[idxOpt]);
+}
+
+
+VBoxNetBaseService::~VBoxNetBaseService()
+{
+ /*
+ * Shut down, close the interface connection, terminate the support library session and free the private data.
+ */
+ if (m)
+ {
+ shutdown(); /* stops the receive thread first, if one was started */
+ if (m->m_hIf != INTNET_HANDLE_INVALID)
+ {
+ INTNETIFCLOSEREQ CloseReq;
+ CloseReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ CloseReq.Hdr.cbReq = sizeof(CloseReq);
+ CloseReq.pSession = m->m_pSession;
+ CloseReq.hIf = m->m_hIf;
+ m->m_hIf = INTNET_HANDLE_INVALID; /* invalidate our copy before issuing the close request */
+ int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_CLOSE, 0, &CloseReq.Hdr);
+ AssertRC(rc);
+ }
+
+ if (m->m_pSession != NIL_RTR0PTR) /* only set when SUPR3Init() succeeded in tryGoOnline() */
+ {
+ SUPR3Term(false /*fForced*/);
+ m->m_pSession = NIL_RTR0PTR;
+ }
+
+ RTCritSectDelete(&m->m_csThis); /* pairs with RTCritSectInit() in the Data constructor */
+
+ delete m;
+ m = NULL;
+ }
+}
+
+
+int VBoxNetBaseService::init() /* Sets up COM and the VirtualBox client objects, but only when Main is needed. */
+{
+ if (isMainNeeded())
+ {
+ HRESULT hrc = com::Initialize();
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); /* every COM failure below is reported as VERR_INTERNAL_ERROR */
+
+ hrc = virtualboxClient.createInprocObject(CLSID_VirtualBoxClient);
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+
+ hrc = virtualboxClient->COMGETTER(VirtualBox)(virtualbox.asOutParam()); /* fills the protected 'virtualbox' member */
+ AssertComRCReturn(hrc, VERR_INTERNAL_ERROR);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+bool VBoxNetBaseService::isMainNeeded() const
+{
+ return m->m_fNeedMain;
+}
+
+
+int VBoxNetBaseService::run()
+{
+ /**
+ * If the child class needs Main we start the receiving thread, which calls
+ * doReceiveLoop, and then enter the event polling loop. For other clients we
+ * do the receiving on the current (main) thread.
+ */
+ if (isMainNeeded())
+ return startReceiveThreadAndEnterEventLoop();
+
+ doReceiveLoop();
+ return VINF_SUCCESS;
+}
+
+/**
+ * Parse the arguments.
+ *
+ * @returns 0 on success, fully bitched exit code on failure.
+ *
+ * @param argc Argument count.
+ * @param argv Argument vector.
+ *
+ * @todo r=bird: The --help and --version options shall not return a
+ * non-zero exit code. So, this method need to grow some
+ * complexity. I'm to blame for that blunder :/
+ */
+int VBoxNetBaseService::parseArgs(int argc, char **argv)
+{
+    /* getOptionsPtr() returns a heap copy of the option table; this guard frees it on every return path. */
+    struct OptArrayGuard
+    {
+        PRTGETOPTDEF paOptions;
+        ~OptArrayGuard() { RTMemFree(paOptions); }
+    } Guard = { getOptionsPtr() };
+    if (!Guard.paOptions)
+        return RTEXITCODE_FAILURE; /* out of memory */
+    RTGETOPTSTATE State;
+    int rc = RTGetOptInit(&State, argc, argv, Guard.paOptions, m->m_vecOptionDefs.size(), 0, 0 /*fFlags*/);
+    AssertRCReturn(rc, 49);
+    Log2(("BaseService: parseArgs enter\n"));
+
+    for (;;)
+    {
+        RTGETOPTUNION Val;
+        rc = RTGetOpt(&State, &Val);
+        if (!rc)
+            break;
+        switch (rc)
+        {
+            case 'N': // --name
+                m->m_ServiceName = Val.psz;
+                break;
+
+            case 'n': // --network
+                m->m_NetworkName = Val.psz;
+                break;
+
+            case 't': //--trunk-name
+                m->m_TrunkName = Val.psz;
+                break;
+
+            case 'T': //--trunk-type
+                if (!strcmp(Val.psz, "none"))
+                    m->m_enmTrunkType = kIntNetTrunkType_None;
+                else if (!strcmp(Val.psz, "whatever"))
+                    m->m_enmTrunkType = kIntNetTrunkType_WhateverNone;
+                else if (!strcmp(Val.psz, "netflt"))
+                    m->m_enmTrunkType = kIntNetTrunkType_NetFlt;
+                else if (!strcmp(Val.psz, "netadp"))
+                    m->m_enmTrunkType = kIntNetTrunkType_NetAdp;
+                else if (!strcmp(Val.psz, "srvnat"))
+                    m->m_enmTrunkType = kIntNetTrunkType_SrvNat;
+                else
+                {
+                    RTStrmPrintf(g_pStdErr, "Invalid trunk type '%s'\n", Val.psz);
+                    return RTEXITCODE_SYNTAX;
+                }
+                break;
+
+            case 'a': // --mac-address
+                m->m_MacAddress = Val.MacAddr;
+                break;
+
+            case 'i': // --ip-address
+                m->m_Ipv4Address = Val.IPv4Addr;
+                break;
+
+            case 'm': // --netmask
+                m->m_Ipv4Netmask = Val.IPv4Addr;
+                break;
+
+            case 'v': // --verbose
+                m->m_cVerbosity++;
+                break;
+
+            case 'V': // --version (missed)
+                RTPrintf("%sr%u\n", RTBldCfgVersion(), RTBldCfgRevision());
+                return 1; /** @todo this exit code is wrong, of course. :/ */
+
+            case 'M': // --need-main (RTGETOPT_REQ_BOOL: honour the parsed value so '--need-main false' works)
+                m->m_fNeedMain = Val.f;
+                break;
+
+            case 'h': // --help (missed)
+                RTPrintf("%s Version %sr%u\n"
+                         "(C) 2009-" VBOX_C_YEAR " " VBOX_VENDOR "\n"
+                         "All rights reserved.\n"
+                         "\n"
+                         "Usage: %s <options>\n"
+                         "\n"
+                         "Options:\n",
+                         RTProcShortName(),
+                         RTBldCfgVersion(),
+                         RTBldCfgRevision(),
+                         RTProcShortName());
+                for (unsigned int i = 0; i < m->m_vecOptionDefs.size(); i++)
+                    RTPrintf(" -%c, %s\n", m->m_vecOptionDefs[i]->iShort, m->m_vecOptionDefs[i]->pszLong);
+                usage(); /* to print Service Specific usage */
+                return 1; /** @todo this exit code is wrong, of course. :/ */
+
+            default:
+            {
+                int rc1 = parseOpt(rc, Val); /* give the derived service a chance at its own options */
+                if (RT_FAILURE(rc1))
+                {
+                    RTEXITCODE rcExit = RTGetOptPrintError(rc, &Val);
+                    RTPrintf("Use --help for more information.\n");
+                    return rcExit;
+                }
+                break;
+            }
+        }
+    }
+    return RTEXITCODE_SUCCESS;
+}
+
+
+/**
+ * Opens the support driver session, loads VMMR0.r0, opens/creates the internal
+ * network and activates our interface on it.  @returns VBox status code.
+ */
+int VBoxNetBaseService::tryGoOnline(void)
+{
+    /*
+     * Open the session, load ring-0 and issue the request.
+     */
+    int rc = SUPR3Init(&m->m_pSession);
+    if (RT_FAILURE(rc))
+    {
+        m->m_pSession = NIL_RTR0PTR;
+        LogRel(("VBoxNetBaseService: SUPR3Init -> %Rrc\n", rc));
+        return rc;
+    }
+
+    char szPath[RTPATH_MAX];
+    rc = RTPathExecDir(szPath, sizeof(szPath) - sizeof("/VMMR0.r0")); /* leave room for the strcat below */
+    if (RT_FAILURE(rc))
+    {
+        LogRel(("VBoxNetBaseService: RTPathExecDir -> %Rrc\n", rc));
+        return rc;
+    }
+
+    rc = SUPR3LoadVMM(strcat(szPath, "/VMMR0.r0"));
+    if (RT_FAILURE(rc))
+    {
+        LogRel(("VBoxNetBaseService: SUPR3LoadVMM(\"%s\") -> %Rrc\n", szPath, rc));
+        return rc;
+    }
+
+    /*
+     * Create the open request.
+     */
+    PINTNETBUF pBuf;
+    INTNETOPENREQ OpenReq;
+    OpenReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+    OpenReq.Hdr.cbReq = sizeof(OpenReq);
+    OpenReq.pSession = m->m_pSession;
+    RTStrCopy(OpenReq.szNetwork, sizeof(OpenReq.szNetwork), m->m_NetworkName.c_str());
+    OpenReq.szNetwork[sizeof(OpenReq.szNetwork) - 1] = '\0';
+    RTStrCopy(OpenReq.szTrunk, sizeof(OpenReq.szTrunk), m->m_TrunkName.c_str());
+    OpenReq.szTrunk[sizeof(OpenReq.szTrunk) - 1] = '\0';
+    OpenReq.enmTrunkType = m->m_enmTrunkType;
+    OpenReq.fFlags = 0; /** @todo check this */
+    OpenReq.cbSend = m->m_cbSendBuf;
+    OpenReq.cbRecv = m->m_cbRecvBuf;
+    OpenReq.hIf = INTNET_HANDLE_INVALID;
+
+    /*
+     * Issue the request.
+     */
+    Log2(("attempting to open/create network \"%s\"...\n", OpenReq.szNetwork));
+    rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_OPEN, 0, &OpenReq.Hdr);
+    if (RT_FAILURE(rc))
+    {
+        Log2(("VBoxNetBaseService: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_OPEN,) failed, rc=%Rrc\n", rc));
+        return rc;
+    }
+    m->m_hIf = OpenReq.hIf;
+    Log2(("successfully opened/created \"%s\" - hIf=%#x\n", OpenReq.szNetwork, m->m_hIf));
+
+    /*
+     * Get the ring-3 address of the shared interface buffer.
+     */
+    INTNETIFGETBUFFERPTRSREQ GetBufferPtrsReq;
+    GetBufferPtrsReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+    GetBufferPtrsReq.Hdr.cbReq = sizeof(GetBufferPtrsReq);
+    GetBufferPtrsReq.pSession = m->m_pSession;
+    GetBufferPtrsReq.hIf = m->m_hIf;
+    GetBufferPtrsReq.pRing3Buf = NULL;
+    GetBufferPtrsReq.pRing0Buf = NIL_RTR0PTR;
+    rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS, 0, &GetBufferPtrsReq.Hdr);
+    if (RT_FAILURE(rc))
+    {
+        Log2(("VBoxNetBaseService: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS,) failed, rc=%Rrc\n", rc));
+        return rc;
+    }
+    pBuf = GetBufferPtrsReq.pRing3Buf;
+    Log2(("pBuf=%p cbBuf=%d cbSend=%d cbRecv=%d\n",
+          pBuf, pBuf->cbBuf, pBuf->cbSend, pBuf->cbRecv));
+    m->m_pIfBuf = pBuf;
+
+    /*
+     * Activate the interface.
+     */
+    INTNETIFSETACTIVEREQ ActiveReq;
+    ActiveReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+    ActiveReq.Hdr.cbReq = sizeof(ActiveReq);
+    ActiveReq.pSession = m->m_pSession;
+    ActiveReq.hIf = m->m_hIf;
+    ActiveReq.fActive = true;
+    rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SET_ACTIVE, 0, &ActiveReq.Hdr);
+    /* Failing to activate is deliberately not fatal; just log it, naming the request that actually failed. */
+    if (RT_FAILURE(rc))
+        Log2(("VBoxNetBaseService: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_IF_SET_ACTIVE,) failed, rc=%Rrc\n", rc));
+    return VINF_SUCCESS;
+}
+
+
+/** Stops the receive thread (if one was started) and flags the service as shut down; repeated calls are no-ops. */
+void VBoxNetBaseService::shutdown(void)
+{
+    syncEnter();
+    bool const fFirstCall = !m->fShutdown;
+    m->fShutdown = true;
+    if (fFirstCall && m->m_hThrRecv != NIL_RTTHREAD)
+    {
+        int rc = abortWait();
+        /* Note: AssertRC() on a boolean expression never fires (0 and 1 are both success statuses). */
+        AssertMsg(rc == VINF_SUCCESS || rc == VERR_SEM_DESTROYED, ("abortWait -> %Rrc\n", rc));
+        rc = m->m_EventQ ? m->m_EventQ->interruptEventQueueProcessing() : VINF_SUCCESS; /* no queue => nothing to interrupt */
+        if (RT_SUCCESS(rc))
+        {
+            rc = RTThreadWait(m->m_hThrRecv, 60000, NULL);
+            if (RT_FAILURE(rc))
+                Log1WarningFunc(("RTThreadWait(%RTthrd) -> %Rrc\n", m->m_hThrRecv, rc));
+        }
+        else
+        {
+            AssertMsgFailed(("interruptEventQueueProcessing() failed\n"));
+            RTThreadWait(m->m_hThrRecv , 0, NULL);
+        }
+    }
+    syncLeave();
+}
+
+
+int VBoxNetBaseService::syncEnter()
+{
+ return RTCritSectEnter(&m->m_csThis);
+}
+
+
+int VBoxNetBaseService::syncLeave()
+{
+ return RTCritSectLeave(&m->m_csThis);
+}
+
+
+int VBoxNetBaseService::waitForIntNetEvent(int cMillis)
+{
+ INTNETIFWAITREQ WaitReq;
+ LogFlowFunc(("ENTER:cMillis: %d\n", cMillis));
+ WaitReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ WaitReq.Hdr.cbReq = sizeof(WaitReq);
+ WaitReq.pSession = m->m_pSession;
+ WaitReq.hIf = m->m_hIf;
+ WaitReq.cMillies = cMillis;
+
+ int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_WAIT, 0, &WaitReq.Hdr);
+ LogFlowFuncLeaveRC(rc);
+ return rc;
+}
+
+
+int VBoxNetBaseService::abortWait()
+{
+ INTNETIFABORTWAITREQ AbortReq;
+ LogFlowFunc(("ENTER:\n"));
+ AbortReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ AbortReq.Hdr.cbReq = sizeof(AbortReq);
+ AbortReq.pSession = m->m_pSession;
+ AbortReq.hIf = m->m_hIf;
+ AbortReq.fNoMoreWaits = true;
+
+ int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_ABORT_WAIT, 0, &AbortReq.Hdr);
+ LogFlowFuncLeaveRC(rc);
+ return rc;
+}
+
+
+/** S/G API: copies the segments into a newly allocated frame of cbFrame bytes on the send ring (no flush). */
+int VBoxNetBaseService::sendBufferOnWire(PCINTNETSEG paSegs, size_t cSegs, size_t cbFrame)
+{
+    /* The segments must fit into the frame, otherwise the copy loop below would write past the allocation. */
+    size_t cbSegs = 0;
+    for (size_t idxSeg = 0; idxSeg < cSegs; ++idxSeg)
+        cbSegs += paSegs[idxSeg].cb;
+    AssertReturn(cbSegs <= cbFrame && cbFrame == (uint32_t)cbFrame, VERR_INVALID_PARAMETER);
+
+    /* Allocate frame */
+    PINTNETHDR pHdr = NULL;
+    uint8_t *pbFrame = NULL;
+    int rc = IntNetRingAllocateFrame(&m->m_pIfBuf->Send, (uint32_t)cbFrame, &pHdr, (void **)&pbFrame);
+    AssertRCReturn(rc, rc);
+
+    /* Now we fill the frame with the S/G list */
+    for (size_t idxSeg = 0, offFrame = 0; idxSeg < cSegs; offFrame += paSegs[idxSeg].cb, ++idxSeg)
+        memcpy(&pbFrame[offFrame], paSegs[idxSeg].pv, paSegs[idxSeg].cb);
+    /* Commit */
+    IntNetRingCommitFrameEx(&m->m_pIfBuf->Send, pHdr, cbFrame);
+    LogFlowFuncLeaveRC(rc);
+    return rc;
+}
+
+/**
+ * forcible ask for send packet on the "wire"
+ */
+void VBoxNetBaseService::flushWire()
+{
+ INTNETIFSENDREQ SendReq;
+ SendReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ SendReq.Hdr.cbReq = sizeof(SendReq);
+ SendReq.pSession = m->m_pSession;
+ SendReq.hIf = m->m_hIf;
+ int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SEND, 0, &SendReq.Hdr);
+ AssertRCReturnVoid(rc);
+ LogFlowFuncLeave();
+}
+
+
+int VBoxNetBaseService::hlpUDPBroadcast(unsigned uSrcPort, unsigned uDstPort,
+ void const *pvData, size_t cbData) const
+{
+ return VBoxNetUDPBroadcast(m->m_pSession, m->m_hIf, m->m_pIfBuf,
+ m->m_Ipv4Address, &m->m_MacAddress, uSrcPort,
+ uDstPort, pvData, cbData);
+
+}
+
+
+const std::string VBoxNetBaseService::getServiceName() const
+{
+ return m->m_ServiceName;
+}
+
+
+void VBoxNetBaseService::setServiceName(const std::string& aName)
+{
+ m->m_ServiceName = aName;
+}
+
+
+const std::string VBoxNetBaseService::getNetworkName() const
+{
+ return m->m_NetworkName;
+}
+
+
+void VBoxNetBaseService::setNetworkName(const std::string& aName)
+{
+ m->m_NetworkName = aName;
+}
+
+
+const RTMAC VBoxNetBaseService::getMacAddress() const
+{
+ return m->m_MacAddress;
+}
+
+
+void VBoxNetBaseService::setMacAddress(const RTMAC& aMac)
+{
+ m->m_MacAddress = aMac;
+}
+
+
+const RTNETADDRIPV4 VBoxNetBaseService::getIpv4Address() const
+{
+ return m->m_Ipv4Address;
+}
+
+
+void VBoxNetBaseService::setIpv4Address(const RTNETADDRIPV4& aAddress)
+{
+ m->m_Ipv4Address = aAddress;
+}
+
+
+const RTNETADDRIPV4 VBoxNetBaseService::getIpv4Netmask() const
+{
+ return m->m_Ipv4Netmask;
+}
+
+
+void VBoxNetBaseService::setIpv4Netmask(const RTNETADDRIPV4& aNetmask)
+{
+ m->m_Ipv4Netmask = aNetmask;
+}
+
+
+uint32_t VBoxNetBaseService::getSendBufSize() const
+{
+ return m->m_cbSendBuf;
+}
+
+
+void VBoxNetBaseService::setSendBufSize(uint32_t cbBuf)
+{
+ m->m_cbSendBuf = cbBuf;
+}
+
+
+uint32_t VBoxNetBaseService::getRecvBufSize() const
+{
+ return m->m_cbRecvBuf;
+}
+
+
+void VBoxNetBaseService::setRecvBufSize(uint32_t cbBuf)
+{
+ m->m_cbRecvBuf = cbBuf;
+}
+
+
+int32_t VBoxNetBaseService::getVerbosityLevel() const
+{
+ return m->m_cVerbosity;
+}
+
+
+void VBoxNetBaseService::setVerbosityLevel(int32_t aVerbosity)
+{
+ m->m_cVerbosity = aVerbosity;
+}
+
+
+void VBoxNetBaseService::addCommandLineOption(const PRTGETOPTDEF optDef)
+{
+ m->m_vecOptionDefs.push_back(optDef);
+}
+
+
+void VBoxNetBaseService::doReceiveLoop() /* Waits for IntNet events and dispatches every received frame to the process*() hooks. */
+{
+ int rc;
+ /* Well we're ready */
+ PINTNETRINGBUF pRingBuf = &m->m_pIfBuf->Recv;
+
+ for (;;)
+ {
+ /*
+ * Wait for a packet to become available.
+ */
+ rc = waitForIntNetEvent(2000); /* the value is passed on as INTNETIFWAITREQ::cMillies */
+ if (rc == VERR_SEM_DESTROYED) /* the only status that ends the loop normally */
+ break;
+
+ if (RT_FAILURE(rc))
+ {
+ if (rc == VERR_TIMEOUT || rc == VERR_INTERRUPTED)
+ {
+ /* do we want interrupt anyone ??? */
+ continue;
+ }
+ LogRel(("VBoxNetBaseService: waitForIntNetEvent returned %Rrc\n", rc));
+ AssertRCReturnVoid(rc); /* any other failure terminates the receive loop */
+ }
+
+ /*
+ * Process the receive buffer.
+ */
+ PCINTNETHDR pHdr;
+ while ((pHdr = IntNetRingGetNextFrameToRead(pRingBuf)) != NULL)
+ {
+ uint8_t const u8Type = pHdr->u8Type;
+ size_t cbFrame = pHdr->cbFrame;
+ switch (u8Type)
+ {
+ case INTNETHDR_TYPE_FRAME:
+ {
+ void *pvFrame = IntNetHdrGetFramePtr(pHdr, m->m_pIfBuf);
+ rc = processFrame(pvFrame, cbFrame);
+ if (RT_FAILURE(rc) && rc == VERR_IGNORED) /* fall back to the built-in UDP/ARP handling */
+ {
+ /* XXX: UDP + ARP for DHCP */
+ VBOXNETUDPHDRS Hdrs;
+ size_t cb;
+ void *pv = VBoxNetUDPMatch(m->m_pIfBuf, RTNETIPV4_PORT_BOOTPS, &m->m_MacAddress,
+ VBOXNETUDP_MATCH_UNICAST
+ | VBOXNETUDP_MATCH_BROADCAST
+ | VBOXNETUDP_MATCH_CHECKSUM
+ | (m->m_cVerbosity > 2 ? VBOXNETUDP_MATCH_PRINT_STDERR : 0),
+ &Hdrs, &cb);
+ if (pv && cb)
+ processUDP(pv, cb);
+ else
+ VBoxNetArpHandleIt(m->m_pSession, m->m_hIf, m->m_pIfBuf, &m->m_MacAddress, m->m_Ipv4Address);
+ }
+ break;
+ }
+ case INTNETHDR_TYPE_GSO:
+ {
+ PCPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, m->m_pIfBuf);
+ rc = processGSO(pGso, cbFrame);
+ if (RT_FAILURE(rc) && rc == VERR_IGNORED) /* no fallback for GSO: both paths just break */
+ break;
+ break;
+ }
+
+ case INTNETHDR_TYPE_PADDING:
+ break;
+
+ default:
+ break;
+ }
+ IntNetRingSkipFrame(&m->m_pIfBuf->Recv); /* advance past the frame whatever its type */
+ } /* loop */
+ }
+}
+
+
+int VBoxNetBaseService::startReceiveThreadAndEnterEventLoop()
+{
+    AssertMsgReturn(isMainNeeded(), ("It's expected that we need Main"), VERR_INTERNAL_ERROR);
+    /* Get the event queue before starting the thread: shutdown() uses m_EventQ whenever m_hThrRecv is set. */
+    m->m_EventQ = com::NativeEventQueue::getMainEventQueue();
+    AssertPtrReturn(m->m_EventQ, VERR_INTERNAL_ERROR);
+
+    /* start receiving thread */
+    int rc = RTThreadCreate(&m->m_hThrRecv, /* thread handle*/
+                            &VBoxNetBaseService::Data::recvLoop, /* routine */
+                            this, /* user data */
+                            128 * _1K, /* stack size */
+                            RTTHREADTYPE_IO, /* type */
+                            RTTHREADFLAGS_WAITABLE, /* flags */
+                            "RECV");
+    AssertRCReturn(rc, rc);
+
+    /* Process events until shutdown() sets the flag or interrupts the queue. */
+    while (!m->fShutdown)
+    {
+        rc = m->m_EventQ->processEventQueue(RT_INDEFINITE_WAIT);
+        if (rc == VERR_INTERRUPTED)
+        {
+            LogFlow(("Event queue processing ended with rc=%Rrc\n", rc));
+            break;
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+void VBoxNetBaseService::debugPrint(int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) const
+{
+ if (iMinLevel <= m->m_cVerbosity)
+ {
+ va_list va;
+ va_start(va, pszFmt);
+ debugPrintV(iMinLevel, fMsg, pszFmt, va);
+ va_end(va);
+ }
+}
+
+
+/**
+ * Print debug message depending on the m_cVerbosity level.
+ *
+ * @param iMinLevel The minimum m_cVerbosity level for this message.
+ * @param fMsg Whether to dump parts for the current service message.
+ * @param pszFmt The message format string.
+ * @param va Optional arguments.
+ */
+void VBoxNetBaseService::debugPrintV(int iMinLevel, bool fMsg, const char *pszFmt, va_list va) const
+{
+ RT_NOREF(fMsg);
+ if (iMinLevel <= m->m_cVerbosity)
+ {
+ va_list vaCopy; /* This dude is *very* special, thus the copy. */
+ va_copy(vaCopy, va);
+ RTStrmPrintf(g_pStdErr, "%s: %s: %N\n",
+ RTProcShortName(),
+ iMinLevel >= 2 ? "debug" : "info",
+ pszFmt,
+ &vaCopy);
+ va_end(vaCopy);
+ }
+}
+
+
+PRTGETOPTDEF VBoxNetBaseService::getOptionsPtr()
+{
+    /* Flatten the vector of option pointers into one contiguous RTMemAlloc'ed array (NULL on allocation failure). */
+    size_t const cOptions = m->m_vecOptionDefs.size();
+    PRTGETOPTDEF paCopy = (PRTGETOPTDEF)RTMemAlloc(sizeof(RTGETOPTDEF) * cOptions);
+
+    if (paCopy)
+    {
+        for (size_t idxOpt = 0; idxOpt < cOptions; ++idxOpt)
+            paCopy[idxOpt] = *m->m_vecOptionDefs[idxOpt];
+    }
+    return paCopy;
+}
diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h
new file mode 100644
index 00000000..7aa5a438
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h
@@ -0,0 +1,148 @@
+/* $Id: VBoxNetBaseService.h $ */
+/** @file
+ * VBoxNetBaseService - common base class for the VBoxNetDHCP and VBoxNetNAT services.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NetLib_VBoxNetBaseService_h
+#define VBOX_INCLUDED_SRC_NetLib_VBoxNetBaseService_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/critsect.h>
+
+
+/** Interface implemented by services that can broadcast a UDP datagram on their network. */
+class VBoxNetHlpUDPService
+{
+public:
+    virtual ~VBoxNetHlpUDPService() {} /* polymorphic interface: make deletion through a base pointer well defined */
+    virtual int hlpUDPBroadcast(unsigned uSrcPort, unsigned uDstPort, void const *pvData, size_t cbData) const = 0;
+};
+
+class VBoxNetLockee /* Interface of objects that VBoxNetALock can enter and leave. */
+{
+public:
+    virtual ~VBoxNetLockee() {} /* polymorphic interface: make deletion through a base pointer well defined */
+    virtual int syncEnter() = 0;
+    virtual int syncLeave() = 0;
+};
+
+class VBoxNetALock /* Scoped lock: enters the lockee on construction and leaves it on destruction. */
+{
+public:
+ VBoxNetALock(VBoxNetLockee *a_lck):m_lck(a_lck) /* a NULL lockee is accepted and makes the lock a no-op */
+ {
+ if (m_lck)
+ m_lck->syncEnter(); /* the status returned by syncEnter() is not checked */
+ }
+
+ ~VBoxNetALock()
+ {
+ if (m_lck)
+ m_lck->syncLeave();
+ }
+
+private:
+ VBoxNetLockee *m_lck; /* NOTE(review): the class is copyable, and a copy would call syncLeave() a second time */
+};
+
+# ifndef BASE_SERVICES_ONLY
+class VBoxNetBaseService: public VBoxNetHlpUDPService, public VBoxNetLockee
+{
+public:
+ VBoxNetBaseService(const std::string& aName, const std::string& aNetworkName);
+ virtual ~VBoxNetBaseService();
+ int parseArgs(int argc, char **argv);
+ int tryGoOnline(void);
+ void shutdown(void);
+ int syncEnter();
+ int syncLeave();
+ int waitForIntNetEvent(int cMillis);
+ int abortWait();
+ int sendBufferOnWire(PCINTNETSEG paSegs, size_t cSegs, size_t cbBuffer);
+ void flushWire();
+
+ virtual int hlpUDPBroadcast(unsigned uSrcPort, unsigned uDstPort,
+ void const *pvData, size_t cbData) const;
+ virtual void usage(void) = 0;
+ virtual int parseOpt(int rc, const RTGETOPTUNION& getOptVal) = 0;
+ virtual int processFrame(void *, size_t) = 0;
+ virtual int processGSO(PCPDMNETWORKGSO, size_t) = 0;
+ virtual int processUDP(void *, size_t) = 0;
+
+
+ virtual int init(void);
+ virtual int run(void);
+ virtual bool isMainNeeded() const;
+
+protected:
+ const std::string getServiceName() const;
+ void setServiceName(const std::string&);
+
+ const std::string getNetworkName() const;
+ void setNetworkName(const std::string&);
+
+ const RTMAC getMacAddress() const;
+ void setMacAddress(const RTMAC&);
+
+ const RTNETADDRIPV4 getIpv4Address() const;
+ void setIpv4Address(const RTNETADDRIPV4&);
+
+ const RTNETADDRIPV4 getIpv4Netmask() const;
+ void setIpv4Netmask(const RTNETADDRIPV4&);
+
+ uint32_t getSendBufSize() const;
+ void setSendBufSize(uint32_t);
+
+ uint32_t getRecvBufSize() const;
+ void setRecvBufSize(uint32_t);
+
+ int32_t getVerbosityLevel() const;
+ void setVerbosityLevel(int32_t);
+
+ void addCommandLineOption(const PRTGETOPTDEF);
+
+ /**
+ * Print debug message depending on the m_cVerbosity level.
+ *
+ * @param iMinLevel The minimum m_cVerbosity level for this message.
+ * @param fMsg Whether to dump parts for the current DHCP message.
+ * @param pszFmt The message format string.
+ * @param ... Optional arguments.
+ */
+ void debugPrint(int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) const;
+ virtual void debugPrintV(int32_t iMinLevel, bool fMsg, const char *pszFmt, va_list va) const;
+
+ private:
+ void doReceiveLoop();
+
+ /** starts receiving thread and enter event polling loop. */
+ int startReceiveThreadAndEnterEventLoop();
+
+ protected:
+ /* VirtualBox instance */
+ ComPtr<IVirtualBox> virtualbox;
+ ComPtr<IVirtualBoxClient> virtualboxClient;
+
+ private:
+ struct Data;
+ Data *m;
+
+ private:
+ PRTGETOPTDEF getOptionsPtr();
+};
+# endif
+#endif /* !VBOX_INCLUDED_SRC_NetLib_VBoxNetBaseService_h */
diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp
new file mode 100644
index 00000000..9e7a695f
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp
@@ -0,0 +1,140 @@
+/* $Id: VBoxNetIntIf.cpp $ */
+/** @file
+ * VBoxNetIntIf - IntNet Interface Client Routines.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_DEFAULT
+#include "VBoxNetLib.h"
+#include <VBox/intnet.h>
+#include <VBox/intnetinline.h>
+#include <VBox/sup.h>
+#include <VBox/vmm/vmm.h>
+#include <iprt/errcore.h>
+#include <VBox/log.h>
+
+#include <iprt/string.h>
+
+
+
+/**
+ * Flushes the send buffer.
+ *
+ * @returns VBox status code.
+ * @param pSession The support driver session.
+ * @param hIf The interface handle to flush.
+ */
+int VBoxNetIntIfFlush(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf)
+{
+ INTNETIFSENDREQ SendReq;
+ SendReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ SendReq.Hdr.cbReq = sizeof(SendReq);
+ SendReq.pSession = pSession;
+ SendReq.hIf = hIf;
+ return SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SEND, 0, &SendReq.Hdr);
+}
+
+
+/**
+ * Copies the SG segments into the specified frame.
+ *
+ * @param pvFrame The frame buffer.
+ * @param cSegs The number of segments.
+ * @param paSegs The segments.
+ */
+static void vboxnetIntIfCopySG(void *pvFrame, size_t cSegs, PCINTNETSEG paSegs)
+{
+ uint8_t *pbDst = (uint8_t *)pvFrame;
+ for (size_t iSeg = 0; iSeg < cSegs; iSeg++)
+ {
+ memcpy(pbDst, paSegs[iSeg].pv, paSegs[iSeg].cb);
+ pbDst += paSegs[iSeg].cb;
+ }
+}
+
+
+/**
+ * Writes a frame packet to the buffer.
+ *
+ * @returns VBox status code.
+ * @param pBuf The buffer.
+ * @param pRingBuf The ring buffer to write to.
+ * @param cSegs The number of segments.
+ * @param paSegs The segments.
+ */
+int VBoxNetIntIfRingWriteFrame(PINTNETBUF pBuf, PINTNETRINGBUF pRingBuf, size_t cSegs, PCINTNETSEG paSegs)
+{
+ RT_NOREF(pBuf);
+
+ /*
+ * Validate input.
+ */
+ AssertPtr(pBuf);
+ AssertPtr(pRingBuf);
+ AssertPtr(paSegs);
+ Assert(cSegs > 0);
+
+ /*
+ * Calc frame size.
+ */
+ uint32_t cbFrame = 0;
+ for (size_t iSeg = 0; iSeg < cSegs; iSeg++)
+ cbFrame += paSegs[iSeg].cb;
+ Assert(cbFrame >= sizeof(RTMAC) * 2);
+
+ /*
+ * Allocate a frame, copy the data and commit it.
+ */
+ PINTNETHDR pHdr = NULL;
+ void *pvFrame = NULL;
+ int rc = IntNetRingAllocateFrame(pRingBuf, cbFrame, &pHdr, &pvFrame);
+ if (RT_SUCCESS(rc))
+ {
+ vboxnetIntIfCopySG(pvFrame, cSegs, paSegs);
+ IntNetRingCommitFrame(pRingBuf, pHdr);
+ return VINF_SUCCESS;
+ }
+
+ return rc;
+}
+
+
+/**
+ * Sends a frame
+ *
+ * @returns VBox status code.
+ * @param pSession The support driver session.
+ * @param hIf The interface handle.
+ * @param pBuf The interface buffer.
+ * @param cSegs The number of segments.
+ * @param paSegs The segments.
+ * @param fFlush Whether to flush the write.
+ */
+int VBoxNetIntIfSend(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf,
+ size_t cSegs, PCINTNETSEG paSegs, bool fFlush)
+{
+ int rc = VBoxNetIntIfRingWriteFrame(pBuf, &pBuf->Send, cSegs, paSegs);
+ if (rc == VERR_BUFFER_OVERFLOW)
+ {
+ VBoxNetIntIfFlush(pSession, hIf);
+ rc = VBoxNetIntIfRingWriteFrame(pBuf, &pBuf->Send, cSegs, paSegs);
+ }
+ if (RT_SUCCESS(rc) && fFlush)
+ rc = VBoxNetIntIfFlush(pSession, hIf);
+ return rc;
+}
diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetLib.h b/src/VBox/NetworkServices/NetLib/VBoxNetLib.h
new file mode 100644
index 00000000..7b057274
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/VBoxNetLib.h
@@ -0,0 +1,72 @@
+/* $Id: VBoxNetLib.h $ */
+/** @file
+ * VBoxNetUDP - IntNet Client Library.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NetLib_VBoxNetLib_h
+#define VBOX_INCLUDED_SRC_NetLib_VBoxNetLib_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/net.h>
+#include <VBox/intnet.h>
+
+RT_C_DECLS_BEGIN
+
+
+/**
+ * Header pointers optionally returned by VBoxNetUDPMatch.
+ */
+typedef struct VBOXNETUDPHDRS
+{
+ PCRTNETETHERHDR pEth; /**< Pointer to the ethernet header. */
+ PCRTNETIPV4 pIpv4; /**< Pointer to the IPV4 header if IPV4 packet. */
+ PCRTNETUDP pUdp; /**< Pointer to the UDP header. */
+} VBOXNETUDPHDRS;
+/** Pointer to a VBOXNETUDPHDRS structure. */
+typedef VBOXNETUDPHDRS *PVBOXNETUDPHDRS;
+
+
+/** @name VBoxNetUDPMatch flags.
+ * @{ */
+#define VBOXNETUDP_MATCH_UNICAST RT_BIT_32(0)
+#define VBOXNETUDP_MATCH_BROADCAST RT_BIT_32(1)
+#define VBOXNETUDP_MATCH_CHECKSUM RT_BIT_32(2)
+#define VBOXNETUDP_MATCH_REQUIRE_CHECKSUM RT_BIT_32(3)
+#define VBOXNETUDP_MATCH_PRINT_STDERR RT_BIT_32(31)
+/** @} */
+
+void * VBoxNetUDPMatch(PINTNETBUF pBuf, unsigned uDstPort, PCRTMAC pDstMac, uint32_t fFlags, PVBOXNETUDPHDRS pHdrs, size_t *pcb);
+int VBoxNetUDPUnicast(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf,
+ RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC SrcMacAddr, unsigned uSrcPort,
+ RTNETADDRIPV4 DstIPv4Addr, PCRTMAC DstMacAddr, unsigned uDstPort,
+ void const *pvData, size_t cbData);
+int VBoxNetUDPBroadcast(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf,
+ RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC SrcMacAddr, unsigned uSrcPort,
+ unsigned uDstPort,
+ void const *pvData, size_t cbData);
+
+bool VBoxNetArpHandleIt(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf, PCRTMAC pMacAddr, RTNETADDRIPV4 IPv4Addr);
+
+int VBoxNetIntIfFlush(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf);
+int VBoxNetIntIfRingWriteFrame(PINTNETBUF pBuf, PINTNETRINGBUF pRingBuf, size_t cSegs, PCINTNETSEG paSegs);
+int VBoxNetIntIfSend(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf, size_t cSegs, PCINTNETSEG paSegs, bool fFlush);
+
+
+RT_C_DECLS_END
+
+#endif /* !VBOX_INCLUDED_SRC_NetLib_VBoxNetLib_h */
+
diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp
new file mode 100644
index 00000000..7702ceff
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp
@@ -0,0 +1,372 @@
+/* $Id: VBoxNetPortForwardString.cpp $ */
+/** @file
+ * VBoxNetPortForwardString - Routines for managing port-forward strings.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#ifndef RT_OS_WINDOWS
+# include <netinet/in.h>
+#else
+# include <iprt/win/winsock2.h>
+# include <Ws2ipdef.h>
+#endif
+
+#include <iprt/cdefs.h>
+#include <iprt/cidr.h>
+#include <iprt/ctype.h>
+#include <iprt/errcore.h>
+#include <iprt/getopt.h>
+#include <iprt/net.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/stream.h>
+#include <iprt/string.h>
+
+#include <VBox/log.h>
+
+#include "VBoxPortForwardString.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#define PF_FIELD_SEPARATOR ':'
+#define PF_ADDRESS_FIELD_STARTS '['
+#define PF_ADDRESS_FIELD_ENDS ']'
+
+#define PF_STR_FIELD_SEPARATOR ":"
+#define PF_STR_ADDRESS_FIELD_STARTS "["
+#define PF_STR_ADDRESS_FIELD_ENDS "]"
+
+
+/**
+ * Parses a bracketed address field ("[...]").
+ *
+ * @returns Length of the field including both brackets on success, a negative
+ *          value on failure (-1 or VERR_INVALID_PARAMETER).
+ * @param   pszRaw              The string to parse; must point at the opening bracket.
+ * @param   cchRaw              Number of characters left in @a pszRaw.
+ * @param   pszAddress          Where to copy the text found between the brackets.
+ *                              Left untouched when the field is empty.
+ * @param   cbAddress           Size of the @a pszAddress buffer; a longer field is
+ *                              truncated by the copy.
+ * @param   fEmptyAcceptable    Whether an empty field ("[]") is allowed.
+ */
+static int netPfStrAddressParse(char *pszRaw, size_t cchRaw,
+                                char *pszAddress, int cbAddress,
+                                bool fEmptyAcceptable)
+{
+    AssertPtrReturn(pszRaw, -1);
+    AssertPtrReturn(pszAddress, -1);
+    AssertReturn(pszRaw[0] == PF_ADDRESS_FIELD_STARTS, -1);
+
+    /* Step over the opening bracket; the string must not end right there. */
+    char * const pszField = pszRaw + 1;
+    size_t const cchRest  = cchRaw - 1;
+    AssertReturn(cchRest > 0, VERR_INVALID_PARAMETER);
+
+    /* Locate the matching closing bracket. */
+    char * const pszClose = RTStrStr(pszField, PF_STR_ADDRESS_FIELD_ENDS);
+    AssertPtrReturn(pszClose, VERR_INVALID_PARAMETER);
+
+    /* The address text must be shorter than what is left of the string. */
+    size_t const cchAddress = (size_t)(pszClose - pszField);
+    AssertReturn(cchAddress < cchRest, VERR_INVALID_PARAMETER);
+
+    if (cchAddress == 0)
+    {
+        if (!fEmptyAcceptable)
+            return -1;
+    }
+    else
+        RTStrCopy(pszAddress, RT_MIN(cchAddress + 1, (size_t)cbAddress), pszField);
+
+    AssertReturn(*pszClose == PF_ADDRESS_FIELD_ENDS, -1);
+
+    return (int)cchAddress + 2; /* the address text plus the two brackets */
+}
+
+
+/**
+ * Parses the port number following a field separator.
+ *
+ * The number is converted by RTStrToUInt16Ex with base 0 and must be followed
+ * by either another field separator or the end of the string.
+ *
+ * @returns Offset relative to @a pszRaw of the first character after the port
+ *          number (a field separator or the string terminator), -1 on failure
+ *          (conversion problem, unexpected trailing character, or port 0).
+ * @param   pszRaw      The zero terminated string to parse.  Must point at a
+ *                      field separator.
+ * @param   pu16Port    Where to store the port number.  Also written when the
+ *                      function fails because of an unexpected trailing
+ *                      character or because the port is 0.
+ */
+static int netPfStrPortParse(char *pszRaw, uint16_t *pu16Port)
+{
+    AssertPtrReturn(pszRaw, -1);
+    AssertPtrReturn(pu16Port, -1);
+    AssertReturn(pszRaw[0] == PF_FIELD_SEPARATOR, -1);
+
+    char *pszNext = NULL;
+    int rc = RTStrToUInt16Ex(&pszRaw[1], &pszNext, 0, pu16Port);
+    if (rc == VWRN_TRAILING_CHARS)
+        AssertReturn(*pszNext == PF_FIELD_SEPARATOR, -1); /* only another field may follow */
+    else if (rc == VINF_SUCCESS)
+        Assert(*pszNext == '\0');
+    else
+        AssertMsgFailedReturn(("rc=%Rrc\n", rc), -1);
+
+    /* Port 0 is never a valid forwarding port. */
+    if (*pu16Port == 0)
+        return -1;
+    return (int)(pszNext - pszRaw);
+}
+
+
+/**
+ * Parses a ":[address]:port" pair.
+ *
+ * @returns Offset relative to @a pszRaw of the first character after the port
+ *          number on success.  A negative value on failure: -1 for a malformed
+ *          address part, -2 for a malformed port part (or NULL @a pu16Port),
+ *          VERR_INVALID_PARAMETER when the string ends prematurely.
+ * @param   pszRaw                      The zero terminated string to parse.  Must
+ *                                      point at the field separator preceding the
+ *                                      opening bracket.
+ * @param   cchRaw                      Number of characters left in @a pszRaw.
+ * @param   pszAddress                  Where to copy the address text.
+ * @param   cbAddress                   Size of the @a pszAddress buffer.
+ * @param   fEmptyAddressAcceptable     Whether "[]" is acceptable.
+ * @param   pu16Port                    Where to store the port number.
+ */
+static int netPfStrAddressPortPairParse(char *pszRaw, size_t cchRaw,
+                                        char *pszAddress, int cbAddress,
+                                        bool fEmptyAddressAcceptable,
+                                        uint16_t *pu16Port)
+{
+    AssertPtrReturn(pszRaw, -1);
+    AssertPtrReturn(pszAddress, -1);
+    AssertPtrReturn(pu16Port, -2);
+
+    /*
+     * The pair must start with ':' followed by '['.  This is checked in all
+     * build types: when the rule string ends right after the previous field,
+     * pszRaw points at the terminator and stepping over it would read past
+     * the end of the string.
+     */
+    if (   pszRaw[0] != PF_FIELD_SEPARATOR
+        || pszRaw[1] != PF_ADDRESS_FIELD_STARTS)
+        return -1;
+    AssertReturn(cchRaw > 1, VERR_INVALID_PARAMETER);
+
+    pszRaw++; /* field separator skip */
+    cchRaw--;
+
+    /*
+     * The address.  Any negative value is a failure; the address parser
+     * returns IPRT status codes as well as -1.
+     */
+    int const cchAddrField = netPfStrAddressParse(pszRaw, cchRaw,
+                                                  pszAddress, cbAddress,
+                                                  fEmptyAddressAcceptable);
+    if (cchAddrField < 0)
+        return -1;
+
+    pszRaw += cchAddrField;
+    cchRaw -= cchAddrField;
+    AssertReturn(cchRaw > 0, VERR_INVALID_PARAMETER);
+
+    if (pszRaw[0] != PF_FIELD_SEPARATOR)
+        return -1; /* trailing garbage in the address */
+
+    /*
+     * The port.  Check the result before using it as an index.
+     */
+    int const cchPortField = netPfStrPortParse(pszRaw, pu16Port);
+    if (cchPortField < 0)
+        return -2;
+    Assert(pszRaw[cchPortField] == '\0' || pszRaw[cchPortField] == PF_FIELD_SEPARATOR);
+
+    return cchAddrField + cchPortField + 1; /* +1 for the leading field separator */
+}
+
+/**
+ * Parses a port-forward rule string into a PORTFORWARDRULE.
+ *
+ * The layout accepted by the code below is
+ *      [name]:proto:[host-address]:host-port:[guest-address]:guest-port
+ * where proto is "tcp" or "udp" (compared case-insensitively), the host
+ * address may be empty ("[]"), the guest address may not be empty, and both
+ * ports must be non-zero.
+ *
+ * XXX: Having fIPv6 we might improve address verification by comparing the
+ * address length with INET[6]_ADDRLEN.
+ *
+ * @returns VINF_SUCCESS on success, VERR_NO_MEMORY if the working copy of the
+ *          string cannot be allocated, VERR_INVALID_PARAMETER otherwise.
+ * @param   pcszStrPortForward  The rule string to parse.
+ * @param   fIPv6               Stored in the rule as is; not used for validation.
+ * @param   pPfr                Where to return the parsed rule.  Zeroed on entry
+ *                              and zeroed again when parsing fails.
+ */
+int netPfStrToPf(const char *pcszStrPortForward, bool fIPv6, PPORTFORWARDRULE pPfr)
+{
+/** r=bird: Redo from scratch?  This is very hard to read.  And it's going about
+ *  things in a very complicated, potentially leaky (pszRaw) fashion. */
+
+    int proto;
+    uint16_t u16HostPort;
+    uint16_t u16GuestPort;
+    bool fTcpProto = false;
+
+    int idxRaw = 0;
+    int cbToken = 0;
+
+    AssertPtrReturn(pcszStrPortForward, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pPfr, VERR_INVALID_PARAMETER);
+
+    RT_ZERO(*pPfr);
+
+    char *pszHostAddr = &pPfr->szPfrHostAddr[0];
+    char *pszGuestAddr = &pPfr->szPfrGuestAddr[0];
+    char *pszName = &pPfr->szPfrName[0];
+
+    size_t cchRaw = strlen(pcszStrPortForward);
+
+    /* Minimal rule ":tcp:[]:0:[]:0" has got length 14 */
+    AssertReturn(cchRaw > 14, VERR_INVALID_PARAMETER);
+
+    /*
+     * Work on a private copy.  From here on every failure must leave via the
+     * invalid_parameter label so that the copy is freed.
+     */
+    char *pszRaw = RTStrDup(pcszStrPortForward);
+    AssertReturn(pszRaw, VERR_NO_MEMORY);
+
+    char *pszRawBegin = pszRaw;
+
+    /* name (optional, the string may start directly with a separator) */
+    if (pszRaw[0] != PF_FIELD_SEPARATOR)
+    {
+        char *pszEndOfName = RTStrStr(pszRaw + 1, PF_STR_FIELD_SEPARATOR);
+        if (!pszEndOfName)
+            goto invalid_parameter;
+
+        cbToken = (int)(pszEndOfName - pszRaw); /* don't take : into account */
+        /* XXX it's unacceptable to have only name entry in PF */
+        if (   cbToken < 0
+            || cbToken >= (ssize_t)cchRaw
+            || (size_t)cbToken >= PF_NAMELEN)
+            goto invalid_parameter;
+
+        RTStrCopy(pszName,
+                  RT_MIN((size_t)cbToken + 1, PF_NAMELEN),
+                  pszRaw);
+        pszRaw += cbToken; /* move to separator */
+        cchRaw -= cbToken;
+    }
+
+    if (pszRaw[0] != PF_FIELD_SEPARATOR)
+        goto invalid_parameter;
+
+    /* protocol */
+    pszRaw++; /* skip separator */
+    cchRaw--;
+
+    if (   (   (fTcpProto = (RTStrNICmp(pszRaw, "tcp", 3) == 0))
+            || RTStrNICmp(pszRaw, "udp", 3) == 0)
+        && pszRaw[3] == PF_FIELD_SEPARATOR)
+    {
+        proto = (fTcpProto ? IPPROTO_TCP : IPPROTO_UDP);
+        idxRaw = 3;
+    }
+    else
+        goto invalid_parameter;
+
+    pszRaw += idxRaw;
+    cchRaw -= idxRaw;
+
+    /* host address (may be empty) and host port */
+    idxRaw = netPfStrAddressPortPairParse(pszRaw, cchRaw,
+                                          pszHostAddr, INET6_ADDRSTRLEN,
+                                          true, &u16HostPort);
+    if (idxRaw < 0)
+        goto invalid_parameter;
+
+    pszRaw += idxRaw;
+    cchRaw -= idxRaw;
+
+    /* The guest part is mandatory; the string must not end after the host port. */
+    if (pszRaw[0] != PF_FIELD_SEPARATOR)
+        goto invalid_parameter;
+
+    /* guest address (must not be empty) and guest port */
+    idxRaw = netPfStrAddressPortPairParse(pszRaw, cchRaw,
+                                          pszGuestAddr, INET6_ADDRSTRLEN,
+                                          false, &u16GuestPort);
+    if (idxRaw < 0)
+        goto invalid_parameter;
+
+    /* XXX: fill the rule */
+    pPfr->fPfrIPv6 = fIPv6;
+    pPfr->iPfrProto = proto;
+
+    pPfr->u16PfrHostPort = u16HostPort;
+
+    if (*pszGuestAddr == '\0')
+        goto invalid_parameter; /* guest address should be defined */
+
+    pPfr->u16PfrGuestPort = u16GuestPort;
+
+    Log(("name: %s\n"
+         "proto: %d\n"
+         "host address: %s\n"
+         "host port: %d\n"
+         "guest address: %s\n"
+         "guest port:%d\n",
+         pszName, proto,
+         pszHostAddr, u16HostPort,
+         pszGuestAddr, u16GuestPort));
+
+    RTStrFree(pszRawBegin);
+    return VINF_SUCCESS;
+
+invalid_parameter:
+    RTStrFree(pszRawBegin);
+    RT_ZERO(*pPfr);
+    return VERR_INVALID_PARAMETER;
+}
diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp
new file mode 100644
index 00000000..5dbbdbf2
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp
@@ -0,0 +1,304 @@
+/* $Id: VBoxNetUDP.cpp $ */
+/** @file
+ * VBoxNetUDP - IntNet UDP Client Routines.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_DEFAULT
+#include "VBoxNetLib.h"
+#include <iprt/stream.h>
+#include <iprt/string.h>
+#include <iprt/rand.h>
+#include <VBox/log.h>
+#include <VBox/vmm/pdmnetinline.h>
+#include <VBox/intnetinline.h>
+
+
+/**
+ * Checks if the head of the receive ring is a UDP packet matching the given
+ * criteria.
+ *
+ * Only IPv4 is handled.  The frame is not removed from the ring by this
+ * function; the returned pointers reference the frame where it was found.
+ *
+ * @returns Pointer to the data if it matches, NULL if there is no frame or it
+ * does not match.
+ * @param pBuf The IntNet buffers.
+ * @param uDstPort The destination port to match.
+ * @param pDstMac The destination address to match if
+ * VBOXNETUDP_MATCH_UNICAST is specified.
+ * @param fFlags Flags indicating what to match and some debug stuff.
+ * See VBOXNETUDP_MATCH_*.
+ * @param pHdrs Where to return the pointers to the headers.
+ * Optional. Members may already be set when the function
+ * returns NULL because of a mismatch detected later on.
+ * @param pcb Where to return the size of the data on success.
+ * Set to 0 on mismatch.
+ */
+void *VBoxNetUDPMatch(PINTNETBUF pBuf, unsigned uDstPort, PCRTMAC pDstMac, uint32_t fFlags, PVBOXNETUDPHDRS pHdrs, size_t *pcb)
+{
+ /*
+ * Clear return values so we can return easier on mismatch.
+ */
+ *pcb = 0;
+ if (pHdrs)
+ {
+ pHdrs->pEth = NULL;
+ pHdrs->pIpv4 = NULL;
+ pHdrs->pUdp = NULL;
+ }
+
+ /*
+ * Valid IntNet Ethernet frame?
+ */
+ PCINTNETHDR pHdr = IntNetRingGetNextFrameToRead(&pBuf->Recv);
+ if ( !pHdr
+ || ( pHdr->u8Type != INTNETHDR_TYPE_FRAME
+ && pHdr->u8Type != INTNETHDR_TYPE_GSO))
+ return NULL;
+
+ size_t cbFrame = pHdr->cbFrame;
+ const void *pvFrame = IntNetHdrGetFramePtr(pHdr, pBuf);
+ PCPDMNETWORKGSO pGso = NULL;
+ if (pHdr->u8Type == INTNETHDR_TYPE_GSO)
+ {
+ pGso = (PCPDMNETWORKGSO)pvFrame;
+ if (!PDMNetGsoIsValid(pGso, cbFrame, cbFrame - sizeof(*pGso)))
+ return NULL;
+ /** @todo IPv6 UDP support, goes for this entire function really. Not really
+ * important yet since this is currently only used by the DHCP server. */
+ if (pGso->u8Type != PDMNETWORKGSOTYPE_IPV4_UDP)
+ return NULL;
+ /* The Ethernet frame follows right after the GSO descriptor. */
+ pvFrame = pGso + 1;
+ cbFrame -= sizeof(*pGso);
+ }
+
+ PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvFrame;
+ if (pHdrs)
+ pHdrs->pEth = pEthHdr;
+
+#ifdef IN_RING3
+ /* Dump if to stderr/log if that's wanted. */
+ /* NOTE(review): pDstMac is dereferenced here regardless of
+ VBOXNETUDP_MATCH_UNICAST, and the frame header is read before the
+ minimum frame size check below. */
+ if (fFlags & VBOXNETUDP_MATCH_PRINT_STDERR)
+ {
+ RTStrmPrintf(g_pStdErr, "frame: cb=%04x dst=%.6Rhxs src=%.6Rhxs type=%04x%s\n",
+ cbFrame, &pEthHdr->DstMac, &pEthHdr->SrcMac, RT_BE2H_U16(pEthHdr->EtherType),
+ !memcmp(&pEthHdr->DstMac, pDstMac, sizeof(*pDstMac)) ? " Mine!" : "");
+ }
+#endif
+
+ /*
+ * Ethernet matching.
+ */
+
+ /* Ethernet min frame size. */
+ if (cbFrame < 64)
+ return NULL;
+
+ /* Match Ethertype: IPV4? */
+ /** @todo VLAN tagging? */
+ if (pEthHdr->EtherType != RT_H2BE_U16_C(RTNET_ETHERTYPE_IPV4))
+ return NULL;
+
+ /* Match destination address (ethernet) */
+ /* Rejected only when neither the unicast nor the broadcast criterion
+ (each considered only if its flag is set) accepts the frame. */
+ if ( ( !(fFlags & VBOXNETUDP_MATCH_UNICAST)
+ || memcmp(&pEthHdr->DstMac, pDstMac, sizeof(pEthHdr->DstMac)))
+ && ( !(fFlags & VBOXNETUDP_MATCH_BROADCAST)
+ || pEthHdr->DstMac.au16[0] != 0xffff
+ || pEthHdr->DstMac.au16[1] != 0xffff
+ || pEthHdr->DstMac.au16[2] != 0xffff))
+ return NULL;
+
+ /*
+ * If we're working on a GSO frame, we need to make sure the length fields
+ * are set correctly (they are usually set to 0).
+ */
+ /* Note: this writes to the frame in the ring, hence the cast dropping const. */
+ if (pGso)
+ PDMNetGsoPrepForDirectUse(pGso, (void *)pvFrame, cbFrame, PDMNETCSUMTYPE_NONE);
+
+ /*
+ * IP validation and matching.
+ */
+ PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)(pEthHdr + 1);
+ if (pHdrs)
+ pHdrs->pIpv4 = pIpHdr;
+
+ /* Protocol: UDP */
+ if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP)
+ return NULL;
+
+ /* Valid IPv4 header? */
+ /* The header checksum is only verified for non-GSO frames. */
+ size_t const offIpHdr = (uintptr_t)pIpHdr - (uintptr_t)pEthHdr;
+ if (!RTNetIPv4IsHdrValid(pIpHdr, cbFrame - offIpHdr, cbFrame - offIpHdr, !pGso /*fChecksum*/))
+ return NULL;
+
+ /*
+ * UDP matching and validation.
+ */
+ /* ip_hl is used as a count of 32-bit words here. */
+ PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint32_t *)pIpHdr + pIpHdr->ip_hl);
+ if (pHdrs)
+ pHdrs->pUdp = pUdpHdr;
+
+ /* Destination port */
+ if (RT_BE2H_U16(pUdpHdr->uh_dport) != uDstPort)
+ return NULL;
+
+ /* The UDP size and checksum validation is skipped for GSO frames. */
+ if (!pGso)
+ {
+ /* Validate the UDP header according to flags. */
+ size_t offUdpHdr = (uintptr_t)pUdpHdr - (uintptr_t)pEthHdr;
+ if (fFlags & (VBOXNETUDP_MATCH_CHECKSUM | VBOXNETUDP_MATCH_REQUIRE_CHECKSUM))
+ {
+ if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbFrame - offUdpHdr, true /*fChecksum*/))
+ return NULL;
+ if ( (fFlags & VBOXNETUDP_MATCH_REQUIRE_CHECKSUM)
+ && !pUdpHdr->uh_sum)
+ return NULL;
+ }
+ else
+ {
+ if (!RTNetIPv4IsUDPSizeValid(pIpHdr, pUdpHdr, cbFrame - offUdpHdr))
+ return NULL;
+ }
+ }
+
+ /*
+ * We've got a match!
+ */
+ /* NOTE(review): for GSO frames uh_ulen is not validated by this function
+ before the header size is subtracted - confirm that
+ PDMNetGsoPrepForDirectUse guarantees a sane value. */
+ *pcb = RT_N2H_U16(pUdpHdr->uh_ulen) - sizeof(*pUdpHdr);
+ return (void *)(pUdpHdr + 1);
+}
+
+
+/**
+ * Internal worker for VBoxNetUDPUnicast and VBoxNetUDPBroadcast.
+ *
+ * Puts an Ethernet, an option-less IPv4 and a UDP header in front of the
+ * payload and hands the four pieces to VBoxNetIntIfSend as a gather list,
+ * requesting a flush.
+ *
+ * @returns The status code of VBoxNetIntIfSend.
+ */
+static int vboxnetudpSend(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf,
+                          RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC pSrcMacAddr, unsigned uSrcPort,
+                          RTNETADDRIPV4 DstIPv4Addr, PCRTMAC pDstMacAddr, unsigned uDstPort,
+                          void const *pvData, size_t cbData)
+{
+    /*
+     * The Ethernet header.
+     */
+    RTNETETHERHDR EthHdr;
+    EthHdr.DstMac    = *pDstMacAddr;
+    EthHdr.SrcMac    = *pSrcMacAddr;
+    EthHdr.EtherType = RT_H2BE_U16_C(RTNET_ETHERTYPE_IPV4);
+
+    /*
+     * The IPv4 header (no options).  The checksum field must be zero while
+     * the header checksum is being calculated.
+     */
+    unsigned const cbIpHdr = RT_UOFFSETOF(RTNETIPV4, ip_options);
+    RTNETIPV4 IpHdr;
+    IpHdr.ip_v   = 4;
+    IpHdr.ip_hl  = cbIpHdr >> 2;
+    IpHdr.ip_tos = 0;
+    IpHdr.ip_len = RT_H2BE_U16((uint16_t)(cbData + sizeof(RTNETUDP) + cbIpHdr));
+    IpHdr.ip_id  = (uint16_t)RTRandU32();
+    IpHdr.ip_off = 0;
+    IpHdr.ip_ttl = 255;
+    IpHdr.ip_p   = RTNETIPV4_PROT_UDP;
+    IpHdr.ip_sum = 0;
+    IpHdr.ip_src = SrcIPv4Addr;
+    IpHdr.ip_dst = DstIPv4Addr;
+    IpHdr.ip_sum = RTNetIPv4HdrChecksum(&IpHdr);
+
+    /*
+     * The UDP header.  (Setting uh_sum to 0 instead would pretend that
+     * checksumming is disabled.)
+     */
+    RTNETUDP UdpHdr;
+    UdpHdr.uh_sport = RT_H2BE_U16(uSrcPort);
+    UdpHdr.uh_dport = RT_H2BE_U16(uDstPort);
+    UdpHdr.uh_ulen  = RT_H2BE_U16((uint16_t)(cbData + sizeof(RTNETUDP)));
+    UdpHdr.uh_sum   = RTNetIPv4UDPChecksum(&IpHdr, &UdpHdr, pvData);
+
+    /*
+     * The gather list: Ethernet, IPv4, UDP, payload.
+     */
+    INTNETSEG aSegs[4];
+
+    aSegs[0].pv   = &EthHdr;
+    aSegs[0].cb   = sizeof(EthHdr);
+    aSegs[0].Phys = NIL_RTHCPHYS;
+
+    aSegs[1].pv   = &IpHdr;
+    aSegs[1].cb   = cbIpHdr;
+    aSegs[1].Phys = NIL_RTHCPHYS;
+
+    aSegs[2].pv   = &UdpHdr;
+    aSegs[2].cb   = sizeof(UdpHdr);
+    aSegs[2].Phys = NIL_RTHCPHYS;
+
+    aSegs[3].pv   = (void *)pvData;
+    aSegs[3].cb   = (uint32_t)cbData;
+    aSegs[3].Phys = NIL_RTHCPHYS;
+
+    /*
+     * Send it.
+     */
+    return VBoxNetIntIfSend(pSession, hIf, pBuf, RT_ELEMENTS(aSegs), &aSegs[0], true /* fFlush */);
+}
+
+
+/**
+ * Sends a UDP packet to a specific destination MAC address.
+ *
+ * @returns VBox status code.
+ * @param   pSession        The support driver session handle.
+ * @param   hIf             The interface handle.
+ * @param   pBuf            The interface buffer.
+ * @param   SrcIPv4Addr     The source IPv4 address.
+ * @param   pSrcMacAddr     The source MAC address.
+ * @param   uSrcPort        The source port number.
+ * @param   DstIPv4Addr     The destination IPv4 address.  Can be broadcast.
+ * @param   pDstMacAddr     The destination MAC address.
+ * @param   uDstPort        The destination port number.
+ * @param   pvData          The data payload.
+ * @param   cbData          The size of the data payload.
+ */
+int VBoxNetUDPUnicast(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf,
+                      RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC pSrcMacAddr, unsigned uSrcPort,
+                      RTNETADDRIPV4 DstIPv4Addr, PCRTMAC pDstMacAddr, unsigned uDstPort,
+                      void const *pvData, size_t cbData)
+{
+    /* Everything is passed straight through to the common worker. */
+    int const rc = vboxnetudpSend(pSession, hIf, pBuf,
+                                  SrcIPv4Addr, pSrcMacAddr, uSrcPort,
+                                  DstIPv4Addr, pDstMacAddr, uDstPort,
+                                  pvData, cbData);
+    return rc;
+}
+
+
+/**
+ * Sends a UDP packet to the all-ones MAC and IPv4 destination addresses.
+ *
+ * @returns VBox status code.
+ * @param   pSession        The support driver session handle.
+ * @param   hIf             The interface handle.
+ * @param   pBuf            The interface buffer.
+ * @param   SrcIPv4Addr     The source IPv4 address.
+ * @param   pSrcMacAddr     The source MAC address.
+ * @param   uSrcPort        The source port number.
+ * @param   uDstPort        The destination port number.
+ * @param   pvData          The data payload.
+ * @param   cbData          The size of the data payload.
+ */
+int VBoxNetUDPBroadcast(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF pBuf,
+                        RTNETADDRIPV4 SrcIPv4Addr, PCRTMAC pSrcMacAddr, unsigned uSrcPort,
+                        unsigned uDstPort,
+                        void const *pvData, size_t cbData)
+{
+    /* Layer 2 destination: ff:ff:ff:ff:ff:ff. */
+    RTMAC MacAllOnes;
+    MacAllOnes.au16[0] = UINT16_C(0xffff);
+    MacAllOnes.au16[1] = UINT16_C(0xffff);
+    MacAllOnes.au16[2] = UINT16_C(0xffff);
+
+    /* Layer 3 destination: 255.255.255.255. */
+    RTNETADDRIPV4 IPv4AllOnes;
+    IPv4AllOnes.u = UINT32_C(0xffffffff);
+
+    return vboxnetudpSend(pSession, hIf, pBuf,
+                          SrcIPv4Addr, pSrcMacAddr, uSrcPort,
+                          IPv4AllOnes, &MacAllOnes, uDstPort,
+                          pvData, cbData);
+}
+
diff --git a/src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h b/src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h
new file mode 100644
index 00000000..7ca8abc5
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h
@@ -0,0 +1,59 @@
+/* $Id: VBoxPortForwardString.h $ */
+/** @file
+ * VBoxPortForwardString
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NetLib_VBoxPortForwardString_h
+#define VBOX_INCLUDED_SRC_NetLib_VBoxPortForwardString_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/net.h>
+#include <VBox/intnet.h>
+
+RT_C_DECLS_BEGIN
+
+#define PF_NAMELEN 64
+/*
+ * TBD: This is the shared implementation for parsing a port-forward string
+ * of the format:
+ *      name:proto:[ipv4 or ipv6 host address]:host-port:[ipv4 or ipv6 guest address]:guest-port
+ * where proto is "tcp" or "udp".
+ *
+ * This code is supposed to be used in NetService and Frontend, and perhaps in
+ * the corresponding services.
+ *
+ * Note: ports are in host format.
+ */
+
+/**
+ * A port-forwarding rule as produced by netPfStrToPf().
+ */
+typedef struct PORTFORWARDRULE
+{
+ /* Rule name, zero terminated. */
+ char szPfrName[PF_NAMELEN];
+ /* true if ipv6 and false otherwise */
+ int fPfrIPv6;
+ /* IPPROTO_{UDP,TCP} */
+ int iPfrProto;
+ /* Host address as text, zero terminated. */
+ char szPfrHostAddr[INET6_ADDRSTRLEN];
+ /* Host port (host format, see the note above). */
+ uint16_t u16PfrHostPort;
+ /* Guest address as text, zero terminated. */
+ char szPfrGuestAddr[INET6_ADDRSTRLEN];
+ /* Guest port (host format, see the note above). */
+ uint16_t u16PfrGuestPort;
+} PORTFORWARDRULE, *PPORTFORWARDRULE;
+
+int netPfStrToPf(const char *pszStrPortForward, bool fIPv6, PPORTFORWARDRULE pPfr);
+
+RT_C_DECLS_END
+
+#endif /* !VBOX_INCLUDED_SRC_NetLib_VBoxPortForwardString_h */
+
diff --git a/src/VBox/NetworkServices/NetLib/cpp/utils.h b/src/VBox/NetworkServices/NetLib/cpp/utils.h
new file mode 100644
index 00000000..31128d02
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/cpp/utils.h
@@ -0,0 +1,47 @@
+/* $Id: utils.h $ */
+/** @file
+ * NetLib/cpp/utils.h
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NetLib_cpp_utils_h
+#define VBOX_INCLUDED_SRC_NetLib_cpp_utils_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/types.h>
+
+/** Less-than for IPv4 addresses, comparing the RT_N2H_U32 converted values. */
+DECLINLINE(bool) operator <(const RTNETADDRIPV4 &lhs, const RTNETADDRIPV4 &rhs)
+{
+    uint32_t const uLeft  = RT_N2H_U32(lhs.u);
+    uint32_t const uRight = RT_N2H_U32(rhs.u);
+    return uLeft < uRight;
+}
+
+/** Greater-than for IPv4 addresses, comparing the RT_N2H_U32 converted values. */
+DECLINLINE(bool) operator >(const RTNETADDRIPV4 &lhs, const RTNETADDRIPV4 &rhs)
+{
+    uint32_t const uLeft  = RT_N2H_U32(lhs.u);
+    uint32_t const uRight = RT_N2H_U32(rhs.u);
+    return uLeft > uRight;
+}
+
+/** Equality for MAC addresses: all three 16-bit words must be the same. */
+DECLINLINE(bool) operator== (const RTMAC &lhs, const RTMAC &rhs)
+{
+    if (lhs.au16[0] != rhs.au16[0])
+        return false;
+    if (lhs.au16[1] != rhs.au16[1])
+        return false;
+    return lhs.au16[2] == rhs.au16[2];
+}
+
+#endif /* !VBOX_INCLUDED_SRC_NetLib_cpp_utils_h */
+
diff --git a/src/VBox/NetworkServices/NetLib/shared_ptr.h b/src/VBox/NetworkServices/NetLib/shared_ptr.h
new file mode 100644
index 00000000..42d32fb0
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/shared_ptr.h
@@ -0,0 +1,102 @@
+/* $Id: shared_ptr.h $ */
+/** @file
+ * Simplified shared pointer.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_INCLUDED_SRC_NetLib_shared_ptr_h
+#define VBOX_INCLUDED_SRC_NetLib_shared_ptr_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#ifdef __cplusplus
+/**
+ * Simplified reference counting smart pointer.
+ *
+ * Every SharedPtr refers to a heap allocated control block (imp) holding the
+ * managed pointer and the number of SharedPtr objects referring to it.  The
+ * managed object is deleted together with the control block when the last
+ * referring SharedPtr goes away.  A default constructed SharedPtr manages
+ * NULL but still has a control block, so use_count() is 1 for it.
+ *
+ * The counter is a plain int which is modified without any synchronization.
+ */
+template<typename T>
+class SharedPtr
+{
+    /** The shared control block. */
+    struct imp
+    {
+        imp(T *pTrg = NULL, int cnt = 1) : ptr(pTrg), refcnt(cnt) {}
+        ~imp() { delete ptr; } /* deleting NULL is a no-op */
+
+        T  *ptr;        /**< The managed object, can be NULL. */
+        int refcnt;     /**< Number of SharedPtr objects referring to this block. */
+    };
+
+
+    public:
+    /** Takes ownership of @a t (may be NULL). */
+    SharedPtr(T *t = NULL) : p(NULL)
+    {
+        p = new imp(t);
+    }
+
+    /** Drops one reference, destroying the managed object with the last one. */
+    ~SharedPtr()
+    {
+        release();
+    }
+
+
+    /** Shares the object managed by @a rhs. */
+    SharedPtr(const SharedPtr& rhs)
+    {
+        p = rhs.p;
+        p->refcnt++;
+    }
+
+    /** Drops the current reference and shares the object managed by @a rhs. */
+    const SharedPtr& operator= (const SharedPtr& rhs)
+    {
+        /* Same control block (includes self assignment): nothing to do. */
+        if (p == rhs.p) return *this;
+
+        release();
+
+        p = rhs.p;
+        p->refcnt++;
+
+        return *this;
+    }
+
+
+    /** @returns The managed pointer, can be NULL. */
+    T *get() const
+    {
+        return p->ptr;
+    }
+
+
+    T *operator->()
+    {
+        return p->ptr;
+    }
+
+
+    const T*operator->() const
+    {
+        return p->ptr;
+    }
+
+
+    /** @returns Number of SharedPtr objects sharing the managed object. */
+    int use_count() const
+    {
+        return p->refcnt;
+    }
+
+    private:
+    /** Drops the reference to the current control block. */
+    void release()
+    {
+        p->refcnt--;
+
+        if (p->refcnt == 0)
+            delete p;
+    }
+
+    imp *p;
+};
+#endif
+
+#endif /* !VBOX_INCLUDED_SRC_NetLib_shared_ptr_h */
diff --git a/src/VBox/NetworkServices/NetLib/utils.h b/src/VBox/NetworkServices/NetLib/utils.h
new file mode 100644
index 00000000..ccd5cadf
--- /dev/null
+++ b/src/VBox/NetworkServices/NetLib/utils.h
@@ -0,0 +1,142 @@
+/* $Id: utils.h $ */
+/** @file
+ * ComHostUtils.cpp
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+/*******************************************************************************
+* Header Files *
+*******************************************************************************/
+
+#ifndef VBOX_INCLUDED_SRC_NetLib_utils_h
+#define VBOX_INCLUDED_SRC_NetLib_utils_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include "cpp/utils.h"
+
+typedef ComPtr<IVirtualBox> ComVirtualBoxPtr;
+typedef ComPtr<IVirtualBoxClient> ComVirtualBoxClientPtr;
+typedef ComPtr<IDHCPServer> ComDhcpServerPtr;
+typedef ComPtr<IHost> ComHostPtr;
+typedef ComPtr<INATNetwork> ComNatPtr;
+typedef com::SafeArray<BSTR> ComBstrArray;
+
+typedef std::vector<RTNETADDRIPV4> AddressList;
+typedef std::map<RTNETADDRIPV4, int> AddressToOffsetMapping;
+
+
+/**
+ * Queries the NeedDhcpServer attribute of a NAT network.
+ *
+ * @returns The attribute value, false if it cannot be queried.
+ */
+inline bool isDhcpRequired(const ComNatPtr& nat)
+{
+    BOOL fNeedDhcpServer = false;
+    HRESULT const hrc = nat->COMGETTER(NeedDhcpServer)(&fNeedDhcpServer);
+    return FAILED(hrc) ? false : RT_BOOL(fNeedDhcpServer);
+}
+
+
+/**
+ * Looks up the DHCP server object by network name.
+ *
+ * @returns VINF_SUCCESS on success, VERR_NOT_FOUND if the lookup fails.
+ * @param   vbox    The IVirtualBox object to ask.
+ * @param   name    The network name.
+ * @param   dhcp    Where to return the DHCP server object.
+ */
+inline int findDhcpServer(const ComVirtualBoxPtr& vbox, const std::string& name, ComDhcpServerPtr& dhcp)
+{
+    com::Bstr bstrNetworkName(name.c_str());
+    HRESULT hrc = vbox->FindDHCPServerByNetworkName(bstrNetworkName.raw(), dhcp.asOutParam());
+    AssertComRCReturn(hrc, VERR_NOT_FOUND);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Looks up the NAT network object by name.
+ *
+ * @returns VINF_SUCCESS on success, VERR_NOT_FOUND if the lookup fails.
+ * @param   vbox    The IVirtualBox object to ask.
+ * @param   name    The NAT network name.
+ * @param   nat     Where to return the NAT network object.
+ */
+inline int findNatNetwork(const ComVirtualBoxPtr& vbox, const std::string& name, ComNatPtr& nat)
+{
+    com::Bstr bstrNetworkName(name.c_str());
+    HRESULT hrc = vbox->FindNATNetworkByName(bstrNetworkName.raw(), nat.asOutParam());
+    AssertComRCReturn(hrc, VERR_NOT_FOUND);
+    return VINF_SUCCESS;
+}
+
+
+/** Masks @a addr with @a netmask, returning the result as an address. */
+inline RTNETADDRIPV4 networkid(const RTNETADDRIPV4& addr, const RTNETADDRIPV4& netmask)
+{
+    RTNETADDRIPV4 netid = addr;
+    netid.u &= netmask.u;
+    return netid;
+}
+
+
+int localMappings(const ComNatPtr&, AddressToOffsetMapping&);
+int hostDnsSearchList(const ComHostPtr&, std::vector<std::string>&);
+int hostDnsDomain(const ComHostPtr&, std::string& domainStr);
+
+
+/**
+ * Interface for objects that want the events received by a
+ * NATNetworkListener forwarded to them.
+ */
+class NATNetworkEventAdapter
+{
+    public:
+    /** Virtual destructor, as the class is meant to be derived from and used
+     *  via base class pointers. */
+    virtual ~NATNetworkEventAdapter() {}
+
+    /**
+     * Handles an event.
+     *
+     * @returns COM status code.
+     * @param   aEventType  The event type.
+     * @param   pEvent      The event.
+     */
+    virtual HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent) = 0;
+};
+
+
+/**
+ * Listener that forwards the events it gets to a NATNetworkEventAdapter.
+ *
+ * Events arriving while no adapter is set are answered with E_FAIL.
+ */
+class NATNetworkListener
+{
+    public:
+    NATNetworkListener() : m_pNAT(NULL) {}
+
+    /** Sets the adapter to forward to; @a pNAT must be a valid pointer. */
+    HRESULT init(NATNetworkEventAdapter *pNAT)
+    {
+        AssertPtrReturn(pNAT, E_INVALIDARG);
+
+        m_pNAT = pNAT;
+        return S_OK;
+    }
+
+    /** Initializes the listener without an adapter. */
+    HRESULT init()
+    {
+        m_pNAT = NULL;
+        return S_OK;
+    }
+
+    /** Forgets the adapter. */
+    void uninit()
+    {
+        m_pNAT = NULL;
+    }
+
+    /** Passes the event on to the adapter, if there is one. */
+    HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent)
+    {
+        if (!m_pNAT)
+            return E_FAIL;
+        return m_pNAT->HandleEvent(aEventType, pEvent);
+    }
+
+    private:
+    /** The adapter events are forwarded to, NULL if none. */
+    NATNetworkEventAdapter *m_pNAT;
+};
+typedef ListenerImpl<NATNetworkListener, NATNetworkEventAdapter*> NATNetworkListenerImpl;
+
+# ifdef VBOX_WITH_XPCOM
+class NS_CLASSINFO_NAME(NATNetworkListenerImpl);
+# endif
+
+typedef ComPtr<NATNetworkListenerImpl> ComNatListenerPtr;
+typedef com::SafeArray<VBoxEventType_T> ComEventTypeArray;
+
+/* XXX: const is commented out because of compilation erro on Windows host, but it's intended that this function
+ isn't modify event type array */
+int createNatListener(ComNatListenerPtr& listener, const ComVirtualBoxPtr& vboxptr,
+ NATNetworkEventAdapter *adapter, /* const */ ComEventTypeArray& events);
+int destroyNatListener(ComNatListenerPtr& listener, const ComVirtualBoxPtr& vboxptr);
+int createClientListener(ComNatListenerPtr& listener, const ComVirtualBoxClientPtr& vboxclientptr,
+ NATNetworkEventAdapter *adapter, /* const */ ComEventTypeArray& events);
+int destroyClientListener(ComNatListenerPtr& listener, const ComVirtualBoxClientPtr& vboxclientptr);
+
+#endif /* !VBOX_INCLUDED_SRC_NetLib_utils_h */