diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:17:27 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:17:27 +0000 |
commit | f215e02bf85f68d3a6106c2a1f4f7f063f819064 (patch) | |
tree | 6bb5b92c046312c4e95ac2620b10ddf482d3fa8b /src/VBox/Devices/Network/slirp | |
parent | Initial commit. (diff) | |
download | virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.tar.xz virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.zip |
Adding upstream version 7.0.14-dfsg.upstream/7.0.14-dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/Devices/Network/slirp')
89 files changed, 42267 insertions, 0 deletions
diff --git a/src/VBox/Devices/Network/slirp/COPYRIGHT b/src/VBox/Devices/Network/slirp/COPYRIGHT new file mode 100644 index 00000000..ffbc2e31 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/COPYRIGHT @@ -0,0 +1,64 @@ +Slirp was written by Danny Gasparovski. +Copyright (c), 1995,1996 All Rights Reserved. + +Slirp is maintained by Kelly Price <tygris+slirp@erols.com> + +Slirp is free software; "free" as in you don't have to pay for it, and you +are free to do whatever you want with it. I do not accept any donations, +monetary or otherwise, for Slirp. Instead, I would ask you to pass this +potential donation to your favorite charity. In fact, I encourage +*everyone* who finds Slirp useful to make a small donation to their +favorite charity (for example, GreenPeace). This is not a requirement, but +a suggestion from someone who highly values the service they provide. + +The copyright terms and conditions: + +---BEGIN--- + + Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---END--- + +This basically means you can do anything you want with the software, except +1) call it your own, and 2) claim warranty on it. There is no warranty for +this software. None. Nada. If you lose a million dollars while using +Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. + +If these conditions cannot be met due to legal restrictions (E.g. where it +is against the law to give out Software without warranty), you must cease +using the software and delete all copies you have. + +Slirp uses code that is copyrighted by the following people/organizations: + +Juha Pirkola. +Gregory M. Christy. +The Regents of the University of California. +Carnegie Mellon University. +The Australian National University. +RSA Data Security, Inc. + +Please read the top of each source file for the details on the various +copyrights. diff --git a/src/VBox/Devices/Network/slirp/Makefile.kup b/src/VBox/Devices/Network/slirp/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/Devices/Network/slirp/Makefile.kup diff --git a/src/VBox/Devices/Network/slirp/bootp.c b/src/VBox/Devices/Network/slirp/bootp.c new file mode 100644 index 00000000..0ad78891 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bootp.c @@ -0,0 +1,969 @@ +/* $Id: bootp.c $ */ +/** @file + * NAT - BOOTP/DHCP server emulation. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. 
+ * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * QEMU BOOTP/DHCP server + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <slirp.h> +#include <libslirp.h> +#include <iprt/errcore.h> + +/** Entry in the table of known DHCP clients. */ +typedef struct +{ + uint32_t xid; + bool allocated; + uint8_t macaddr[ETH_ALEN]; + struct in_addr addr; + int number; +} BOOTPClient; +/** Number of DHCP clients supported by NAT. */ +#define NB_ADDR 16 + +#define bootp_clients ((BOOTPClient *)pData->pbootp_clients) + +/* XXX: only DHCP is supported */ +static const uint8_t rfc1533_cookie[4] = { RFC1533_COOKIE }; + +static void bootp_reply(PNATState pData, struct mbuf *m0, int offReply, uint16_t flags); + + +static uint8_t *dhcp_find_option(uint8_t *vendor, size_t vlen, uint8_t tag, ssize_t checklen) +{ + uint8_t *q = vendor; + size_t len = vlen; + + q += sizeof(rfc1533_cookie); + len -= sizeof(rfc1533_cookie); + + while (len > 0) + { + uint8_t *optptr = q; + uint8_t opt; + uint8_t optlen; + + opt = *q++; + --len; + + if (opt == RFC1533_END) + break; + + if (opt == RFC1533_PAD) + continue; + + if (len == 0) + break; /* no option length byte */ + + optlen = *q++; + --len; + + if (len < optlen) + break; /* option value truncated */ + + if (opt == tag) + { + if (checklen > 0 && optlen != checklen) + break; /* wrong option size */ + + return optptr; + } + + q += optlen; + len -= optlen; + } + + return NULL; +} + +static BOOTPClient *bc_alloc_client(PNATState pData) +{ + int i; + LogFlowFuncEnter(); + for (i = 0; i < NB_ADDR; i++) + { + if (!bootp_clients[i].allocated) + { + BOOTPClient *bc; + + bc = &bootp_clients[i]; + memset(bc, 0, sizeof(BOOTPClient)); + bc->allocated = 1; + bc->number = i; + LogFlowFunc(("LEAVE: bc:%d\n", bc->number)); + return bc; + } + } + LogFlowFunc(("LEAVE: NULL\n")); + return NULL; +} + +static BOOTPClient *get_new_addr(PNATState pData, struct in_addr *paddr) +{ + BOOTPClient *bc; + LogFlowFuncEnter(); + bc = bc_alloc_client(pData); + if (!bc) + return NULL; + + paddr->s_addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | (bc->number + START_ADDR)); + 
bc->addr.s_addr = paddr->s_addr; + LogFlowFunc(("LEAVE: paddr:%RTnaipv4, bc:%d\n", paddr->s_addr, bc->number)); + return bc; +} + +static int release_addr(PNATState pData, struct in_addr *paddr) +{ + unsigned i; + for (i = 0; i < NB_ADDR; i++) + { + if (paddr->s_addr == bootp_clients[i].addr.s_addr) + { + memset(&bootp_clients[i], 0, sizeof(BOOTPClient)); + return VINF_SUCCESS; + } + } + return VERR_NOT_FOUND; +} + +/* + * from RFC 2131 4.3.1 + * Field DHCPOFFER DHCPACK DHCPNAK + * ----- --------- ------- ------- + * 'op' BOOTREPLY BOOTREPLY BOOTREPLY + * 'htype' (From "Assigned Numbers" RFC) + * 'hlen' (Hardware address length in octets) + * 'hops' 0 0 0 + * 'xid' 'xid' from client 'xid' from client 'xid' from client + * DHCPDISCOVER DHCPREQUEST DHCPREQUEST + * message message message + * 'secs' 0 0 0 + * 'ciaddr' 0 'ciaddr' from 0 + * DHCPREQUEST or 0 + * 'yiaddr' IP address offered IP address 0 + * to client assigned to client + * 'siaddr' IP address of next IP address of next 0 + * bootstrap server bootstrap server + * 'flags' 'flags' from 'flags' from 'flags' from + * client DHCPDISCOVER client DHCPREQUEST client DHCPREQUEST + * message message message + * 'giaddr' 'giaddr' from 'giaddr' from 'giaddr' from + * client DHCPDISCOVER client DHCPREQUEST client DHCPREQUEST + * message message message + * 'chaddr' 'chaddr' from 'chaddr' from 'chaddr' from + * client DHCPDISCOVER client DHCPREQUEST client DHCPREQUEST + * message message message + * 'sname' Server host name Server host name (unused) + * or options or options + * 'file' Client boot file Client boot file (unused) + * name or options name or options + * 'options' options options + * + * Option DHCPOFFER DHCPACK DHCPNAK + * ------ --------- ------- ------- + * Requested IP address MUST NOT MUST NOT MUST NOT + * IP address lease time MUST MUST (DHCPREQUEST) MUST NOT + * MUST NOT (DHCPINFORM) + * Use 'file'/'sname' fields MAY MAY MUST NOT + * DHCP message type DHCPOFFER DHCPACK DHCPNAK + * Parameter request 
list MUST NOT MUST NOT MUST NOT + * Message SHOULD SHOULD SHOULD + * Client identifier MUST NOT MUST NOT MAY + * Vendor class identifier MAY MAY MAY + * Server identifier MUST MUST MUST + * Maximum message size MUST NOT MUST NOT MUST NOT + * All others MAY MAY MUST NOT + */ +static BOOTPClient *find_addr(PNATState pData, struct in_addr *paddr, const uint8_t *macaddr) +{ + int i; + + LogFlowFunc(("macaddr:%RTmac\n", macaddr)); + for (i = 0; i < NB_ADDR; i++) + { + if ( memcmp(macaddr, bootp_clients[i].macaddr, ETH_ALEN) == 0 + && bootp_clients[i].allocated != 0) + { + BOOTPClient *bc; + + bc = &bootp_clients[i]; + bc->allocated = 1; + paddr->s_addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | (i + START_ADDR)); + LogFlowFunc(("LEAVE: paddr:%RTnaipv4 bc:%d\n", paddr->s_addr, bc->number)); + return bc; + } + } + LogFlowFunc(("LEAVE: NULL\n")); + return NULL; +} + +static struct mbuf *dhcp_create_msg(PNATState pData, struct bootp_t *bp, struct mbuf *m, uint8_t type) +{ + struct bootp_t *rbp; + struct ethhdr *eh; + uint8_t *q; + + eh = mtod(m, struct ethhdr *); + memcpy(eh->h_source, bp->bp_hwaddr, ETH_ALEN); /* XXX: if_encap just swap source with dest */ + + m->m_data += if_maxlinkhdr; /*reserve ether header */ + + rbp = mtod(m, struct bootp_t *); + memset(rbp, 0, sizeof(struct bootp_t)); + rbp->bp_op = BOOTP_REPLY; + rbp->bp_xid = bp->bp_xid; /* see table 3 of rfc2131*/ + rbp->bp_flags = bp->bp_flags; /* figure 2 of rfc2131 */ + rbp->bp_giaddr.s_addr = bp->bp_giaddr.s_addr; +#if 0 /*check flags*/ + saddr.sin_port = RT_H2N_U16_C(BOOTP_SERVER); + daddr.sin_port = RT_H2N_U16_C(BOOTP_CLIENT); +#endif + rbp->bp_htype = 1; + rbp->bp_hlen = 6; + memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, 6); + + memcpy(rbp->bp_vend, rfc1533_cookie, 4); /* cookie */ + q = rbp->bp_vend; + q += 4; + *q++ = RFC2132_MSG_TYPE; + *q++ = 1; + *q++ = type; + + return m; +} + +static int dhcp_do_ack_offer(PNATState pData, struct mbuf *m, BOOTPClient *bc, int fDhcpRequest) +{ + struct bootp_t *rbp 
= NULL; + uint8_t *q; + struct in_addr saddr; + int val; + + struct dns_entry *de = NULL; + struct dns_domain_entry *dd = NULL; + int added = 0; + uint8_t *q_dns_header = NULL; + uint32_t lease_time = RT_H2N_U32_C(LEASE_TIME); + uint32_t netmask = RT_H2N_U32(pData->netmask); + + rbp = mtod(m, struct bootp_t *); + q = &rbp->bp_vend[0]; + q += 7; /* !cookie rfc 2132 + TYPE*/ + + /*DHCP Offer specific*/ + /* + * we're care in built-in tftp server about existence/validness of the boot file. + */ + if (bootp_filename) + RTStrPrintf((char*)rbp->bp_file, sizeof(rbp->bp_file), "%s", bootp_filename); + + Log(("NAT: DHCP: bp_file:%s\n", &rbp->bp_file)); + /* Address/port of the DHCP server. */ + rbp->bp_yiaddr = bc->addr; /* Client IP address */ + Log(("NAT: DHCP: bp_yiaddr:%RTnaipv4\n", rbp->bp_yiaddr.s_addr)); + rbp->bp_siaddr = pData->tftp_server; /* Next Server IP address, i.e. TFTP */ + Log(("NAT: DHCP: bp_siaddr:%RTnaipv4\n", rbp->bp_siaddr.s_addr)); + if (fDhcpRequest) + { + rbp->bp_ciaddr.s_addr = bc->addr.s_addr; /* Client IP address */ + } + saddr.s_addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_ALIAS); + Log(("NAT: DHCP: s_addr:%RTnaipv4\n", saddr.s_addr)); + +#define FILL_BOOTP_EXT(q, tag, len, pvalue) \ + do { \ + struct bootp_ext *be = (struct bootp_ext *)(q); \ + be->bpe_tag = (tag); \ + be->bpe_len = (len); \ + memcpy(&be[1], (pvalue), (len)); \ + (q) = (uint8_t *)(&be[1]) + (len); \ + }while(0) +/* appending another value to tag, calculates len of whole block*/ +#define FILL_BOOTP_APP(head, q, tag, len, pvalue) \ + do { \ + struct bootp_ext *be = (struct bootp_ext *)(head); \ + memcpy(q, (pvalue), (len)); \ + (q) += (len); \ + Assert(be->bpe_tag == (tag)); \ + be->bpe_len += (len); \ + }while(0) + + + FILL_BOOTP_EXT(q, RFC1533_NETMASK, 4, &netmask); + FILL_BOOTP_EXT(q, RFC1533_GATEWAY, 4, &saddr); + + if (pData->fUseDnsProxy || pData->fUseHostResolver) + { + uint32_t addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_DNS); + 
FILL_BOOTP_EXT(q, RFC1533_DNS, 4, &addr); + } + else if (!TAILQ_EMPTY(&pData->pDnsList)) + { + de = TAILQ_LAST(&pData->pDnsList, dns_list_head); + q_dns_header = q; + FILL_BOOTP_EXT(q, RFC1533_DNS, 4, &de->de_addr.s_addr); + + TAILQ_FOREACH_REVERSE(de, &pData->pDnsList, dns_list_head, de_list) + { + if (TAILQ_LAST(&pData->pDnsList, dns_list_head) == de) + continue; /* first value with head we've ingected before */ + FILL_BOOTP_APP(q_dns_header, q, RFC1533_DNS, 4, &de->de_addr.s_addr); + } + } + + if (pData->fPassDomain && !pData->fUseHostResolver) + { + LIST_FOREACH(dd, &pData->pDomainList, dd_list) + { + + if (dd->dd_pszDomain == NULL) + continue; + /* never meet valid separator here in RFC1533*/ + if (added != 0) + FILL_BOOTP_EXT(q, RFC1533_DOMAINNAME, 1, ","); + else + added = 1; + val = (int)strlen(dd->dd_pszDomain); + FILL_BOOTP_EXT(q, RFC1533_DOMAINNAME, val, dd->dd_pszDomain); + } + } + + FILL_BOOTP_EXT(q, RFC2132_LEASE_TIME, 4, &lease_time); + + if (*slirp_hostname) + { + val = (int)strlen(slirp_hostname); + FILL_BOOTP_EXT(q, RFC1533_HOSTNAME, val, slirp_hostname); + } + /* Temporary fix: do not pollute ARP cache from BOOTP because it may result + in network loss due to cache entry override w/ invalid MAC address. 
*/ + /*slirp_arp_cache_update_or_add(pData, rbp->bp_yiaddr.s_addr, bc->macaddr);*/ + return q - rbp->bp_vend; /*return offset */ +} + +static int dhcp_send_nack(PNATState pData, struct bootp_t *bp, BOOTPClient *bc, struct mbuf *m) +{ + NOREF(bc); + + dhcp_create_msg(pData, bp, m, DHCPNAK); + return 7; +} + +static int dhcp_send_ack(PNATState pData, struct bootp_t *bp, BOOTPClient *bc, struct mbuf *m, int fDhcpRequest) +{ + int offReply = 0; /* boot_reply will fill general options and add END before sending response */ + + AssertReturn(bc != NULL, -1); + + dhcp_create_msg(pData, bp, m, DHCPACK); + slirp_update_guest_addr_guess(pData, bc->addr.s_addr, "DHCP ACK"); + offReply = dhcp_do_ack_offer(pData, m, bc, fDhcpRequest); + return offReply; +} + +static int dhcp_send_offer(PNATState pData, struct bootp_t *bp, BOOTPClient *bc, struct mbuf *m) +{ + int offReply = 0; /* boot_reply will fill general options and add END before sending response */ + + dhcp_create_msg(pData, bp, m, DHCPOFFER); + offReply = dhcp_do_ack_offer(pData, m, bc, /* fDhcpRequest=*/ 0); + return offReply; +} + +/** + * decoding client messages RFC2131 (4.3.6) + * --------------------------------------------------------------------- + * | |INIT-REBOOT |SELECTING |RENEWING |REBINDING | + * --------------------------------------------------------------------- + * |broad/unicast |broadcast |broadcast |unicast |broadcast | + * |server-ip |MUST NOT |MUST |MUST NOT |MUST NOT | + * |requested-ip |MUST |MUST |MUST NOT |MUST NOT | + * |ciaddr |zero |zero |IP address |IP address| + * --------------------------------------------------------------------- + * + */ + +enum DHCP_REQUEST_STATES +{ + INIT_REBOOT, + SELECTING, + RENEWING, + REBINDING, + NONE +}; + +static int dhcp_decode_request(PNATState pData, struct bootp_t *bp, size_t vlen, struct mbuf *m) +{ + BOOTPClient *bc = NULL; + struct in_addr daddr; + int offReply; + uint8_t *req_ip = NULL; + uint8_t *server_ip = NULL; + uint32_t ui32; + enum 
DHCP_REQUEST_STATES dhcp_stat = NONE; + + /* need to understand which type of request we get */ + req_ip = dhcp_find_option(bp->bp_vend, vlen, + RFC2132_REQ_ADDR, sizeof(struct in_addr)); + server_ip = dhcp_find_option(bp->bp_vend, vlen, + RFC2132_SRV_ID, sizeof(struct in_addr)); + + bc = find_addr(pData, &daddr, bp->bp_hwaddr); + + if (server_ip != NULL) + { + /* selecting */ + if (!bc) + { + LogRel(("NAT: DHCP no IP was allocated\n")); + return -1; + } + + if ( !req_ip + || bp->bp_ciaddr.s_addr != INADDR_ANY) + { + LogRel(("NAT: Invalid SELECTING request\n")); + return -1; /* silently ignored */ + } + dhcp_stat = SELECTING; + /* Assert((bp->bp_ciaddr.s_addr == INADDR_ANY)); */ + } + else + { + if (req_ip != NULL) + { + /* init-reboot */ + dhcp_stat = INIT_REBOOT; + } + else + { + /* table 4 of rfc2131 */ + if (bp->bp_flags & RT_H2N_U16_C(DHCP_FLAGS_B)) + dhcp_stat = REBINDING; + else + dhcp_stat = RENEWING; + } + } + + /*?? renewing ??*/ + switch (dhcp_stat) + { + case RENEWING: + /** + * decoding client messages RFC2131 (4.3.6) + * ------------------------------ + * | |RENEWING | + * ------------------------------ + * |broad/unicast |unicast | + * |server-ip |MUST NOT | + * |requested-ip |MUST NOT | + * |ciaddr |IP address | + * ------------------------------ + */ + if ( server_ip + || req_ip + || bp->bp_ciaddr.s_addr == INADDR_ANY) + { + LogRel(("NAT: Invalid RENEWING dhcp request\n")); + return -1; /* silent ignorance */ + } + if (bc != NULL) + { + /* Assert((bc->addr.s_addr == bp->bp_ciaddr.s_addr)); */ + /*if it already here well just do ack, we aren't aware of dhcp time expiration*/ + } + else + { + if ((bp->bp_ciaddr.s_addr & RT_H2N_U32(pData->netmask)) != pData->special_addr.s_addr) + { + LogRel(("NAT: Client %RTnaipv4 requested IP -- sending NAK\n", bp->bp_ciaddr.s_addr)); + offReply = dhcp_send_nack(pData, bp, bc, m); + return offReply; + } + + bc = bc_alloc_client(pData); + if (!bc) + { + LogRel(("NAT: Can't allocate address. 
RENEW has been silently ignored\n")); + return -1; + } + + memcpy(bc->macaddr, bp->bp_hwaddr, ETH_ALEN); + bc->addr.s_addr = bp->bp_ciaddr.s_addr; + } + break; + + case INIT_REBOOT: + /** + * decoding client messages RFC2131 (4.3.6) + * ------------------------------ + * | |INIT-REBOOT | + * ------------------------------ + * |broad/unicast |broadcast | + * |server-ip |MUST NOT | + * |requested-ip |MUST | + * |ciaddr |zero | + * ------------------------------ + * + */ + if ( server_ip + || !req_ip + || bp->bp_ciaddr.s_addr != INADDR_ANY) + { + LogRel(("NAT: Invalid INIT-REBOOT dhcp request\n")); + return -1; /* silently ignored */ + } + ui32 = *(uint32_t *)(req_ip + 2); + if ((ui32 & RT_H2N_U32(pData->netmask)) != pData->special_addr.s_addr) + { + LogRel(("NAT: Address %RTnaipv4 has been requested -- sending NAK\n", ui32)); + offReply = dhcp_send_nack(pData, bp, bc, m); + return offReply; + } + + /* find_addr() got some result? */ + if (!bc) + { + bc = bc_alloc_client(pData); + if (!bc) + { + LogRel(("NAT: Can't allocate address. 
RENEW has been silently ignored\n")); + return -1; + } + } + + memcpy(bc->macaddr, bp->bp_hwaddr, ETH_ALEN); + bc->addr.s_addr = ui32; + break; + + case NONE: + return -1; + + default: + break; + } + + if (bc == NULL) + return -1; + + LogRel(("NAT: DHCP offered IP address %RTnaipv4\n", bc->addr.s_addr)); + offReply = dhcp_send_ack(pData, bp, bc, m, /* fDhcpRequest=*/ 1); + return offReply; +} + +static int dhcp_decode_discover(PNATState pData, struct bootp_t *bp, int fDhcpDiscover, struct mbuf *m) +{ + BOOTPClient *bc; + struct in_addr daddr; + int offReply; + + if (fDhcpDiscover) + { + bc = find_addr(pData, &daddr, bp->bp_hwaddr); + if (!bc) + { + bc = get_new_addr(pData, &daddr); + if (!bc) + { + LogRel(("NAT: DHCP no IP address left\n")); + Log(("no address left\n")); + return -1; + } + memcpy(bc->macaddr, bp->bp_hwaddr, ETH_ALEN); + } + + bc->xid = bp->bp_xid; + LogRel(("NAT: DHCP offered IP address %RTnaipv4\n", bc->addr.s_addr)); + offReply = dhcp_send_offer(pData, bp, bc, m); + return offReply; + } + + bc = find_addr(pData, &daddr, bp->bp_hwaddr); + if (!bc) + { + LogRel(("NAT: DHCP Inform was ignored no boot client was found\n")); + return -1; + } + + LogRel(("NAT: DHCP offered IP address %RTnaipv4\n", bc->addr.s_addr)); + offReply = dhcp_send_ack(pData, bp, bc, m, /* fDhcpRequest=*/ 0); + return offReply; +} + +static int dhcp_decode_release(PNATState pData, struct bootp_t *bp) +{ + int rc = release_addr(pData, &bp->bp_ciaddr); + LogRel(("NAT: %s %RTnaipv4\n", + RT_SUCCESS(rc) ? 
"DHCP released IP address" : "Ignored DHCP release for IP address", + bp->bp_ciaddr.s_addr)); + return 0; +} + +/** + * fields for discovering t + * Field DHCPDISCOVER DHCPREQUEST DHCPDECLINE, + * DHCPINFORM DHCPRELEASE + * ----- ------------ ----------- ----------- + * 'op' BOOTREQUEST BOOTREQUEST BOOTREQUEST + * 'htype' (From "Assigned Numbers" RFC) + * 'hlen' (Hardware address length in octets) + * 'hops' 0 0 0 + * 'xid' selected by client 'xid' from server selected by + * DHCPOFFER message client + * 'secs' 0 or seconds since 0 or seconds since 0 + * DHCP process started DHCP process started + * 'flags' Set 'BROADCAST' Set 'BROADCAST' 0 + * flag if client flag if client + * requires broadcast requires broadcast + * reply reply + * 'ciaddr' 0 (DHCPDISCOVER) 0 or client's 0 (DHCPDECLINE) + * client's network address client's network + * network address (BOUND/RENEW/REBIND) address + * (DHCPINFORM) (DHCPRELEASE) + * 'yiaddr' 0 0 0 + * 'siaddr' 0 0 0 + * 'giaddr' 0 0 0 + * 'chaddr' client's hardware client's hardware client's hardware + * address address address + * 'sname' options, if options, if (unused) + * indicated in indicated in + * 'sname/file' 'sname/file' + * option; otherwise option; otherwise + * unused unused + * 'file' options, if options, if (unused) + * indicated in indicated in + * 'sname/file' 'sname/file' + * option; otherwise option; otherwise + * unused unused + * 'options' options options (unused) + * Requested IP address MAY MUST (in MUST + * (DISCOVER) SELECTING or (DHCPDECLINE), + * MUST NOT INIT-REBOOT) MUST NOT + * (INFORM) MUST NOT (in (DHCPRELEASE) + * BOUND or + * RENEWING) + * IP address lease time MAY MAY MUST NOT + * (DISCOVER) + * MUST NOT + * (INFORM) + * Use 'file'/'sname' fields MAY MAY MAY + * DHCP message type DHCPDISCOVER/ DHCPREQUEST DHCPDECLINE/ + * DHCPINFORM DHCPRELEASE + * Client identifier MAY MAY MAY + * Vendor class identifier MAY MAY MUST NOT + * Server identifier MUST NOT MUST (after MUST + * SELECTING) + * MUST NOT 
(after + * INIT-REBOOT, + * BOUND, RENEWING + * or REBINDING) + * Parameter request list MAY MAY MUST NOT + * Maximum message size MAY MAY MUST NOT + * Message SHOULD NOT SHOULD NOT SHOULD + * Site-specific MAY MAY MUST NOT + * All others MAY MAY MUST NOT + * + */ +static void dhcp_decode(PNATState pData, struct bootp_t *bp, size_t vlen) +{ + const uint8_t *pu8RawDhcpObject; + int rc; + struct in_addr req_ip; + int fDhcpDiscover = 0; + uint8_t *parameter_list = NULL; + struct mbuf *m = NULL; + + if (memcmp(bp->bp_vend, rfc1533_cookie, sizeof(rfc1533_cookie)) != 0) + return; + + pu8RawDhcpObject = dhcp_find_option(bp->bp_vend, vlen, RFC2132_MSG_TYPE, 1); + if (pu8RawDhcpObject == NULL) + return; + if (pu8RawDhcpObject[1] != 1) /* option length */ + return; + + /** + * We're going update dns list at least once per DHCP transaction (!not on every operation + * within transaction), assuming that transaction can't be longer than 1 min. + * + * @note: if we have notification update (HAVE_NOTIFICATION_FOR_DNS_UPDATE) + * provided by host, we don't need implicitly re-initialize dns list. + * + * @note: NATState::fUseHostResolver became (r89055) the flag signalling that Slirp + * wasn't able to fetch fresh host DNS info and fall down to use host-resolver, on one + * of the previous attempts to proxy dns requests to Host's name-resolving API + * + * @note: Checking NATState::fUseHostResolver == true, we want to try restore behaviour initialy + * wanted by user ASAP (P here when host serialize its configuration in files parsed by Slirp). 
+ */ + if ( !HAVE_NOTIFICATION_FOR_DNS_UPDATE + && !pData->fUseHostResolverPermanent + && ( pData->dnsLastUpdate == 0 + || curtime - pData->dnsLastUpdate > 60 * 1000 /* one minute */ + || pData->fUseHostResolver)) + { + uint8_t i; + + parameter_list = dhcp_find_option(bp->bp_vend, vlen, RFC2132_PARAM_LIST, -1); + for (i = 0; parameter_list && i < parameter_list[1]; ++i) + { + if (parameter_list[2 + i] == RFC1533_DNS) + { + /* XXX: How differs it from host Suspend/Resume? */ + slirpReleaseDnsSettings(pData); + slirpInitializeDnsSettings(pData); + pData->dnsLastUpdate = curtime; + break; + } + } + } + + m = m_getcl(pData, M_DONTWAIT, MT_HEADER, M_PKTHDR); + if (!m) + { + LogRel(("NAT: Can't allocate memory for response!\n")); + return; + } + + switch (*(pu8RawDhcpObject + 2)) + { + case DHCPDISCOVER: + fDhcpDiscover = 1; + RT_FALL_THRU(); + case DHCPINFORM: + rc = dhcp_decode_discover(pData, bp, fDhcpDiscover, m); + if (rc > 0) + goto reply; + break; + + case DHCPREQUEST: + rc = dhcp_decode_request(pData, bp, vlen, m); + if (rc > 0) + goto reply; + break; + + case DHCPRELEASE: + dhcp_decode_release(pData, bp); + /* no reply required */ + break; + + case DHCPDECLINE: + pu8RawDhcpObject = dhcp_find_option(bp->bp_vend, vlen, + RFC2132_REQ_ADDR, sizeof(struct in_addr)); + if (pu8RawDhcpObject == NULL) + { + Log(("NAT: RFC2132_REQ_ADDR not found\n")); + break; + } + + req_ip.s_addr = *(uint32_t *)(pu8RawDhcpObject + 2); + rc = bootp_cache_lookup_ether_by_ip(pData, req_ip.s_addr, NULL); + if (RT_FAILURE(rc)) + { + /* Not registered */ + BOOTPClient *bc; + bc = bc_alloc_client(pData); + Assert(bc); + if (!bc) + { + LogRel(("NAT: Can't allocate bootp client object\n")); + break; + } + bc->addr.s_addr = req_ip.s_addr; + slirp_arp_who_has(pData, bc->addr.s_addr); + LogRel(("NAT: %RTnaipv4 has been already registered\n", req_ip)); + } + /* no response required */ + break; + + default: + /* unsupported DHCP message type */ + break; + } + /* silently ignore */ + m_freem(pData, 
m); + return; + +reply: + bootp_reply(pData, m, rc, bp->bp_flags); +} + +static void bootp_reply(PNATState pData, struct mbuf *m, int offReply, uint16_t flags) +{ + struct sockaddr_in saddr, daddr; + struct bootp_t *rbp = NULL; + uint8_t *q = NULL; + int nack; + rbp = mtod(m, struct bootp_t *); + Assert((m)); + Assert((rbp)); + q = rbp->bp_vend; + nack = (q[6] == DHCPNAK); + q += offReply; + + saddr.sin_addr.s_addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_ALIAS); + + FILL_BOOTP_EXT(q, RFC2132_SRV_ID, 4, &saddr.sin_addr); + + *q++ = RFC1533_END; /* end of message */ + + m->m_pkthdr.header = mtod(m, void *); + m->m_len = sizeof(struct bootp_t) + - sizeof(struct ip) + - sizeof(struct udphdr); + m->m_data += sizeof(struct udphdr) + + sizeof(struct ip); + if ( (flags & RT_H2N_U16_C(DHCP_FLAGS_B)) + || nack != 0) + daddr.sin_addr.s_addr = INADDR_BROADCAST; + else + daddr.sin_addr.s_addr = rbp->bp_yiaddr.s_addr; /*unicast requested by client*/ + saddr.sin_port = RT_H2N_U16_C(BOOTP_SERVER); + daddr.sin_port = RT_H2N_U16_C(BOOTP_CLIENT); + udp_output2(pData, NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); +} + +void bootp_input(PNATState pData, struct mbuf *m) +{ + struct bootp_t *bp = mtod(m, struct bootp_t *); + u_int mlen = m_length(m, NULL); + size_t vlen; + + if (mlen < RT_UOFFSETOF(struct bootp_t, bp_vend) + sizeof(rfc1533_cookie)) + { + LogRelMax(50, ("NAT: ignoring invalid BOOTP request (mlen %u too short)\n", mlen)); + return; + } + + if (bp->bp_op != BOOTP_REQUEST) + { + LogRelMax(50, ("NAT: ignoring invalid BOOTP request (wrong opcode %u)\n", bp->bp_op)); + return; + } + + if (bp->bp_htype != RTNET_ARP_ETHER) + { + LogRelMax(50, ("NAT: ignoring invalid BOOTP request (wrong HW type %u)\n", bp->bp_htype)); + return; + } + + if (bp->bp_hlen != ETH_ALEN) + { + LogRelMax(50, ("NAT: ignoring invalid BOOTP request (wrong HW address length %u)\n", bp->bp_hlen)); + return; + } + + if (bp->bp_hops != 0) + { + LogRelMax(50, ("NAT: ignoring invalid BOOTP request 
(wrong hop count %u)\n", bp->bp_hops)); + return; + } + + vlen = mlen - RT_UOFFSETOF(struct bootp_t, bp_vend); + dhcp_decode(pData, bp, vlen); +} + +int bootp_cache_lookup_ip_by_ether(PNATState pData,const uint8_t* ether, uint32_t *pip) +{ + int i; + + if (!ether || !pip) + return VERR_INVALID_PARAMETER; + + for (i = 0; i < NB_ADDR; i++) + { + if ( bootp_clients[i].allocated + && memcmp(bootp_clients[i].macaddr, ether, ETH_ALEN) == 0) + { + *pip = bootp_clients[i].addr.s_addr; + return VINF_SUCCESS; + } + } + + *pip = INADDR_ANY; + return VERR_NOT_FOUND; +} + +int bootp_cache_lookup_ether_by_ip(PNATState pData, uint32_t ip, uint8_t *ether) +{ + int i; + for (i = 0; i < NB_ADDR; i++) + { + if ( bootp_clients[i].allocated + && ip == bootp_clients[i].addr.s_addr) + { + if (ether != NULL) + memcpy(ether, bootp_clients[i].macaddr, ETH_ALEN); + return VINF_SUCCESS; + } + } + + return VERR_NOT_FOUND; +} + +/* + * Initialize dhcp server + * @returns 0 - if initialization is ok, non-zero otherwise + */ +int bootp_dhcp_init(PNATState pData) +{ + pData->pbootp_clients = RTMemAllocZ(sizeof(BOOTPClient) * NB_ADDR); + if (!pData->pbootp_clients) + return VERR_NO_MEMORY; + + return VINF_SUCCESS; +} + +int bootp_dhcp_fini(PNATState pData) +{ + if (pData->pbootp_clients != NULL) + RTMemFree(pData->pbootp_clients); + + return VINF_SUCCESS; +} diff --git a/src/VBox/Devices/Network/slirp/bootp.h b/src/VBox/Devices/Network/slirp/bootp.h new file mode 100644 index 00000000..9c8ec8cc --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bootp.h @@ -0,0 +1,158 @@ +/* $Id: bootp.h $ */ +/** @file + * NAT - BOOTP/DHCP server emulation (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+/* bootp/dhcp defines
+ *
+ * BOOTP_REQUEST and BOOTP_REPLY are the values of bootp_t::bp_op.  The
+ * RFC1533_xxx and RFC2132_xxx constants are numeric option tags, grouped by
+ * the RFC they are named after.
+ */
+
+#define BOOTP_SERVER                67
+#define BOOTP_CLIENT                68
+
+#define BOOTP_REQUEST               1
+#define BOOTP_REPLY                 2
+
+#define RFC1533_COOKIE              99, 130, 83, 99 /* four bytes, usable as an initializer list */
+#define RFC1533_PAD                 0
+#define RFC1533_NETMASK             1
+#define RFC1533_TIMEOFFSET          2
+#define RFC1533_GATEWAY             3
+#define RFC1533_TIMESERVER          4
+#define RFC1533_IEN116NS            5
+#define RFC1533_DNS                 6
+#define RFC1533_LOGSERVER           7
+#define RFC1533_COOKIESERVER        8
+#define RFC1533_LPRSERVER           9
+#define RFC1533_IMPRESSSERVER       10
+#define RFC1533_RESOURCESERVER      11
+#define RFC1533_HOSTNAME            12
+#define RFC1533_BOOTFILESIZE        13
+#define RFC1533_MERITDUMPFILE       14
+#define RFC1533_DOMAINNAME          15
+#define RFC1533_SWAPSERVER          16
+#define RFC1533_ROOTPATH            17
+#define RFC1533_EXTENSIONPATH       18
+#define RFC1533_IPFORWARDING        19
+#define RFC1533_IPSOURCEROUTING     20
+#define RFC1533_IPPOLICYFILTER      21
+#define RFC1533_IPMAXREASSEMBLY     22
+#define RFC1533_IPTTL               23
+#define RFC1533_IPMTU               24
+#define RFC1533_IPMTUPLATEAU        25
+#define RFC1533_INTMTU              26
+#define RFC1533_INTLOCALSUBNETS     27
+#define RFC1533_INTBROADCAST        28
+#define RFC1533_INTICMPDISCOVER     29
+#define RFC1533_INTICMPRESPOND      30
+#define RFC1533_INTROUTEDISCOVER    31
+#define RFC1533_INTROUTESOLICIT     32
+#define RFC1533_INTSTATICROUTES     33
+#define RFC1533_LLTRAILERENCAP      34
+#define RFC1533_LLARPCACHETMO       35
+#define RFC1533_LLETHERNETENCAP     36
+#define RFC1533_TCPTTL              37
+#define RFC1533_TCPKEEPALIVETMO     38
+#define RFC1533_TCPKEEPALIVEGB      39
+#define RFC1533_NISDOMAIN           40
+#define RFC1533_NISSERVER           41
+#define RFC1533_NTPSERVER           42
+#define RFC1533_VENDOR              43
+#define RFC1533_NBNS                44
+#define RFC1533_NBDD                45
+#define RFC1533_NBNT                46
+#define RFC1533_NBSCOPE             47
+#define RFC1533_XFS                 48
+#define RFC1533_XDM                 49
+
+#define RFC2132_REQ_ADDR            50
+#define RFC2132_LEASE_TIME          51
+#define RFC2132_MSG_TYPE            53
+#define RFC2132_SRV_ID              54
+#define RFC2132_PARAM_LIST          55
+#define RFC2132_MAX_SIZE            57
+#define RFC2132_RENEWAL_TIME        58
+#define RFC2132_REBIND_TIME         59
+
+#define DHCPDISCOVER                1 /* DHCPxxx: presumably the values carried by option RFC2132_MSG_TYPE - TODO confirm */
+#define DHCPOFFER                   2
+#define DHCPREQUEST                 3
+#define DHCPDECLINE                 4
+#define DHCPACK                     5
+#define DHCPNAK                     6
+#define DHCPRELEASE                 7
+#define DHCPINFORM                  8
+
+#define RFC1533_VENDOR_MAJOR        0
+#define RFC1533_VENDOR_MINOR        0
+
+#define RFC1533_VENDOR_MAGIC        128
+#define RFC1533_VENDOR_ADDPARM      129
+#define RFC1533_VENDOR_ETHDEV       130
+#define RFC1533_VENDOR_HOWTO        132
+#define RFC1533_VENDOR_MNUOPTS      160
+#define RFC1533_VENDOR_SELECTION    176
+#define RFC1533_VENDOR_MOTD         184
+#define RFC1533_VENDOR_NUMOFMOTD    8
+#define RFC1533_VENDOR_IMG          192
+#define RFC1533_VENDOR_NUMOFIMG     16
+
+#define RFC1533_END                 255
+#define BOOTP_VENDOR_LEN            64
+#define DHCP_OPT_LEN                312 /* size in bytes of bootp_t::bp_vend */
+
+/* RFC 2131
+ *
+ * A BOOTP/DHCP message preceded by its IP and UDP headers, so the structure
+ * describes the packet starting at the IP header.
+ */
+struct bootp_t
+{
+    struct ip       ip;                     /**< header: IP header */
+    struct udphdr   udp;                    /**< header: UDP header */
+    uint8_t         bp_op;                  /**< opcode (BOOTP_REQUEST, BOOTP_REPLY) */
+    uint8_t         bp_htype;               /**< hardware type */
+    uint8_t         bp_hlen;                /**< hardware address length */
+    uint8_t         bp_hops;                /**< hop count */
+    uint32_t        bp_xid;                 /**< transaction ID */
+    uint16_t        bp_secs;                /**< number of seconds */
+    uint16_t        bp_flags;               /**< flags (DHCP_FLAGS_B) */
+    struct in_addr  bp_ciaddr;              /**< client IP address */
+    struct in_addr  bp_yiaddr;              /**< your IP address */
+    struct in_addr  bp_siaddr;              /**< server IP address */
+    struct in_addr  bp_giaddr;              /**< gateway IP address */
+    uint8_t         bp_hwaddr[16];          /**< client hardware address */
+    uint8_t         bp_sname[64];           /**< server host name */
+    uint8_t         bp_file[128];           /**< boot filename */
+    uint8_t         bp_vend[DHCP_OPT_LEN];  /**< vendor specific info */
+};
+
+
+#define DHCP_FLAGS_B (1<<15) /**< B, broadcast */
+struct bootp_ext
+{
+    uint8_t bpe_tag; /**< tag byte - presumably an option code from the lists above, TODO confirm */
+    uint8_t bpe_len; /**< length byte - presumably the size of the data following it, TODO confirm */
+};
+
+void bootp_input(PNATState, struct mbuf *m);
+int bootp_cache_lookup_ip_by_ether(PNATState, const uint8_t *, uint32_t *);
+int bootp_cache_lookup_ether_by_ip(PNATState, uint32_t, uint8_t *);
+int bootp_dhcp_init(PNATState);
+int bootp_dhcp_fini(PNATState);
diff --git a/src/VBox/Devices/Network/slirp/bsd/Makefile.kup b/src/VBox/Devices/Network/slirp/bsd/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/Devices/Network/slirp/bsd/Makefile.kup
diff --git a/src/VBox/Devices/Network/slirp/bsd/amd64/Makefile.kup b/src/VBox/Devices/Network/slirp/bsd/amd64/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/Devices/Network/slirp/bsd/amd64/Makefile.kup
diff --git a/src/VBox/Devices/Network/slirp/bsd/amd64/in_cksum.c b/src/VBox/Devices/Network/slirp/bsd/amd64/in_cksum.c
new file mode 100644
index 00000000..351003e3
--- /dev/null
+++ b/src/VBox/Devices/Network/slirp/bsd/amd64/in_cksum.c
@@ -0,0 +1,242 @@
+/* $NetBSD: in_cksum.c,v 1.7 1997/09/02 13:18:15 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 1988, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1996
+ * Matt Thomas <matt@3am-software.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef VBOX
+#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
+__FBSDID("$FreeBSD: src/sys/amd64/amd64/in_cksum.c,v 1.5.20.1 2009/04/15 03:14:26 kensmith Exp $");
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <machine/in_cksum.h>
+#else
+# include "in_cksum.h"
+# include "slirp.h"
+#endif
+
+/*
+ * Checksum routine for Internet Protocol family headers
+ * (Portable Alpha version).
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ *
+ * Helper macros defined below.  REDUCE32 and REDUCE16 operate on a variable
+ * named 'sum' that must be in scope where they are used:
+ *   ADDCARRY(x) - when x > 65535, subtracts 65535 from x; the argument is
+ *                 expanded without parentheses, so pass a plain lvalue.
+ *   REDUCE32    - replaces sum with the sum of its four 16-bit parts; needs
+ *                 a local 'union q_util q_util'.
+ *   REDUCE16    - same reduction, then adds the two 16-bit parts of that
+ *                 32-bit result and applies ADDCARRY; additionally needs a
+ *                 local 'union l_util l_util'.
+ */
+
+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
+#define REDUCE32 \
+    { \
+        q_util.q = sum; \
+        sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+    }
+#define REDUCE16 \
+    { \
+        q_util.q = sum; \
+        l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+        sum = l_util.s[0] + l_util.s[1]; \
+        ADDCARRY(sum); \
+    }
+/*
+ * Masks applied to a partially used 32-bit word.  in_cksumdata() indexes the
+ * table as (offset << 2) + count, offset being the misalignment of the buffer
+ * start (0-3) and count the number of bytes to keep (0-3).
+ * NOTE(review): the byte positions look like they assume a little-endian
+ * load - confirm before using this file on a big-endian host.
+ */
+static const u_int32_t in_masks[] = {
+    /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/
+    0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */
+    0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */
+    0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */
+    0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */
+};
+
+union l_util { /* a 32-bit value viewed as two 16-bit parts (used by REDUCE16) */
+    u_int16_t s[2];
+    u_int32_t l;
+};
+union q_util { /* a 64-bit value viewed as four 16-bit parts (used by REDUCE32/REDUCE16) */
+    u_int16_t s[4];
+    u_int32_t l[2];
+    u_int64_t q;
+};
+/*
+ * Adds up 'len' bytes starting at 'buf' as 32-bit words in a 64-bit
+ * accumulator and returns the total after REDUCE32.  The result is neither
+ * folded to 16 bits nor complemented; the callers do that.
+ *
+ * A 4-byte aligned buffer of exactly 20 bytes takes a dedicated five-word
+ * path.  For a misaligned start the pointer is rounded down to a 4-byte
+ * boundary and the first word is masked with in_masks.
+ */
+static u_int64_t
+in_cksumdata(const void *buf, int len)
+{
+    const u_int32_t *lw = (const u_int32_t *) buf;
+    u_int64_t sum = 0;
+    u_int64_t prefilled;
+    int offset;
+    union q_util q_util;
+
+    if ((3 & (intptr_t) lw) == 0 && len == 20) {
+        sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
+        REDUCE32;
+        return sum;
+    }
+
+    if ((offset = 3 & (intptr_t) lw) != 0) {
+        const u_int32_t *masks = in_masks + (offset << 2);
+        lw = (u_int32_t *) (((RTHCUINTPTR) lw) - offset); /* round down to the containing aligned word */
+        sum = *lw++ & masks[len >= 3 ? 3 : len];
+        len -= 4 - offset; /* bytes of that first word that belonged to the buffer */
+        if (len <= 0) {
+            REDUCE32;
+            return sum;
+        }
+    }
+#if 0
+    /*
+     * Force to cache line boundary.
+     */
+    offset = 32 - (0x1f & (long) lw);
+    if (offset < 32 && len > offset) {
+        len -= offset;
+        if (4 & offset) {
+            sum += (u_int64_t) lw[0];
+            lw += 1;
+        }
+        if (8 & offset) {
+            sum += (u_int64_t) lw[0] + lw[1];
+            lw += 2;
+        }
+        if (16 & offset) {
+            sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
+            lw += 4;
+        }
+    }
+#endif
+    /*
+     * access prefilling to start load of next cache line.
+     * then add current cache line
+     * save result of prefilling for loop iteration.
+     *
+     * NOTE(review): lw[0] is loaded unconditionally here, i.e. also when
+     * fewer than four bytes remain; the value is then simply not added.
+     */
+    prefilled = lw[0];
+    while ((len -= 32) >= 4) { /* >= 4: lw[8] is read ahead inside the loop */
+        u_int64_t prefilling = lw[8];
+        sum += prefilled + lw[1] + lw[2] + lw[3]
+            + lw[4] + lw[5] + lw[6] + lw[7];
+        lw += 8;
+        prefilled = prefilling;
+    }
+    if (len >= 0) {
+        sum += prefilled + lw[1] + lw[2] + lw[3]
+            + lw[4] + lw[5] + lw[6] + lw[7];
+        lw += 8;
+    } else {
+        len += 32; /* undo the subtraction of the failed loop test */
+    }
+    while ((len -= 16) >= 0) {
+        sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
+        lw += 4;
+    }
+    len += 16;
+    while ((len -= 4) >= 0) {
+        sum += (u_int64_t) *lw++;
+    }
+    len += 4;
+    if (len > 0) /* 1-3 trailing bytes: keep only those via the offset-0 masks */
+        sum += (u_int64_t) (in_masks[len] & *lw);
+    REDUCE32;
+    return sum;
+}
+/*
+ * Adds two 16-bit values and applies ADDCARRY to the result.
+ */
+u_short
+in_addword(u_short a, u_short b)
+{
+    u_int64_t sum = a + b;
+
+    ADDCARRY(sum);
+    return (sum);
+}
+/*
+ * Adds three 32-bit values in 64 bits and folds the total with REDUCE16.
+ * The result is not complemented.
+ */
+u_short
+in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c)
+{
+    u_int64_t sum;
+    union q_util q_util;
+    union l_util l_util;
+
+    sum = (u_int64_t) a + b + c;
+    REDUCE16;
+    return (sum);
+}
+/*
+ * Checksums part of the mbuf chain 'm': the first 'skip' bytes are stepped
+ * over and the following 'len - skip' bytes are summed one mbuf at a time
+ * with in_cksumdata().  Returns the complemented low 16 bits of the folded
+ * sum.
+ */
+u_short
+in_cksum_skip(struct mbuf *m, int len, int skip)
+{
+    u_int64_t sum = 0;
+    int mlen = 0;
+    int clen = 0; /* number of bytes summed so far */
+    caddr_t addr;
+    union q_util q_util;
+    union l_util l_util;
+
+    len -= skip;
+    for (; skip && m; m = m->m_next) {
+        if (m->m_len > skip) {
+            mlen = m->m_len - skip;
+            addr = mtod(m, caddr_t) + skip;
+            goto skip_start; /* enter the summing loop in the middle of this mbuf */
+        } else {
+            skip -= m->m_len;
+        }
+    }
+
+    for (; m && len; m = m->m_next) {
+        if (m->m_len == 0)
+            continue;
+        mlen = m->m_len;
+        addr = mtod(m, caddr_t);
+skip_start:
+        if (len < mlen)
+            mlen = len;
+        if ((clen ^ (intptr_t) addr) & 1) /* parity of the chunk address differs from parity of bytes done */
+            sum += in_cksumdata(addr, mlen) << 8;
+        else
+            sum += in_cksumdata(addr, mlen);
+
+        clen += mlen;
+        len -= mlen;
+    }
+    REDUCE16;
+    return (~sum & 0xffff);
+}
+/*
+ * Checksums sizeof(struct ip) bytes at 'ip' and returns the complemented
+ * low 16 bits of the folded sum.
+ */
+u_int in_cksum_hdr(const struct ip *ip)
+{
+    u_int64_t sum = in_cksumdata(ip, sizeof(struct ip));
+    union q_util q_util;
+    union l_util l_util;
+    REDUCE16;
+    return (~sum & 0xffff);
+}
diff --git a/src/VBox/Devices/Network/slirp/bsd/amd64/include/in_cksum.h b/src/VBox/Devices/Network/slirp/bsd/amd64/include/in_cksum.h
new file mode 100644
index
00000000..47a4565e --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bsd/amd64/include/in_cksum.h @@ -0,0 +1,84 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * from tahoe: in_cksum.c 1.2 86/01/05
+ * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
+ * from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp
+ * $FreeBSD: src/sys/amd64/include/in_cksum.h,v 1.5.20.1 2009/04/15 03:14:26 kensmith Exp $
+ */
+
+#ifndef _MACHINE_IN_CKSUM_H_
+#define _MACHINE_IN_CKSUM_H_ 1
+
+#ifndef VBOX
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+#include <sys/cdefs.h>
+#else
+# include "slirp.h"
+#endif
+
+#define in_cksum(m, len) in_cksum_skip(m, len, 0) /* checksum from the first byte, nothing skipped */
+
+/*
+ * It is useful to have an Internet checksum routine which is inlineable
+ * and optimized specifically for the task of computing IP header checksums
+ * in the normal case (where there are no options and the header length is
+ * therefore always exactly five 32-bit words).
+ *
+ * in_cksum_update() below adds 256 to the host-order value of ip->ip_sum,
+ * adds the bits above bit 15 back in and stores the result in network order.
+ * NOTE(review): presumably the incremental update that goes with a TTL
+ * decrement - confirm against the callers.
+ */
+#ifdef __CC_SUPPORTS___INLINE
+
+static __inline void
+in_cksum_update(struct ip *ip)
+{
+    int __tmpsum;
+    __tmpsum = (int)ntohs(ip->ip_sum) + 256;
+    ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16));
+}
+
+#else
+
+#define in_cksum_update(ip) \
+    do { \
+        int __tmpsum; \
+        __tmpsum = (int)ntohs(ip->ip_sum) + 256; \
+        ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16)); \
+    } while(0)
+
+#endif
+
+#if defined(_KERNEL) || defined(VBOX)
+u_int in_cksum_hdr(const struct ip *ip);
+u_short in_addword(u_short sum, u_short b);
+u_short in_pseudo(u_int sum, u_int b, u_int c);
+u_short in_cksum_skip(struct mbuf *m, int len, int skip);
+#endif
+
+#endif /* _MACHINE_IN_CKSUM_H_ */
diff --git a/src/VBox/Devices/Network/slirp/bsd/arm64/Makefile.kup b/src/VBox/Devices/Network/slirp/bsd/arm64/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/Devices/Network/slirp/bsd/arm64/Makefile.kup
diff --git a/src/VBox/Devices/Network/slirp/bsd/arm64/in_cksum.c b/src/VBox/Devices/Network/slirp/bsd/arm64/in_cksum.c
new file mode 100644
index 00000000..351003e3
--- /dev/null
+++ b/src/VBox/Devices/Network/slirp/bsd/arm64/in_cksum.c
@@ -0,0 +1,242 @@
+/* $NetBSD:
in_cksum.c,v 1.7 1997/09/02 13:18:15 thorpej Exp $ */ + +/*- + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 1996 + * Matt Thomas <matt@3am-software.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef VBOX
+#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
+__FBSDID("$FreeBSD: src/sys/amd64/amd64/in_cksum.c,v 1.5.20.1 2009/04/15 03:14:26 kensmith Exp $");
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <machine/in_cksum.h>
+#else
+# include "in_cksum.h"
+# include "slirp.h"
+#endif
+
+/*
+ * Checksum routine for Internet Protocol family headers
+ * (Portable Alpha version).
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ *
+ * Helper macros defined below.  REDUCE32 and REDUCE16 operate on a variable
+ * named 'sum' that must be in scope where they are used:
+ *   ADDCARRY(x) - when x > 65535, subtracts 65535 from x; the argument is
+ *                 expanded without parentheses, so pass a plain lvalue.
+ *   REDUCE32    - replaces sum with the sum of its four 16-bit parts; needs
+ *                 a local 'union q_util q_util'.
+ *   REDUCE16    - same reduction, then adds the two 16-bit parts of that
+ *                 32-bit result and applies ADDCARRY; additionally needs a
+ *                 local 'union l_util l_util'.
+ */
+
+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
+#define REDUCE32 \
+    { \
+        q_util.q = sum; \
+        sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+    }
+#define REDUCE16 \
+    { \
+        q_util.q = sum; \
+        l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+        sum = l_util.s[0] + l_util.s[1]; \
+        ADDCARRY(sum); \
+    }
+/*
+ * Masks applied to a partially used 32-bit word.  in_cksumdata() indexes the
+ * table as (offset << 2) + count, offset being the misalignment of the buffer
+ * start (0-3) and count the number of bytes to keep (0-3).
+ * NOTE(review): the byte positions look like they assume a little-endian
+ * load - confirm before using this file on a big-endian host.
+ */
+static const u_int32_t in_masks[] = {
+    /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/
+    0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */
+    0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */
+    0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */
+    0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */
+};
+
+union l_util { /* a 32-bit value viewed as two 16-bit parts (used by REDUCE16) */
+    u_int16_t s[2];
+    u_int32_t l;
+};
+union q_util { /* a 64-bit value viewed as four 16-bit parts (used by REDUCE32/REDUCE16) */
+    u_int16_t s[4];
+    u_int32_t l[2];
+    u_int64_t q;
+};
+/*
+ * Adds up 'len' bytes starting at 'buf' as 32-bit words in a 64-bit
+ * accumulator and returns the total after REDUCE32.  The result is neither
+ * folded to 16 bits nor complemented; the callers do that.
+ *
+ * A 4-byte aligned buffer of exactly 20 bytes takes a dedicated five-word
+ * path.  For a misaligned start the pointer is rounded down to a 4-byte
+ * boundary and the first word is masked with in_masks.
+ */
+static u_int64_t
+in_cksumdata(const void *buf, int len)
+{
+    const u_int32_t *lw = (const u_int32_t *) buf;
+    u_int64_t sum = 0;
+    u_int64_t prefilled;
+    int offset;
+    union q_util q_util;
+
+    if ((3 & (intptr_t) lw) == 0 && len == 20) {
+        sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
+        REDUCE32;
+        return sum;
+    }
+
+    if ((offset = 3 & (intptr_t) lw) != 0) {
+        const u_int32_t *masks = in_masks + (offset << 2);
+        lw = (u_int32_t *) (((RTHCUINTPTR) lw) - offset); /* round down to the containing aligned word */
+        sum = *lw++ & masks[len >= 3 ? 3 : len];
+        len -= 4 - offset; /* bytes of that first word that belonged to the buffer */
+        if (len <= 0) {
+            REDUCE32;
+            return sum;
+        }
+    }
+#if 0
+    /*
+     * Force to cache line boundary.
+     */
+    offset = 32 - (0x1f & (long) lw);
+    if (offset < 32 && len > offset) {
+        len -= offset;
+        if (4 & offset) {
+            sum += (u_int64_t) lw[0];
+            lw += 1;
+        }
+        if (8 & offset) {
+            sum += (u_int64_t) lw[0] + lw[1];
+            lw += 2;
+        }
+        if (16 & offset) {
+            sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
+            lw += 4;
+        }
+    }
+#endif
+    /*
+     * access prefilling to start load of next cache line.
+     * then add current cache line
+     * save result of prefilling for loop iteration.
+     *
+     * NOTE(review): lw[0] is loaded unconditionally here, i.e. also when
+     * fewer than four bytes remain; the value is then simply not added.
+     */
+    prefilled = lw[0];
+    while ((len -= 32) >= 4) { /* >= 4: lw[8] is read ahead inside the loop */
+        u_int64_t prefilling = lw[8];
+        sum += prefilled + lw[1] + lw[2] + lw[3]
+            + lw[4] + lw[5] + lw[6] + lw[7];
+        lw += 8;
+        prefilled = prefilling;
+    }
+    if (len >= 0) {
+        sum += prefilled + lw[1] + lw[2] + lw[3]
+            + lw[4] + lw[5] + lw[6] + lw[7];
+        lw += 8;
+    } else {
+        len += 32; /* undo the subtraction of the failed loop test */
+    }
+    while ((len -= 16) >= 0) {
+        sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
+        lw += 4;
+    }
+    len += 16;
+    while ((len -= 4) >= 0) {
+        sum += (u_int64_t) *lw++;
+    }
+    len += 4;
+    if (len > 0) /* 1-3 trailing bytes: keep only those via the offset-0 masks */
+        sum += (u_int64_t) (in_masks[len] & *lw);
+    REDUCE32;
+    return sum;
+}
+/*
+ * Adds two 16-bit values and applies ADDCARRY to the result.
+ */
+u_short
+in_addword(u_short a, u_short b)
+{
+    u_int64_t sum = a + b;
+
+    ADDCARRY(sum);
+    return (sum);
+}
+/*
+ * Adds three 32-bit values in 64 bits and folds the total with REDUCE16.
+ * The result is not complemented.
+ */
+u_short
+in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c)
+{
+    u_int64_t sum;
+    union q_util q_util;
+    union l_util l_util;
+
+    sum = (u_int64_t) a + b + c;
+    REDUCE16;
+    return (sum);
+}
+/*
+ * Checksums part of the mbuf chain 'm': the first 'skip' bytes are stepped
+ * over and the following 'len - skip' bytes are summed one mbuf at a time
+ * with in_cksumdata().  Returns the complemented low 16 bits of the folded
+ * sum.
+ */
+u_short
+in_cksum_skip(struct mbuf *m, int len, int skip)
+{
+    u_int64_t sum = 0;
+    int mlen = 0;
+    int clen = 0; /* number of bytes summed so far */
+    caddr_t addr;
+    union q_util q_util;
+    union l_util l_util;
+
+    len -= skip;
+    for (; skip && m; m = m->m_next) {
+        if (m->m_len > skip) {
+            mlen = m->m_len - skip;
+            addr = mtod(m, caddr_t) + skip;
+            goto skip_start; /* enter the summing loop in the middle of this mbuf */
+        } else {
+            skip -= m->m_len;
+        }
+    }
+
+    for (; m && len; m = m->m_next) {
+        if (m->m_len == 0)
+            continue;
+        mlen = m->m_len;
+        addr = mtod(m, caddr_t);
+skip_start:
+        if (len < mlen)
+            mlen = len;
+        if ((clen ^ (intptr_t) addr) & 1) /* parity of the chunk address differs from parity of bytes done */
+            sum += in_cksumdata(addr, mlen) << 8;
+        else
+            sum += in_cksumdata(addr, mlen);
+
+        clen += mlen;
+        len -= mlen;
+    }
+    REDUCE16;
+    return (~sum & 0xffff);
+}
+/*
+ * Checksums sizeof(struct ip) bytes at 'ip' and returns the complemented
+ * low 16 bits of the folded sum.
+ */
+u_int in_cksum_hdr(const struct ip *ip)
+{
+    u_int64_t sum = in_cksumdata(ip, sizeof(struct ip));
+    union q_util q_util;
+    union l_util l_util;
+    REDUCE16;
+    return (~sum & 0xffff);
+}
diff --git a/src/VBox/Devices/Network/slirp/bsd/arm64/include/in_cksum.h b/src/VBox/Devices/Network/slirp/bsd/arm64/include/in_cksum.h
new file mode 100644
index 00000000..47a4565e
--- /dev/null
+++ b/src/VBox/Devices/Network/slirp/bsd/arm64/include/in_cksum.h
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from tahoe: in_cksum.c 1.2 86/01/05
+ * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
+ * from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp
+ * $FreeBSD: src/sys/amd64/include/in_cksum.h,v 1.5.20.1 2009/04/15 03:14:26 kensmith Exp $
+ */
+
+#ifndef _MACHINE_IN_CKSUM_H_
+#define _MACHINE_IN_CKSUM_H_ 1
+
+#ifndef VBOX
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+#include <sys/cdefs.h>
+#else
+# include "slirp.h"
+#endif
+
+#define in_cksum(m, len) in_cksum_skip(m, len, 0) /* checksum from the first byte, nothing skipped */
+
+/*
+ * It is useful to have an Internet checksum routine which is inlineable
+ * and optimized specifically for the task of computing IP header checksums
+ * in the normal case (where there are no options and the header length is
+ * therefore always exactly five 32-bit words).
+ *
+ * in_cksum_update() below adds 256 to the host-order value of ip->ip_sum,
+ * adds the bits above bit 15 back in and stores the result in network order.
+ * NOTE(review): presumably the incremental update that goes with a TTL
+ * decrement - confirm against the callers.
+ */
+#ifdef __CC_SUPPORTS___INLINE
+
+static __inline void
+in_cksum_update(struct ip *ip)
+{
+    int __tmpsum;
+    __tmpsum = (int)ntohs(ip->ip_sum) + 256;
+    ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16));
+}
+
+#else
+
+#define in_cksum_update(ip) \
+    do { \
+        int __tmpsum; \
+        __tmpsum = (int)ntohs(ip->ip_sum) + 256; \
+        ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16)); \
+    } while(0)
+
+#endif
+
+#if defined(_KERNEL) || defined(VBOX)
+u_int in_cksum_hdr(const struct ip *ip);
+u_short in_addword(u_short sum, u_short b);
+u_short in_pseudo(u_int sum, u_int b, u_int c);
+u_short in_cksum_skip(struct mbuf *m, int len, int skip);
+#endif
+
+#endif /* _MACHINE_IN_CKSUM_H_ */
diff --git a/src/VBox/Devices/Network/slirp/bsd/i386/Makefile.kup b/src/VBox/Devices/Network/slirp/bsd/i386/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/Devices/Network/slirp/bsd/i386/Makefile.kup
diff --git a/src/VBox/Devices/Network/slirp/bsd/i386/in_cksum.c b/src/VBox/Devices/Network/slirp/bsd/i386/in_cksum.c
new file mode 100644
index 00000000..2be70636
--- /dev/null
+++ b/src/VBox/Devices/Network/slirp/bsd/i386/in_cksum.c
@@ -0,0 +1,499 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from tahoe: in_cksum.c 1.2 86/01/05
+ * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
+ */
+
+#ifndef VBOX
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/in_cksum.c,v 1.28.10.1.6.1 2009/04/15 03:14:26 kensmith Exp $");
+
+/*
+ * MPsafe: alfred
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+
+#include <machine/in_cksum.h>
+#else
+# include "in_cksum.h"
+# include "slirp.h"
+#endif
+
+/*
+ * Checksum routine for Internet Protocol family headers.
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ *
+ * This implementation is 386 version.
+ *
+ * ADDCARRY(x) is redefined below as a bare if-statement (subtract 0xffff
+ * from x when x exceeds 0xffff), so it can only be used where a statement
+ * is allowed, and never directly in front of an 'else'.
+ */
+
+#undef ADDCARRY
+#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff
+/*
+ * icc needs to be special cased here, as the asm code below results
+ * in broken code if compiled with icc.
+ */ +#if !defined(__GNUCLIKE_ASM) || defined(__INTEL_COMPILER) +/* non gcc parts stolen from sys/alpha/alpha/in_cksum.c */ +#define REDUCE32 \ + { \ + q_util.q = sum; \ + sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ + } +#define REDUCE16 \ + { \ + q_util.q = sum; \ + l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ + sum = l_util.s[0] + l_util.s[1]; \ + ADDCARRY(sum); \ + } +#endif +#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} + +#if !defined(__GNUCLIKE_ASM) || defined(__INTEL_COMPILER) +static const u_int32_t in_masks[] = { + /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/ + 0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */ + 0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */ + 0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */ + 0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */ +}; + +union l_util { + u_int16_t s[2]; + u_int32_t l; +}; +union q_util { + u_int16_t s[4]; + u_int32_t l[2]; + u_int64_t q; +}; + +static u_int64_t +in_cksumdata(const u_int32_t *lw, int len) +{ + u_int64_t sum = 0; + u_int64_t prefilled; + int offset; + union q_util q_util; + + if ((3 & (long) lw) == 0 && len == 20) { + sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4]; + REDUCE32; + return sum; + } + + if ((offset = 3 & (long) lw) != 0) { + const u_int32_t *masks = in_masks + (offset << 2); + lw = (u_int32_t *) (((RTHCUINTPTR) lw) - offset); + sum = *lw++ & masks[len >= 3 ? 3 : len]; + len -= 4 - offset; + if (len <= 0) { + REDUCE32; + return sum; + } + } +#if 0 + /* + * Force to cache line boundary. 
+ */ + offset = 32 - (0x1f & (long) lw); + if (offset < 32 && len > offset) { + len -= offset; + if (4 & offset) { + sum += (u_int64_t) lw[0]; + lw += 1; + } + if (8 & offset) { + sum += (u_int64_t) lw[0] + lw[1]; + lw += 2; + } + if (16 & offset) { + sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3]; + lw += 4; + } + } +#endif + /* + * access prefilling to start load of next cache line. + * then add current cache line + * save result of prefilling for loop iteration. + */ + prefilled = lw[0]; + while ((len -= 32) >= 4) { + u_int64_t prefilling = lw[8]; + sum += prefilled + lw[1] + lw[2] + lw[3] + + lw[4] + lw[5] + lw[6] + lw[7]; + lw += 8; + prefilled = prefilling; + } + if (len >= 0) { + sum += prefilled + lw[1] + lw[2] + lw[3] + + lw[4] + lw[5] + lw[6] + lw[7]; + lw += 8; + } else { + len += 32; + } + while ((len -= 16) >= 0) { + sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3]; + lw += 4; + } + len += 16; + while ((len -= 4) >= 0) { + sum += (u_int64_t) *lw++; + } + len += 4; + if (len > 0) + sum += (u_int64_t) (in_masks[len] & *lw); + REDUCE32; + return sum; +} + +u_short +in_addword(u_short a, u_short b) +{ + u_int64_t sum = a + b; + + ADDCARRY(sum); + return (sum); +} + +u_short +in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c) +{ + u_int64_t sum; + union q_util q_util; + union l_util l_util; + + sum = (u_int64_t) a + b + c; + REDUCE16; + return (sum); +} + +u_short +in_cksum_skip(struct mbuf *m, int len, int skip) +{ + u_int64_t sum = 0; + int mlen = 0; + int clen = 0; + caddr_t addr; + union q_util q_util; + union l_util l_util; + + len -= skip; + for (; skip && m; m = m->m_next) { + if (m->m_len > skip) { + mlen = m->m_len - skip; + addr = mtod(m, caddr_t) + skip; + goto skip_start; + } else { + skip -= m->m_len; + } + } + + for (; m && len; m = m->m_next) { + if (m->m_len == 0) + continue; + mlen = m->m_len; + addr = mtod(m, caddr_t); +skip_start: + if (len < mlen) + mlen = len; + if ((clen ^ (long) addr) & 1) + sum += in_cksumdata((const u_int32_t 
*)addr, mlen) << 8; + else + sum += in_cksumdata((const u_int32_t *)addr, mlen); + + clen += mlen; + len -= mlen; + } + REDUCE16; + return (~sum & 0xffff); +} + +u_int in_cksum_hdr(const struct ip *ip) +{ + u_int64_t sum = in_cksumdata((const u_int32_t *)ip, sizeof(struct ip)); + union q_util q_util; + union l_util l_util; + + REDUCE16; + return (~sum & 0xffff); +} +#else + +/* + * These asm statements require __volatile because they pass information + * via the condition codes. GCC does not currently provide a way to specify + * the condition codes as an input or output operand. + * + * The LOAD macro below is effectively a prefetch into cache. GCC will + * load the value into a register but will not use it. Since modern CPUs + * reorder operations, this will generally take place in parallel with + * other calculations. + */ +u_short +in_cksum_skip(m, len, skip) + struct mbuf *m; + int len; + int skip; +{ + register u_short *w; + register unsigned sum = 0; + register int mlen = 0; + int byte_swapped = 0; + union { char c[2]; u_short s; } su; + + len -= skip; + for (; skip && m; m = m->m_next) { + if (m->m_len > skip) { + mlen = m->m_len - skip; + w = (u_short *)(mtod(m, u_char *) + skip); + goto skip_start; + } else { + skip -= m->m_len; + } + } + + for (;m && len; m = m->m_next) { + if (m->m_len == 0) + continue; + w = mtod(m, u_short *); + if (mlen == -1) { + /* + * The first byte of this mbuf is the continuation + * of a word spanning between this mbuf and the + * last mbuf. + */ + + /* su.c[0] is already saved when scanning previous + * mbuf. 
sum was REDUCEd when we found mlen == -1 + */ + su.c[1] = *(u_char *)w; + sum += su.s; + w = (u_short *)((char *)w + 1); + mlen = m->m_len - 1; + len--; + } else + mlen = m->m_len; +skip_start: + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to long boundary so we do longword aligned + * memory operations + */ + if (3 & (int) w) { + REDUCE; + if ((1 & (int) w) && (mlen > 0)) { + sum <<= 8; + su.c[0] = *(char *)w; + w = (u_short *)((char *)w + 1); + mlen--; + byte_swapped = 1; + } + if ((2 & (int) w) && (mlen >= 2)) { + sum += *w++; + mlen -= 2; + } + } + /* + * Advance to a 486 cache line boundary. + */ + if (4 & (int) w && mlen >= 4) { + __asm __volatile ( + "addl %1, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[0]) + ); + w += 2; + mlen -= 4; + } + if (8 & (int) w && mlen >= 8) { + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]) + ); + w += 4; + mlen -= 8; + } + /* + * Do as much of the checksum as possible 32 bits at a time. + * In fact, this loop is unrolled to make overhead from + * branches &c small. + */ + mlen -= 1; + while ((mlen -= 32) >= 0) { + /* + * Add with carry 16 words and fold in the last + * carry by adding a 0 with carry. + * + * The early ADD(16) and the LOAD(32) are to load + * the next 2 cache lines in advance on 486's. The + * 486 has a penalty of 2 clock cycles for loading + * a cache line, plus whatever time the external + * memory takes to load the first word(s) addressed. + * These penalties are unavoidable. Subsequent + * accesses to a cache line being loaded (and to + * other external memory?) are delayed until the + * whole load finishes. These penalties are mostly + * avoided by not accessing external memory for + * 8 cycles after the ADD(16) and 12 cycles after + * the LOAD(32). 
The loop terminates when mlen + * is initially 33 (not 32) to guarantee that + * the LOAD(32) is within bounds. + */ + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl %3, %0\n" + "adcl %4, %0\n" + "adcl %5, %0\n" + "mov %6, %%eax\n" + "adcl %7, %0\n" + "adcl %8, %0\n" + "adcl %9, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[4]), + "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]), + "g" (((const u_int32_t *)w)[2]), + "g" (((const u_int32_t *)w)[3]), + "g" (((const u_int32_t *)w)[8]), + "g" (((const u_int32_t *)w)[5]), + "g" (((const u_int32_t *)w)[6]), + "g" (((const u_int32_t *)w)[7]) + : "eax" + ); + w += 16; + } + mlen += 32 + 1; + if (mlen >= 32) { + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl %3, %0\n" + "adcl %4, %0\n" + "adcl %5, %0\n" + "adcl %6, %0\n" + "adcl %7, %0\n" + "adcl %8, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[4]), + "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]), + "g" (((const u_int32_t *)w)[2]), + "g" (((const u_int32_t *)w)[3]), + "g" (((const u_int32_t *)w)[5]), + "g" (((const u_int32_t *)w)[6]), + "g" (((const u_int32_t *)w)[7]) + ); + w += 16; + mlen -= 32; + } + if (mlen >= 16) { + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl %3, %0\n" + "adcl %4, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]), + "g" (((const u_int32_t *)w)[2]), + "g" (((const u_int32_t *)w)[3]) + ); + w += 8; + mlen -= 16; + } + if (mlen >= 8) { + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]) + ); + w += 4; + mlen -= 8; + } + if (mlen == 0 && byte_swapped == 0) + continue; /* worth 1% maybe ?? 
*/ + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + if (byte_swapped) { + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + su.c[1] = *(char *)w; + sum += su.s; + mlen = 0; + } else + mlen = -1; + } else if (mlen == -1) + /* + * This mbuf has an odd number of bytes. + * There could be a word split between + * this mbuf and the next mbuf. + * Save the last byte (to prepend to next mbuf). + */ + su.c[0] = *(char *)w; + } + + if (len) /* the chain ended while len bytes were still expected */ + printf("%s: out of data by %d\n", __func__, len); + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte is shifted left by 8 bits) */ + su.c[1] = 0; + sum += su.s; + } + REDUCE; + return (~sum & 0xffff); +} +#endif diff --git a/src/VBox/Devices/Network/slirp/bsd/i386/include/in_cksum.h b/src/VBox/Devices/Network/slirp/bsd/i386/include/in_cksum.h new file mode 100644 index 00000000..0d20d193 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bsd/i386/include/in_cksum.h @@ -0,0 +1,145 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from tahoe: in_cksum.c 1.2 86/01/05 + * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91 + * from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp + * $FreeBSD: src/sys/i386/include/in_cksum.h,v 1.17.10.1.6.1 2009/04/15 03:14:26 kensmith Exp $ + */ + +#ifndef _MACHINE_IN_CKSUM_H_ +#define _MACHINE_IN_CKSUM_H_ 1 + +#ifndef VBOX +#ifndef _SYS_CDEFS_H_ +#error this file needs sys/cdefs.h as a prerequisite +#endif + +/* + * MP safe (alfred) + */ + +#include <sys/cdefs.h> +#else +# include "slirp.h" +#endif + +#define in_cksum(m, len) in_cksum_skip(m, len, 0) /* checksum with no leading bytes skipped */ + +/* + * It is useful to have an Internet checksum routine which is inlineable + * and optimized specifically for the task of computing IP header checksums + * in the normal case (where there are no options and the header length is + * therefore always exactly five 32-bit words). 
+ */ +#if defined(__GNUCLIKE_ASM) && !defined(__INTEL_COMPILER) +static __inline u_int +in_cksum_hdr(const struct ip *ip) +{ + register u_int sum = 0; + + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl %3, %0\n" + "adcl %4, %0\n" + "adcl %5, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)ip)[0]), + "g" (((const u_int32_t *)ip)[1]), + "g" (((const u_int32_t *)ip)[2]), + "g" (((const u_int32_t *)ip)[3]), + "g" (((const u_int32_t *)ip)[4]) + ); + /* fold the 32-bit sum into 16 bits, wrapping the carry back in */ + sum = (sum & 0xffff) + (sum >> 16); + if (sum > 0xffff) + sum -= 0xffff; + + return ~sum & 0xffff; +} + +/* Adds 256 to the host-order value of ip->ip_sum, adds the carry out of the low 16 bits back in and stores the result in network order. NOTE(review): presumably pairs with a TTL decrement done by the caller - confirm. */ static __inline void +in_cksum_update(struct ip *ip) +{ + int __tmpsum; + __tmpsum = (int)ntohs(ip->ip_sum) + 256; + ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16)); +} + +/* 16-bit addition of b into sum; the adcw adds the carry of the addw back in. */ static __inline u_short +in_addword(u_short sum, u_short b) +{ + /* __volatile is necessary because the condition codes are used. */ + __asm __volatile ( + "addw %1, %0\n" + "adcw $0, %0" + : "+r" (sum) + : "r" (b) + ); + return (sum); +} + +static __inline u_short +in_pseudo(u_int sum, u_int b, u_int c) +{ + /* __volatile is necessary because the condition codes are used. 
*/ + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (b), + "g" (c) + ); + /* fold the 32-bit sum into 16 bits, wrapping the carry back in */ + sum = (sum & 0xffff) + (sum >> 16); + if (sum > 0xffff) + sum -= 0xffff; + return (sum); +} + +#else /* no usable GNU-style inline asm: only the update macro is defined here, the other helpers are merely declared below */ +#define in_cksum_update(ip) \ + do { \ + int __tmpsum; \ + __tmpsum = (int)ntohs(ip->ip_sum) + 256; \ + ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16)); \ + } while(0) + +#endif + +#if defined(_KERNEL) || defined(VBOX) +#if !defined(__GNUCLIKE_ASM) || defined(__INTEL_COMPILER) +u_int in_cksum_hdr(const struct ip *ip); +u_short in_addword(u_short sum, u_short b); +u_short in_pseudo(u_int sum, u_int b, u_int c); +#endif +u_short in_cksum_skip(struct mbuf *m, int len, int skip); +#endif /* _KERNEL || VBOX */ + +#endif /* _MACHINE_IN_CKSUM_H_ */ diff --git a/src/VBox/Devices/Network/slirp/bsd/kern/Makefile.kup b/src/VBox/Devices/Network/slirp/bsd/kern/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bsd/kern/Makefile.kup diff --git a/src/VBox/Devices/Network/slirp/bsd/kern/kern_mbuf.c b/src/VBox/Devices/Network/slirp/bsd/kern/kern_mbuf.c new file mode 100644 index 00000000..c908e2fd --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bsd/kern/kern_mbuf.c @@ -0,0 +1,824 @@ +/*- + * Copyright (c) 2004, 2005, + * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions and the following + * disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/kern/kern_mbuf.c,v 1.32.2.5.2.1 2009/04/15 03:14:26 kensmith Exp $"); + +#include "opt_mac.h" +#include "opt_param.h" + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/domain.h> +#include <sys/eventhandler.h> +#include <sys/kernel.h> +#include <sys/protosw.h> +#include <sys/smp.h> +#include <sys/sysctl.h> + +#include <security/mac/mac_framework.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/uma.h> +#include <vm/uma_int.h> +#include <vm/uma_dbg.h> +#else +# include <iprt/param.h> +# include <slirp.h> +# define IN_BSD +# include "ext.h" +#endif + +/* + * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA + * Zones. + * + * Mbuf Clusters (2K, contiguous) are allocated from the Cluster + * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the + * administrator so desires. + * + * Mbufs are allocated from a UMA Master Zone called the Mbuf + * Zone. + * + * Additionally, FreeBSD provides a Packet Zone, which it + * configures as a Secondary Zone to the Mbuf Master Zone, + * thus sharing backend Slab kegs with the Mbuf Master Zone. 
+ * + * Thus common-case allocations and locking are simplified: + * + * m_clget() m_getcl() + * | | + * | .------------>[(Packet Cache)] m_get(), m_gethdr() + * | | [ Packet ] | + * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] + * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] + * | \________ | + * [ Cluster Keg ] \ / + * | [ Mbuf Keg ] + * [ Cluster Slabs ] | + * | [ Mbuf Slabs ] + * \____________(VM)_________________/ + * + * + * Whenever an object is allocated with uma_zalloc() out of + * one of the Zones its _ctor_ function is executed. The same + * for any deallocation through uma_zfree() the _dtor_ function + * is executed. + * + * Caches are per-CPU and are filled from the Master Zone. + * + * Whenever an object is allocated from the underlying global + * memory pool it gets pre-initialized with the _zinit_ functions. + * When the Kegs are overfull objects get decommissioned with + * _zfini_ functions and free'd back to the global memory pool. + * + */ + +#ifndef VBOX +int nmbclusters; /* limits number of mbuf clusters */ +int nmbjumbop; /* limits number of page size jumbo clusters */ +int nmbjumbo9; /* limits number of 9k jumbo clusters */ +int nmbjumbo16; /* limits number of 16k jumbo clusters */ +struct mbstat mbstat; +#endif + +/* + * tunable_mbinit() has to be run before init_maxsockets() thus + * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets() + * runs at SI_ORDER_ANY. + */ +static void +tunable_mbinit(void *dummy) +{ +#ifdef VBOX + PNATState pData = (PNATState)dummy; +#endif + /* This has to be done before VM init. */ + nmbclusters = 1024 + maxusers * 64; + nmbjumbop = nmbclusters / 2; + nmbjumbo9 = nmbjumbop / 2; + nmbjumbo16 = nmbjumbo9 / 2; /* each jumbo limit is half of the limit computed just before it */ + TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); +} +SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL); + +#ifndef VBOX +/* XXX: These should be tunables. Can't change UMA limits on the fly. 
*/ +/* Raise-only sysctl handler: a written value is applied, and zone_clust re-capped, only when it is greater than the current nmbclusters; any other written value yields EINVAL. */ static int +sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) +{ + int error, newnmbclusters; + + newnmbclusters = nmbclusters; + error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); + if (error == 0 && req->newptr) { + if (newnmbclusters > nmbclusters) { + nmbclusters = newnmbclusters; + uma_zone_set_max(zone_clust, nmbclusters); + EVENTHANDLER_INVOKE(nmbclusters_change); + } else + error = EINVAL; /* values that do not raise the limit are rejected */ + } + return (error); +} +SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, +&nmbclusters, 0, sysctl_nmbclusters, "IU", + "Maximum number of mbuf clusters allowed"); + +/* Same raise-only policy for nmbjumbop and zone_jumbop. */ static int +sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) +{ + int error, newnmbjumbop; + + newnmbjumbop = nmbjumbop; + error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); + if (error == 0 && req->newptr) { + if (newnmbjumbop> nmbjumbop) { + nmbjumbop = newnmbjumbop; + uma_zone_set_max(zone_jumbop, nmbjumbop); + } else + error = EINVAL; + } + return (error); +} +SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, +&nmbjumbop, 0, sysctl_nmbjumbop, "IU", + "Maximum number of mbuf page size jumbo clusters allowed"); + + +/* Same raise-only policy for nmbjumbo9 and zone_jumbo9. 
*/ static int +sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) +{ + int error, newnmbjumbo9; + + newnmbjumbo9 = nmbjumbo9; + error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); + if (error == 0 && req->newptr) { + if (newnmbjumbo9> nmbjumbo9) { + nmbjumbo9 = newnmbjumbo9; + uma_zone_set_max(zone_jumbo9, nmbjumbo9); + } else + error = EINVAL; + } + return (error); +} +SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, +&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", + "Maximum number of mbuf 9k jumbo clusters allowed"); + +/* Same raise-only policy for nmbjumbo16 and zone_jumbo16. */ static int +sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) +{ + int error, newnmbjumbo16; + + newnmbjumbo16 = nmbjumbo16; + error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); + if (error == 0 && req->newptr) { + if (newnmbjumbo16> nmbjumbo16) { + nmbjumbo16 = newnmbjumbo16; + uma_zone_set_max(zone_jumbo16, nmbjumbo16); + } else + error = EINVAL; 
+ } + return (error); +} +SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, +&nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", + "Maximum number of mbuf 16k jumbo clusters allowed"); + + + +SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, + "Mbuf general information and statistics"); + +/* + * Zones from which we allocate. + */ +uma_zone_t zone_mbuf; +uma_zone_t zone_clust; +uma_zone_t zone_pack; +uma_zone_t zone_jumbop; +uma_zone_t zone_jumbo9; +uma_zone_t zone_jumbo16; +uma_zone_t zone_ext_refcnt; + +/* + * Local prototypes. + */ +static int mb_ctor_mbuf(void *, int, void *, int); +static int mb_ctor_clust(void *, int, void *, int); +static int mb_ctor_pack(void *, int, void *, int); +static void mb_dtor_mbuf(void *, int, void *); +static void mb_dtor_clust(void *, int, void *); +static void mb_dtor_pack(void *, int, void *); +static int mb_zinit_pack(void *, int, int); +static void mb_zfini_pack(void *, int); +#else +/* + * Local prototypes. + * In the VBOX build every zone callback takes a leading PNATState argument. + */ +static int mb_ctor_mbuf(PNATState, void *, int, void *, int); +static int mb_ctor_clust(PNATState, void *, int, void *, int); +static int mb_ctor_pack(PNATState, void *, int, void *, int); +static void mb_dtor_mbuf(PNATState, void *, int, void *); +static void mb_dtor_clust(PNATState, void *, int, void *); +static void mb_dtor_pack(PNATState, void *, int, void *); +static int mb_zinit_pack(PNATState, void *, int, int); +static void mb_zfini_pack(PNATState, void *, int); +#endif + +/*static void mb_reclaim(void *); - unused */ +#ifndef VBOX +static void mbuf_init(void *); +static void *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int); +static void mbuf_jumbo_free(void *, int, u_int8_t); +#endif + +#ifndef VBOX +static MALLOC_DEFINE(M_JUMBOFRAME, "jumboframes", "mbuf jumbo frame buffers"); + +/* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */ +CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == 
MSIZE); +#else /* VBOX: wrap uma_zcreate() so that every call in this file passes pData as an extra first argument */ +#define uma_zcreate(a0, a1, a2, a3, a4, a5, a6, a7) \ + 
uma_zcreate(pData, a0, a1, a2, a3, a4, a5, a6, a7) +#endif + +/* + * Initialize FreeBSD Network buffer allocation. + */ +SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); +#ifndef VBOX +static void +#else /* VBOX: mbuf_init() is not static in this build */ +void +#endif +mbuf_init(void *dummy) +{ + + /* + * Configure UMA zones for Mbufs, Clusters, and Packets. + */ +#ifndef VBOX + zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, + mb_ctor_mbuf, mb_dtor_mbuf, +#ifdef INVARIANTS + trash_init, trash_fini, +#else + NULL, NULL, +#endif + MSIZE - 1, UMA_ZONE_MAXBUCKET); + + zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, + mb_ctor_clust, mb_dtor_clust, +#ifdef INVARIANTS + trash_init, trash_fini, +#else + NULL, NULL, +#endif + UMA_ALIGN_PTR, UMA_ZONE_REFCNT); +#else /*!VBOX*/ + PNATState pData = (PNATState)dummy; /* VBOX: dummy is cast to the PNATState used by the calls below */ + tunable_mbinit(pData); /* VBOX: the limits are computed by a direct call from here */ + zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, + mb_ctor_mbuf, mb_dtor_mbuf, + NULL, NULL, + MSIZE - 1, UMA_ZONE_MAXBUCKET); + if (nmbclusters > 0) + uma_zone_set_max(zone_mbuf, nmbclusters); /* VBOX only: the mbuf zone gets the same cap as the cluster zone */ + + zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, + mb_ctor_clust, mb_dtor_clust, + NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_REFCNT); +#endif /*VBOX*/ + if (nmbclusters > 0) + uma_zone_set_max(zone_clust, nmbclusters); + + zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, + mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); + + /* Make jumbo frame zone too. Page size, 9k and 16k. 
*/ +#ifndef VBOX + zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, + mb_ctor_clust, mb_dtor_clust, +#ifdef INVARIANTS + trash_init, trash_fini, +#else + NULL, NULL, +#endif + UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + if (nmbjumbop > 0) + uma_zone_set_max(zone_jumbop, nmbjumbop); + + zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, + mb_ctor_clust, mb_dtor_clust, +#ifdef INVARIANTS + trash_init, trash_fini, +#else + NULL, NULL, +#endif + UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + if (nmbjumbo9 > 0) + uma_zone_set_max(zone_jumbo9, nmbjumbo9); + uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); + uma_zone_set_freef(zone_jumbo9, mbuf_jumbo_free); + + zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, + mb_ctor_clust, mb_dtor_clust, +#ifdef INVARIANTS + trash_init, trash_fini, +#else + NULL, NULL, +#endif + UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + if (nmbjumbo16 > 0) + uma_zone_set_max(zone_jumbo16, nmbjumbo16); +#else /*!VBOX*/ /* VBOX build: the same three jumbo zones, always with NULL init/fini hooks and without the custom allocf/freef on zone_jumbo9 */ + zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, + mb_ctor_clust, mb_dtor_clust, + NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + if (nmbjumbop > 0) + uma_zone_set_max(zone_jumbop, nmbjumbop); + + zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, + mb_ctor_clust, mb_dtor_clust, + NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + if (nmbjumbo9 > 0) + uma_zone_set_max(zone_jumbo9, nmbjumbo9); + + zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, + mb_ctor_clust, mb_dtor_clust, + NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + if (nmbjumbo16 > 0) + uma_zone_set_max(zone_jumbo16, nmbjumbo16); +#endif /*VBOX*/ + + /* zone of u_int-sized items with no ctor/dtor; presumably these are the external reference counters - TODO confirm */ + zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), + NULL, NULL, + NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_ZINIT); + + /* 
uma_prealloc() goes here... */ + + /* + * Hook event handler for low-memory situation, used to + * drain protocols and push data back to the caches (UMA + * later pushes it back to VM). 
+ */ + /* NOTE(review): mb_reclaim is compiled out with #if 0 further down in this file, so this line can only build if EVENTHANDLER_REGISTER discards its arguments - confirm */ + EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, + EVENTHANDLER_PRI_FIRST); + + /* + * [Re]set counters and local statistics knobs. + * XXX Some of these should go and be replaced, but UMA stat + * gathering needs to be revised. + */ + mbstat.m_mbufs = 0; + mbstat.m_mclusts = 0; + mbstat.m_drain = 0; + mbstat.m_msize = MSIZE; + mbstat.m_mclbytes = MCLBYTES; + mbstat.m_minclsize = MINCLSIZE; + mbstat.m_mlen = MLEN; + mbstat.m_mhlen = MHLEN; + mbstat.m_numtypes = MT_NTYPES; + + mbstat.m_mcfail = mbstat.m_mpfail = 0; + mbstat.sf_iocnt = 0; + mbstat.sf_allocwait = mbstat.sf_allocfail = 0; +} + +#ifndef VBOX +/* + * UMA backend page allocator for the jumbo frame zones. + * + * Allocates kernel virtual memory that is backed by contiguous physical + * pages. + * + * The zone argument is not referenced; fWait is handed on to contigmalloc(). + */ +static void * +mbuf_jumbo_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int fWait) +{ + + /* Inform UMA that this allocator uses kernel_map/object. */ + *flags = UMA_SLAB_KERNEL; + return (contigmalloc(bytes, M_JUMBOFRAME, fWait, (vm_paddr_t)0, + ~(vm_paddr_t)0, 1, 0)); +} + +/* + * UMA backend page deallocator for the jumbo frame zones. + * The flags argument is not referenced. + */ +static void +mbuf_jumbo_free(void *mem, int size, u_int8_t flags) +{ + + contigfree(mem, size, M_JUMBOFRAME); +} +#endif + +/* + * Constructor for Mbuf master zone. + * + * The 'arg' pointer points to a mb_args structure which + * contains call-specific information required to support the + * mbuf allocation API. See mbuf.h. 
+ */ +static int +#ifndef VBOX +mb_ctor_mbuf(void *mem, int size, void *arg, int how) +#else +mb_ctor_mbuf(PNATState pData, void *mem, int size, void *arg, int how) +#endif +{ + struct mbuf *m; + struct mb_args *args; +#ifdef MAC + int error; +#endif + int flags; + short type; +#ifdef VBOX + NOREF(pData); +#endif + +#ifdef INVARIANTS + trash_ctor(mem, size, arg, how); +#elif defined(VBOX) + NOREF(size); + NOREF(how); +#endif + m = (struct mbuf *)mem; + args = (struct mb_args *)arg; + flags = args->flags; + type = args->type; + + /* + * The mbuf is initialized later. The caller has the + * responsibility to set up any MAC labels too. + */ + if (type == MT_NOINIT) + return (0); + + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_len = 0; + m->m_flags = flags; + m->m_type = type; + if (flags & M_PKTHDR) { /* packet-header mbuf: data pointer is set to m_pktdat and the header fields are reset */ + m->m_data = m->m_pktdat; + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.len = 0; + m->m_pkthdr.header = NULL; + m->m_pkthdr.csum_flags = 0; + m->m_pkthdr.csum_data = 0; + m->m_pkthdr.tso_segsz = 0; + m->m_pkthdr.ether_vtag = 0; + SLIST_INIT(&m->m_pkthdr.tags); +#ifdef MAC + /* If the label init fails, fail the alloc */ + error = mac_init_mbuf(m, how); + if (error) + return (error); +#endif + } else + m->m_data = m->m_dat; + return (0); +} + +/* + * The Mbuf master zone destructor. + * + * Here arg is not dereferenced: its value is read as a flag word and + * tested for MB_NOTAGS. 
+ */ +static void +#ifndef VBOX +mb_dtor_mbuf(void *mem, int size, void *arg) +#else +mb_dtor_mbuf(PNATState pData, void *mem, int size, void *arg) +#endif +{ + struct mbuf *m; + uintptr_t flags; +#ifdef VBOX + NOREF(pData); +#endif + + m = (struct mbuf *)mem; + flags = (uintptr_t)arg; + + /* delete the tag chain of a packet-header mbuf unless MB_NOTAGS was passed */ + if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) + m_tag_delete_chain(m, NULL); + KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); + KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); +#ifdef INVARIANTS + trash_dtor(mem, size, arg); +#elif defined(VBOX) + NOREF(size); + NOREF(arg); +#endif +} + +/* + * The Mbuf Packet zone destructor. 
+ */ +static void +#ifndef VBOX +mb_dtor_pack(void *mem, int size, void *arg) +#else +mb_dtor_pack(PNATState pData, void *mem, int size, void *arg) +#endif +{ + struct mbuf *m; + + m = (struct mbuf *)mem; + if ((m->m_flags & M_PKTHDR) != 0) /* packet-header mbuf: delete its tag chain first */ + m_tag_delete_chain(m, NULL); + + /* Make sure we've got a clean cluster back. */ + KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); + KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); + KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); + KASSERT(m->m_ext.ext_args == NULL, ("%s: ext_args != NULL", __func__)); + KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); + KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); + KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); +#ifdef INVARIANTS + trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); +#elif defined(VBOX) + NOREF(size); + NOREF(arg); +#endif + /* + * If there are processes blocked on zone_clust, waiting for pages to be freed up, + * cause them to be woken up by draining the packet zone. We are exposed to a race here + * (in the check for the UMA_ZFLAG_FULL) where we might miss the flag set, but that is + * deliberate. We don't want to acquire the zone lock for every mbuf free. + */ + if (uma_zone_exhausted_nolock(zone_clust)) + zone_drain(zone_pack); +} + +/* + * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. + * + * Here the 'arg' pointer points to the Mbuf which we + * are configuring cluster storage for. If 'arg' is + * NULL we allocate just the cluster without setting + * the mbuf to it. See mbuf.h. 
+ */ +static int +#ifndef VBOX +mb_ctor_clust(void *mem, int size, void *arg, int how) +#else +mb_ctor_clust(PNATState pData, void *mem, int size, void *arg, int how) +#endif +{ + struct mbuf *m; + u_int *refcnt; + int type; + uma_zone_t zone; +#ifdef VBOX + NOREF(how); /* NOTE(review): how is passed to NOREF() a second time in the #elif below, while pData gets no NOREF() in this function; possibly pData was meant here - confirm */ +#endif + +#ifdef INVARIANTS + trash_ctor(mem, size, arg, how); +#elif defined(VBOX) + NOREF(how); +#endif + switch (size) { /* the object size selects the ext type and the zone used for the reference counter lookup */ + case MCLBYTES: + type = EXT_CLUSTER; + zone = zone_clust; + break; +#if MJUMPAGESIZE != MCLBYTES + case MJUMPAGESIZE: + type = EXT_JUMBOP; + zone = zone_jumbop; + break; +#endif + case MJUM9BYTES: + type = EXT_JUMBO9; + zone = zone_jumbo9; + break; + case MJUM16BYTES: + type = EXT_JUMBO16; + zone = zone_jumbo16; + break; + default: + panic("unknown cluster size"); + break; + } + + m = (struct mbuf *)arg; + refcnt = uma_find_refcnt(zone, mem); + *refcnt = 1; /* a freshly constructed cluster starts with a count of one */ + if (m != NULL) { /* an mbuf was handed in through arg: attach the cluster to it */ + m->m_ext.ext_buf = (caddr_t)mem; + m->m_data = m->m_ext.ext_buf; + m->m_flags |= M_EXT; + m->m_ext.ext_free = NULL; + m->m_ext.ext_args = NULL; + m->m_ext.ext_size = size; + m->m_ext.ext_type = type; + m->m_ext.ref_cnt = refcnt; + } + + return (0); +} + +/* + * The Mbuf Cluster zone destructor. + * Only checks the reference count (and trashes the memory) under + * INVARIANTS; otherwise it does nothing. 
+ */ +static void +#ifndef VBOX +mb_dtor_clust(void *mem, int size, void *arg) +#else +mb_dtor_clust(PNATState pData, void *mem, int size, void *arg) +#endif +{ +#ifdef INVARIANTS + uma_zone_t zone; + + zone = m_getzone(size); + KASSERT(*(uma_find_refcnt(zone, mem)) <= 1, + ("%s: refcnt incorrect %u", __func__, + *(uma_find_refcnt(zone, mem))) ); + + trash_dtor(mem, size, arg); +#elif defined(VBOX) + NOREF(pData); + NOREF(mem); + NOREF(size); + NOREF(arg); +#endif +} + +/* + * The Packet secondary zone's init routine, executed on the + * object's transition from mbuf keg slab to zone cache. + */ +static int +#ifndef VBOX +mb_zinit_pack(void *mem, int size, int how) +#else +mb_zinit_pack(PNATState pData, void *mem, int size, int how) +#endif +{ + struct mbuf *m; + + m = (struct mbuf *)mem; /* m is virgin. 
*/ + /* allocate a cluster from zone_clust with m passed as the constructor argument */ + if (uma_zalloc_arg(zone_clust, m, how) == NULL || + m->m_ext.ext_buf == NULL) + return (ENOMEM); + m->m_ext.ext_type = EXT_PACKET; /* Override. */ +#ifdef INVARIANTS + trash_init(m->m_ext.ext_buf, MCLBYTES, how); +#elif defined(VBOX) + NOREF(size); +#endif + return (0); +} + +/* + * The Packet secondary zone's fini routine, executed on the + * object's transition from zone cache to keg slab. + */ +static void +#ifndef VBOX +mb_zfini_pack(void *mem, int size) +#else +mb_zfini_pack(PNATState pData, void *mem, int size) +#endif +{ + struct mbuf *m; + + m = (struct mbuf *)mem; +#ifdef INVARIANTS + trash_fini(m->m_ext.ext_buf, MCLBYTES); +#endif + uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); /* hand the attached cluster back to zone_clust */ +#ifdef INVARIANTS + trash_dtor(mem, size, NULL); +#elif defined(VBOX) + NOREF(size); +#endif +} + +/* + * The "packet" keg constructor. + */ +static int +#ifndef VBOX +mb_ctor_pack(void *mem, int size, void *arg, int how) +#else +mb_ctor_pack(PNATState pData, void *mem, int size, void *arg, int how) +#endif +{ + struct mbuf *m; + struct mb_args *args; +#ifdef MAC + int error; +#endif + int flags; + short type; +#ifdef VBOX + NOREF(pData); + NOREF(size); +#endif + + m = (struct mbuf *)mem; + args = (struct mb_args *)arg; + flags = args->flags; + type = args->type; + +#ifdef INVARIANTS + trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); +#elif defined(VBOX) + NOREF(how); +#endif + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_data = m->m_ext.ext_buf; /* data starts at the external buffer that is already attached */ + m->m_len = 0; + m->m_flags = (flags | M_EXT); + m->m_type = type; + + if (flags & M_PKTHDR) { + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.len = 0; + m->m_pkthdr.header = NULL; + m->m_pkthdr.csum_flags = 0; + m->m_pkthdr.csum_data = 0; + m->m_pkthdr.tso_segsz = 0; + m->m_pkthdr.ether_vtag = 0; + SLIST_INIT(&m->m_pkthdr.tags); +#ifdef 
MAC + /* If the label init fails, fail the alloc */ + error = mac_init_mbuf(m, how); + if (error) + return (error); +#endif + } + /* m_ext is already initialized. 
*/ + + return (0); +} + +#if 0 /* unused */ +/* + * This is the protocol drain routine. + * + * No locks should be held when this is called. The drain routines have to + * presently acquire some locks which raises the possibility of lock order + * reversal. + */ +static void +mb_reclaim(void *junk) +{ +#ifndef VBOX + struct domain *dp; + struct protosw *pr; + + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, + "mb_reclaim()"); + + /* call the drain hook of every protocol of every domain that has one set */ + for (dp = domains; dp != NULL; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_drain != NULL) + (*pr->pr_drain)(); +#else /* VBOX: no drain hooks are called */ + NOREF(junk); +#endif +} +#endif /* unused */ diff --git a/src/VBox/Devices/Network/slirp/bsd/kern/subr_sbuf.c b/src/VBox/Devices/Network/slirp/bsd/kern/subr_sbuf.c new file mode 100644 index 00000000..c2021f5b --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bsd/kern/subr_sbuf.c @@ -0,0 +1,594 @@ +/*- + * Copyright (c) 2000 Poul-Henning Kamp and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/kern/subr_sbuf.c,v 1.30.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +#include <sys/param.h> + +#ifdef _KERNEL +#include <sys/ctype.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/systm.h> +#include <sys/uio.h> +#include <machine/stdarg.h> +#else /* _KERNEL */ +#include <ctype.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#endif /* _KERNEL */ + +#include <sys/sbuf.h> + +#ifdef _KERNEL +static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers"); +#define SBMALLOC(size) malloc(size, M_SBUF, M_WAITOK) +#define SBFREE(buf) free(buf, M_SBUF) +#else /* _KERNEL */ +#define KASSERT(e, m) +#define SBMALLOC(size) malloc(size) +#define SBFREE(buf) free(buf) +#define min(x,y) MIN(x,y) +#endif /* _KERNEL */ +#else /* VBOX */ +# include <iprt/param.h> +# include <iprt/ctype.h> +# include <slirp.h> +# define SBMALLOC(size) RTMemAlloc((size)) +# define SBFREE(buf) RTMemFree((buf)) +#endif /* VBOX */ + +/* + * Predicates + * + * SBUF_HASROOM and SBUF_FREESPACE both keep one byte of s_size in + * reserve; presumably that byte is for a terminating NUL - TODO confirm. 
+ */ +#define SBUF_ISDYNAMIC(s) ((s)->s_flags & SBUF_DYNAMIC) +#define SBUF_ISDYNSTRUCT(s) ((s)->s_flags & SBUF_DYNSTRUCT) +#define SBUF_ISFINISHED(s) ((s)->s_flags & SBUF_FINISHED) +#define SBUF_HASOVERFLOWED(s) ((s)->s_flags & SBUF_OVERFLOWED) +#define SBUF_HASROOM(s) ((s)->s_len < (s)->s_size - 1) +#define SBUF_FREESPACE(s) ((s)->s_size - (s)->s_len - 1) +#define SBUF_CANEXTEND(s) ((s)->s_flags & SBUF_AUTOEXTEND) + +/* + * Set / clear 
flags + */ +#define SBUF_SETFLAG(s, f) do { (s)->s_flags |= (f); } while (0) +#define SBUF_CLEARFLAG(s, f) do { (s)->s_flags &= ~(f); } while (0) + +#define SBUF_MINEXTENDSIZE 16 /* Should be power of 2. */ +#define SBUF_MAXEXTENDSIZE PAGE_SIZE +#define SBUF_MAXEXTENDINCR PAGE_SIZE + +/* + * Debugging support + */ +#if defined(_KERNEL) && defined(INVARIANTS) +static void +_assert_sbuf_integrity(const char *fun, struct sbuf *s) +{ + KASSERT(s != NULL, + ("%s called with a NULL sbuf pointer", fun)); + KASSERT(s->s_buf != NULL, + ("%s called with uninitialized or corrupt sbuf", fun)); + KASSERT(s->s_len < s->s_size, + ("wrote past end of sbuf (%d >= %d)", s->s_len, s->s_size)); +} + +static void +_assert_sbuf_state(const char *fun, struct sbuf *s, int state) +{ + KASSERT((s->s_flags & SBUF_FINISHED) == state, + ("%s called with %sfinished or corrupt sbuf", fun, + (state ? "un" : ""))); +} +#define assert_sbuf_integrity(s) _assert_sbuf_integrity(__func__, (s)) +#define assert_sbuf_state(s, i) _assert_sbuf_state(__func__, (s), (i)) +#else /* _KERNEL && INVARIANTS */ +#define assert_sbuf_integrity(s) do { } while (0) +#define assert_sbuf_state(s, i) do { } while (0) +#endif /* _KERNEL && INVARIANTS */ + +static int +sbuf_extendsize(int size) +{ + int newsize; + + newsize = SBUF_MINEXTENDSIZE; + while (newsize < size) { + if (newsize < (int)SBUF_MAXEXTENDSIZE) + newsize *= 2; + else + newsize += SBUF_MAXEXTENDINCR; + } + + return (newsize); +} + + +/* + * Extend an sbuf. + */ +static int +sbuf_extend(struct sbuf *s, int addlen) +{ + char *newbuf; + int newsize; + + if (!SBUF_CANEXTEND(s)) + return (-1); + + newsize = sbuf_extendsize(s->s_size + addlen); + newbuf = (char *)SBMALLOC(newsize); + if (newbuf == NULL) + return (-1); + bcopy(s->s_buf, newbuf, s->s_size); + if (SBUF_ISDYNAMIC(s)) + SBFREE(s->s_buf); + else + SBUF_SETFLAG(s, SBUF_DYNAMIC); + s->s_buf = newbuf; + s->s_size = newsize; + return (0); +} + +/* + * Initialize an sbuf. 
+ * If buf is non-NULL, it points to a static or already-allocated string + * big enough to hold at least length characters. + */ +struct sbuf * +sbuf_new(struct sbuf *s, char *buf, int length, int flags) +{ + KASSERT(length >= 0, + ("attempt to create an sbuf of negative length (%d)", length)); + KASSERT((flags & ~SBUF_USRFLAGMSK) == 0, + ("%s called with invalid flags", __func__)); + + flags &= SBUF_USRFLAGMSK; + if (s == NULL) { + s = (struct sbuf *)SBMALLOC(sizeof *s); + if (s == NULL) + return (NULL); + bzero(s, sizeof *s); + s->s_flags = flags; + SBUF_SETFLAG(s, SBUF_DYNSTRUCT); + } else { + bzero(s, sizeof *s); + s->s_flags = flags; + } + s->s_size = length; + if (buf) { + s->s_buf = buf; + return (s); + } + if (flags & SBUF_AUTOEXTEND) + s->s_size = sbuf_extendsize(s->s_size); + s->s_buf = (char *)SBMALLOC(s->s_size); + if (s->s_buf == NULL) { + if (SBUF_ISDYNSTRUCT(s)) + SBFREE(s); + return (NULL); + } + SBUF_SETFLAG(s, SBUF_DYNAMIC); + return (s); +} + +#ifdef _KERNEL +/* + * Create an sbuf with uio data + */ +struct sbuf * +sbuf_uionew(struct sbuf *s, struct uio *uio, int *error) +{ + KASSERT(uio != NULL, + ("%s called with NULL uio pointer", __func__)); + KASSERT(error != NULL, + ("%s called with NULL error pointer", __func__)); + + s = sbuf_new(s, NULL, uio->uio_resid + 1, 0); + if (s == NULL) { + *error = ENOMEM; + return (NULL); + } + *error = uiomove(s->s_buf, uio->uio_resid, uio); + if (*error != 0) { + sbuf_delete(s); + return (NULL); + } + s->s_len = s->s_size - 1; + *error = 0; + return (s); +} +#endif + +/* + * Clear an sbuf and reset its position. + */ +void +sbuf_clear(struct sbuf *s) +{ + assert_sbuf_integrity(s); + /* don't care if it's finished or not */ + + SBUF_CLEARFLAG(s, SBUF_FINISHED); + SBUF_CLEARFLAG(s, SBUF_OVERFLOWED); + s->s_len = 0; +} + +/* + * Set the sbuf's end position to an arbitrary value. + * Effectively truncates the sbuf at the new position. 
+ */ +int +sbuf_setpos(struct sbuf *s, int pos) +{ + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + KASSERT(pos >= 0, + ("attempt to seek to a negative position (%d)", pos)); + KASSERT(pos < s->s_size, + ("attempt to seek past end of sbuf (%d >= %d)", pos, s->s_size)); + + if (pos < 0 || pos > s->s_len) + return (-1); + s->s_len = pos; + return (0); +} + +/* + * Append a byte string to an sbuf. + */ +int +sbuf_bcat(struct sbuf *s, const void *buf, size_t len) +{ + const char *str = buf; + + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + if (SBUF_HASOVERFLOWED(s)) + return (-1); + + for (; len; len--) { + if (!SBUF_HASROOM(s) && sbuf_extend(s, len) < 0) + break; + s->s_buf[s->s_len++] = *str++; + } + if (len) { + SBUF_SETFLAG(s, SBUF_OVERFLOWED); + return (-1); + } + return (0); +} + +#ifdef _KERNEL +/* + * Copy a byte string from userland into an sbuf. + */ +int +sbuf_bcopyin(struct sbuf *s, const void *uaddr, size_t len) +{ + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + if (SBUF_HASOVERFLOWED(s)) + return (-1); + + if (len == 0) + return (0); + if (len > SBUF_FREESPACE(s)) { + sbuf_extend(s, len - SBUF_FREESPACE(s)); + len = min(len, SBUF_FREESPACE(s)); + } + if (copyin(uaddr, s->s_buf + s->s_len, len) != 0) + return (-1); + s->s_len += len; + + return (0); +} +#endif + +/* + * Copy a byte string into an sbuf. + */ +int +sbuf_bcpy(struct sbuf *s, const void *buf, size_t len) +{ + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + sbuf_clear(s); + return (sbuf_bcat(s, buf, len)); +} + +/* + * Append a string to an sbuf. 
+ */ +int +sbuf_cat(struct sbuf *s, const char *str) +{ + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + if (SBUF_HASOVERFLOWED(s)) + return (-1); + + while (*str) { + if (!SBUF_HASROOM(s) && sbuf_extend(s, strlen(str)) < 0) + break; + s->s_buf[s->s_len++] = *str++; + } + if (*str) { + SBUF_SETFLAG(s, SBUF_OVERFLOWED); + return (-1); + } + return (0); +} + +#ifdef _KERNEL +/* + * Append a string from userland to an sbuf. + */ +int +sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len) +{ + size_t done; + + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + if (SBUF_HASOVERFLOWED(s)) + return (-1); + + if (len == 0) + len = SBUF_FREESPACE(s); /* XXX return 0? */ + if (len > SBUF_FREESPACE(s)) { + sbuf_extend(s, len); + len = min(len, SBUF_FREESPACE(s)); + } + switch (copyinstr(uaddr, s->s_buf + s->s_len, len + 1, &done)) { + case ENAMETOOLONG: + SBUF_SETFLAG(s, SBUF_OVERFLOWED); + RT_FALL_THRU(); + case 0: + s->s_len += done - 1; + break; + default: + return (-1); /* XXX */ + } + + return (done); +} +#endif + +/* + * Copy a string into an sbuf. + */ +int +sbuf_cpy(struct sbuf *s, const char *str) +{ + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + sbuf_clear(s); + return (sbuf_cat(s, str)); +} + +/* + * Format the given argument list and append the resulting string to an sbuf. 
+ */ +int +sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) +{ + va_list ap_copy; + int len; + + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + KASSERT(fmt != NULL, + ("%s called with a NULL format string", __func__)); + + if (SBUF_HASOVERFLOWED(s)) + return (-1); + + do { + va_copy(ap_copy, ap); +#ifndef VBOX + len = vsnprintf(&s->s_buf[s->s_len], SBUF_FREESPACE(s) + 1, + fmt, ap_copy); +#else + len = RTStrPrintfV(&s->s_buf[s->s_len], SBUF_FREESPACE(s) + 1, + fmt, ap_copy); +#endif + va_end(ap_copy); + } while (len > SBUF_FREESPACE(s) && + sbuf_extend(s, len - SBUF_FREESPACE(s)) == 0); + + /* + * s->s_len is the length of the string, without the terminating nul. + * When updating s->s_len, we must subtract 1 from the length that + * we passed into vsnprintf() because that length includes the + * terminating nul. + * + * vsnprintf() returns the amount that would have been copied, + * given sufficient space, hence the min() calculation below. + */ + s->s_len += min(len, SBUF_FREESPACE(s)); + if (!SBUF_HASROOM(s) && !SBUF_CANEXTEND(s)) + SBUF_SETFLAG(s, SBUF_OVERFLOWED); + + KASSERT(s->s_len < s->s_size, + ("wrote past end of sbuf (%d >= %d)", s->s_len, s->s_size)); + + if (SBUF_HASOVERFLOWED(s)) + return (-1); + return (0); +} + +/* + * Format the given arguments and append the resulting string to an sbuf. + */ +int +sbuf_printf(struct sbuf *s, const char *fmt, ...) +{ + va_list ap; + int result; + + va_start(ap, fmt); + result = sbuf_vprintf(s, fmt, ap); + va_end(ap); + return(result); +} + +/* + * Append a character to an sbuf. + */ +int +sbuf_putc(struct sbuf *s, int c) +{ + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + if (SBUF_HASOVERFLOWED(s)) + return (-1); + + if (!SBUF_HASROOM(s) && sbuf_extend(s, 1) < 0) { + SBUF_SETFLAG(s, SBUF_OVERFLOWED); + return (-1); + } + if (c != '\0') + s->s_buf[s->s_len++] = c; + return (0); +} + +/* + * Trim whitespace characters from end of an sbuf. 
+ */ +int +sbuf_trim(struct sbuf *s) +{ + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + if (SBUF_HASOVERFLOWED(s)) + return (-1); + +#ifndef VBOX + while (s->s_len && isspace(s->s_buf[s->s_len-1])) + --s->s_len; +#else + while (s->s_len && RT_C_IS_SPACE(s->s_buf[s->s_len-1])) + --s->s_len; +#endif + + return (0); +} + +/* + * Check if an sbuf overflowed + */ +int +sbuf_overflowed(struct sbuf *s) +{ + return SBUF_HASOVERFLOWED(s); +} + +/* + * Finish off an sbuf. + */ +void +sbuf_finish(struct sbuf *s) +{ + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + s->s_buf[s->s_len] = '\0'; + SBUF_CLEARFLAG(s, SBUF_OVERFLOWED); + SBUF_SETFLAG(s, SBUF_FINISHED); +} + +/* + * Return a pointer to the sbuf data. + */ +char * +sbuf_data(struct sbuf *s) +{ + assert_sbuf_integrity(s); + assert_sbuf_state(s, SBUF_FINISHED); + + return s->s_buf; +} + +/* + * Return the length of the sbuf data. + */ +int +sbuf_len(struct sbuf *s) +{ + assert_sbuf_integrity(s); + /* don't care if it's finished or not */ + + if (SBUF_HASOVERFLOWED(s)) + return (-1); + return s->s_len; +} + +/* + * Clear an sbuf, free its buffer if necessary. + */ +void +sbuf_delete(struct sbuf *s) +{ + int isdyn; + + assert_sbuf_integrity(s); + /* don't care if it's finished or not */ + + if (SBUF_ISDYNAMIC(s)) + SBFREE(s->s_buf); + isdyn = SBUF_ISDYNSTRUCT(s); + bzero(s, sizeof *s); + if (isdyn) + SBFREE(s); +} + +/* + * Check if an sbuf has been finished. + */ +int +sbuf_done(struct sbuf *s) +{ + + return(SBUF_ISFINISHED(s)); +} diff --git a/src/VBox/Devices/Network/slirp/bsd/kern/uipc_mbuf.c b/src/VBox/Devices/Network/slirp/bsd/kern/uipc_mbuf.c new file mode 100644 index 00000000..0af49faa --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bsd/kern/uipc_mbuf.c @@ -0,0 +1,2238 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1991, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 + */ + +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.174.2.3.2.1 2009/04/15 03:14:26 kensmith Exp $"); + +#include "opt_mac.h" +#include "opt_param.h" +#include "opt_mbuf_stress_test.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/limits.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/sysctl.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/uio.h> + +#include <security/mac/mac_framework.h> + +int max_linkhdr; +#ifndef VBOX +int max_protohdr; +#endif +int max_hdr; +int max_datalen; +#ifdef MBUF_STRESS_TEST +int m_defragpackets; +int m_defragbytes; +int m_defraguseless; +int m_defragfailure; +int m_defragrandomfailures; +#endif + +/* + * sysctl(8) exported objects + */ +SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD, + &max_linkhdr, 0, "Size of largest link layer header"); +SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD, + &max_protohdr, 0, "Size of largest protocol layer header"); +SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD, + &max_hdr, 0, "Size of largest link plus protocol header"); +SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD, + &max_datalen, 0, "Minimum space left in mbuf after max_hdr"); +#ifdef MBUF_STRESS_TEST +SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD, + &m_defragpackets, 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD, + &m_defragbytes, 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD, + &m_defraguseless, 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD, + &m_defragfailure, 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW, + &m_defragrandomfailures, 0, ""); +#endif +#else /* VBOX */ +# include <iprt/asm.h> +# include "slirp.h" +# define atomic_fetchadd_int(var, val) (ASMAtomicAddU32((var), (val))) 
+# define atomic_add_int(var, val) (ASMAtomicAddU32((var), (val))) +#endif /* VBOX */ + +/* + * Allocate a given length worth of mbufs and/or clusters (whatever fits + * best) and return a pointer to the top of the allocated chain. If an + * existing mbuf chain is provided, then we will append the new chain + * to the existing one but still return the top of the newly allocated + * chain. + */ +struct mbuf * +#ifndef VBOX +m_getm2(struct mbuf *m, int len, int how, short type, int flags) +#else +m_getm2(PNATState pData, struct mbuf *m, int len, int how, short type, int flags) +#endif +{ + struct mbuf *mb, *nm = NULL, *mtail = NULL; + + KASSERT(len >= 0, ("%s: len is < 0", __func__)); + + /* Validate flags. */ + flags &= (M_PKTHDR | M_EOR); + + /* Packet header mbuf must be first in chain. */ + if ((flags & M_PKTHDR) && m != NULL) + flags &= ~M_PKTHDR; + + /* Loop and append maximum sized mbufs to the chain tail. */ + while (len > 0) { +#ifndef VBOX + if (len > MCLBYTES) + mb = m_getjcl(how, type, (flags & M_PKTHDR), + MJUMPAGESIZE); + else if (len >= MINCLSIZE) + mb = m_getcl(how, type, (flags & M_PKTHDR)); + else if (flags & M_PKTHDR) + mb = m_gethdr(how, type); + else + mb = m_get(how, type); + + /* Fail the whole operation if one mbuf can't be allocated. */ + if (mb == NULL) { + if (nm != NULL) + m_freem(nm); + return (NULL); + } +#else + if (len > MCLBYTES) + mb = m_getjcl(pData, how, type, (flags & M_PKTHDR), + MJUMPAGESIZE); + else if (len >= MINCLSIZE) + mb = m_getcl(pData, how, type, (flags & M_PKTHDR)); + else if (flags & M_PKTHDR) + mb = m_gethdr(pData, how, type); + else + mb = m_get(pData, how, type); + /* Fail the whole operation if one mbuf can't be allocated. */ + if (mb == NULL) { + if (nm != NULL) + m_freem(pData, nm); + return (NULL); + } +#endif + + /* Book keeping. */ + len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size : + ((mb->m_flags & M_PKTHDR) ? 
MHLEN : MLEN); + if (mtail != NULL) + mtail->m_next = mb; + else + nm = mb; + mtail = mb; + flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ + } + if (flags & M_EOR) + mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ + + /* If mbuf was supplied, append new chain to the end of it. */ + if (m != NULL) { + for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) + ; + mtail->m_next = nm; + mtail->m_flags &= ~M_EOR; + } else + m = nm; + + return (m); +} + +/* + * Free an entire chain of mbufs and associated external buffers, if + * applicable. + */ +void +#ifndef VBOX +m_freem(struct mbuf *mb) +#else +m_freem(PNATState pData, struct mbuf *mb) +#endif +{ + + while (mb != NULL) + mb = m_free(pData, mb); +} + +/*- + * Configure a provided mbuf to refer to the provided external storage + * buffer and setup a reference count for said buffer. If the setting + * up of the reference count fails, the M_EXT bit will not be set. If + * successfull, the M_EXT bit is set in the mbuf's flags. + * + * Arguments: + * mb The existing mbuf to which to attach the provided buffer. + * buf The address of the provided external storage buffer. + * size The size of the provided buffer. + * freef A pointer to a routine that is responsible for freeing the + * provided external storage buffer. + * args A pointer to an argument structure (of any type) to be passed + * to the provided freef routine (may be NULL). + * flags Any other flags to be passed to the provided mbuf. + * type The type that the external storage buffer should be + * labeled with. + * + * Returns: + * Nothing. 
+ */ +void +#ifndef VBOX +m_extadd(struct mbuf *mb, caddr_t buf, u_int size, + void (*freef)(void *, void *), void *args, int flags, int type) +#else +m_extadd(PNATState pData, struct mbuf *mb, caddr_t buf, u_int size, + void (*freef)(void *, void *), void *args, int flags, int type) +#endif +{ + KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); + + if (type != EXT_EXTREF) + mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT); + if (mb->m_ext.ref_cnt != NULL) { + *(mb->m_ext.ref_cnt) = 1; + mb->m_flags |= (M_EXT | flags); + mb->m_ext.ext_buf = buf; + mb->m_data = mb->m_ext.ext_buf; + mb->m_ext.ext_size = size; + mb->m_ext.ext_free = freef; + mb->m_ext.ext_args = args; + mb->m_ext.ext_type = type; + } +} + +/* + * Non-directly-exported function to clean up after mbufs with M_EXT + * storage attached to them if the reference count hits 1. + */ +void +#ifndef VBOX +mb_free_ext(struct mbuf *m) +#else +mb_free_ext(PNATState pData, struct mbuf *m) +#endif +{ + int skipmbuf; + + KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); + KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); + + + /* + * check if the header is embedded in the cluster + */ + skipmbuf = (m->m_flags & M_NOFREE); + + /* Free attached storage if this mbuf is the only reference to it. */ + if (*(m->m_ext.ref_cnt) == 1 || + atomic_fetchadd_int(m->m_ext.ref_cnt, (uint32_t)-1) == 1) { + switch (m->m_ext.ext_type) { + case EXT_PACKET: /* The packet zone is special. */ + if (*(m->m_ext.ref_cnt) == 0) + *(m->m_ext.ref_cnt) = 1; + uma_zfree(zone_pack, m); + return; /* Job done. 
*/ + case EXT_CLUSTER: + uma_zfree(zone_clust, m->m_ext.ext_buf); + break; + case EXT_JUMBOP: + uma_zfree(zone_jumbop, m->m_ext.ext_buf); + break; + case EXT_JUMBO9: + uma_zfree(zone_jumbo9, m->m_ext.ext_buf); + break; + case EXT_JUMBO16: + uma_zfree(zone_jumbo16, m->m_ext.ext_buf); + break; + case EXT_SFBUF: + case EXT_NET_DRV: + case EXT_MOD_TYPE: + case EXT_DISPOSABLE: +#ifndef VBOX + /* This code is dead in VBOX port of BSD mbufs (probably will be used for EXT_SBUFS some day) + * @todo once bsd sbufs will be on trunk think about this code. + */ + *(m->m_ext.ref_cnt) = 0; + uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *, + m->m_ext.ref_cnt)); +#else + AssertMsgFailed(("unimplemented")); +#endif + RT_FALL_THRU(); + case EXT_EXTREF: + KASSERT(m->m_ext.ext_free != NULL, + ("%s: ext_free not set", __func__)); + (*(m->m_ext.ext_free))(m->m_ext.ext_buf, + m->m_ext.ext_args); + break; + default: + KASSERT(m->m_ext.ext_type == 0, + ("%s: unknown ext_type", __func__)); + } + } + if (skipmbuf) + return; + + /* + * Free this mbuf back to the mbuf zone with all m_ext + * information purged. + */ + m->m_ext.ext_buf = NULL; + m->m_ext.ext_free = NULL; + m->m_ext.ext_args = NULL; + m->m_ext.ref_cnt = NULL; + m->m_ext.ext_size = 0; + m->m_ext.ext_type = 0; + m->m_flags &= ~M_EXT; + uma_zfree(zone_mbuf, m); +} + +/* + * Attach the cluster from *m to *n, set up m_ext in *n + * and bump the refcount of the cluster. 
+ */ +static void +mb_dupcl(struct mbuf *n, struct mbuf *m) +{ + KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); + KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); + KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); + + if (*(m->m_ext.ref_cnt) == 1) + *(m->m_ext.ref_cnt) += 1; + else + atomic_add_int(m->m_ext.ref_cnt, 1); + n->m_ext.ext_buf = m->m_ext.ext_buf; + n->m_ext.ext_free = m->m_ext.ext_free; + n->m_ext.ext_args = m->m_ext.ext_args; + n->m_ext.ext_size = m->m_ext.ext_size; + n->m_ext.ref_cnt = m->m_ext.ref_cnt; + n->m_ext.ext_type = m->m_ext.ext_type; + n->m_flags |= M_EXT; +} + +/* + * Clean up mbuf (chain) from any tags and packet headers. + * If "all" is set then the first mbuf in the chain will be + * cleaned too. + */ +void +m_demote(struct mbuf *m0, int all) +{ + struct mbuf *m; + + for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) { + if (m->m_flags & M_PKTHDR) { + m_tag_delete_chain(m, NULL); + m->m_flags &= ~M_PKTHDR; + bzero(&m->m_pkthdr, sizeof(struct pkthdr)); + } + if (m->m_type == MT_HEADER) + m->m_type = MT_DATA; + if (m != m0 && m->m_nextpkt != NULL) + m->m_nextpkt = NULL; + m->m_flags = m->m_flags & (M_EXT|M_EOR|M_RDONLY|M_FREELIST); + } +} + +/* + * Sanity checks on mbuf (chain) for use in KASSERT() and general + * debugging. + * Returns 0 or panics when bad and 1 on all tests passed. + * Sanitize, 0 to run M_SANITY_ACTION, 1 to garble things so they + * blow up later. + */ +int +#ifndef VBOX +m_sanity(struct mbuf *m0, int sanitize) +#else +m_sanity(PNATState pData, struct mbuf *m0, int sanitize) +#endif +{ + struct mbuf *m; + caddr_t a, b; + int pktlen = 0; + +#ifdef INVARIANTS +#define M_SANITY_ACTION(s) panic("mbuf %p: " s, m) +#else +#define M_SANITY_ACTION(s) printf("mbuf %p: " s, m) +#endif + + for (m = m0; m != NULL; m = m->m_next) { + /* + * Basic pointer checks. If any of these fails then some + * unrelated kernel memory before or after us is trashed. 
+ * No way to recover from that. + */ + a = ((m->m_flags & M_EXT) ? m->m_ext.ext_buf : + ((m->m_flags & M_PKTHDR) ? (caddr_t)(&m->m_pktdat) : + (caddr_t)(&m->m_dat)) ); + b = (caddr_t)(a + (m->m_flags & M_EXT ? m->m_ext.ext_size : + ((m->m_flags & M_PKTHDR) ? MHLEN : MLEN))); + if ((caddr_t)m->m_data < a) + M_SANITY_ACTION("m_data outside mbuf data range left"); + if ((caddr_t)m->m_data > b) + M_SANITY_ACTION("m_data outside mbuf data range right"); + if ((caddr_t)m->m_data + m->m_len > b) + M_SANITY_ACTION("m_data + m_len exeeds mbuf space"); + if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.header) { + if ((caddr_t)m->m_pkthdr.header < a || + (caddr_t)m->m_pkthdr.header > b) + M_SANITY_ACTION("m_pkthdr.header outside mbuf data range"); + } + + /* m->m_nextpkt may only be set on first mbuf in chain. */ + if (m != m0 && m->m_nextpkt != NULL) { + if (sanitize) { +#ifndef VBOX + m_freem(m->m_nextpkt); +#else + m_freem(pData, m->m_nextpkt); +#endif + m->m_nextpkt = (struct mbuf *)(uintptr_t)UINT32_C(0xDEADC0DE); + } else + M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf"); + } + + /* packet length (not mbuf length!) calculation */ + if (m0->m_flags & M_PKTHDR) + pktlen += m->m_len; + + /* m_tags may only be attached to first mbuf in chain. */ + if (m != m0 && m->m_flags & M_PKTHDR && + !SLIST_EMPTY(&m->m_pkthdr.tags)) { + if (sanitize) { + m_tag_delete_chain(m, NULL); + /* put in 0xDEADC0DE perhaps? 
*/ + } else + M_SANITY_ACTION("m_tags on in-chain mbuf"); + } + + /* M_PKTHDR may only be set on first mbuf in chain */ + if (m != m0 && m->m_flags & M_PKTHDR) { + if (sanitize) { + bzero(&m->m_pkthdr, sizeof(m->m_pkthdr)); + m->m_flags &= ~M_PKTHDR; + /* put in 0xDEADCODE and leave hdr flag in */ + } else + M_SANITY_ACTION("M_PKTHDR on in-chain mbuf"); + } + } + m = m0; + if (pktlen && pktlen != m->m_pkthdr.len) { + if (sanitize) + m->m_pkthdr.len = 0; + else + M_SANITY_ACTION("m_pkthdr.len != mbuf chain length"); + } + return 1; + +#undef M_SANITY_ACTION +} + + +/* + * "Move" mbuf pkthdr from "from" to "to". + * "from" must have M_PKTHDR set, and "to" must be empty. + */ +void +m_move_pkthdr(struct mbuf *to, struct mbuf *from) +{ + +#if 0 + /* see below for why these are not enabled */ + M_ASSERTPKTHDR(to); + /* Note: with MAC, this may not be a good assertion. */ + KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), + ("m_move_pkthdr: to has tags")); +#endif +#ifdef MAC + /* + * XXXMAC: It could be this should also occur for non-MAC? + */ + if (to->m_flags & M_PKTHDR) + m_tag_delete_chain(to, NULL); +#endif + to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); + if ((to->m_flags & M_EXT) == 0) + to->m_data = to->m_pktdat; + to->m_pkthdr = from->m_pkthdr; /* especially tags */ + SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ + from->m_flags &= ~M_PKTHDR; +} + +/* + * Duplicate "from"'s mbuf pkthdr in "to". + * "from" must have M_PKTHDR set, and "to" must be empty. + * In particular, this does a deep copy of the packet tags. + */ +int +m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) +{ + +#if 0 + /* + * The mbuf allocator only initializes the pkthdr + * when the mbuf is allocated with MGETHDR. Many users + * (e.g. m_copy*, m_prepend) use MGET and then + * smash the pkthdr as needed causing these + * assertions to trip. For now just disable them. + */ + M_ASSERTPKTHDR(to); + /* Note: with MAC, this may not be a good assertion. 
*/ + KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags")); +#endif + MBUF_CHECKSLEEP(how); +#ifdef MAC + if (to->m_flags & M_PKTHDR) + m_tag_delete_chain(to, NULL); +#endif + to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); + if ((to->m_flags & M_EXT) == 0) + to->m_data = to->m_pktdat; + to->m_pkthdr = from->m_pkthdr; + SLIST_INIT(&to->m_pkthdr.tags); + return (m_tag_copy_chain(to, from, MBTOM(how))); +} + +/* + * Lesser-used path for M_PREPEND: + * allocate new mbuf to prepend to chain, + * copy junk along. + */ +struct mbuf * +#ifndef VBOX +m_prepend(struct mbuf *m, int len, int how) +#else +m_prepend(PNATState pData, struct mbuf *m, int len, int how) +#endif +{ + struct mbuf *mn; + + if (m->m_flags & M_PKTHDR) + MGETHDR(mn, how, m->m_type); + else + MGET(mn, how, m->m_type); + if (mn == NULL) { +#ifndef VBOX + m_freem(m); +#else + m_freem(pData, m); +#endif + return (NULL); + } + if (m->m_flags & M_PKTHDR) + M_MOVE_PKTHDR(mn, m); + mn->m_next = m; + m = mn; + if(m->m_flags & M_PKTHDR) { + if (len < MHLEN) + MH_ALIGN(m, len); + } else { + if (len < MLEN) + M_ALIGN(m, len); + } + m->m_len = len; + return (m); +} + +/* + * Make a copy of an mbuf chain starting "off0" bytes from the beginning, + * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. + * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller. + * Note that the copy is read-only, because clusters are not copied, + * only their reference counts are incremented. 
+ */ +struct mbuf * +#ifndef VBOX +m_copym(struct mbuf *m, int off0, int len, int fWait) +#else +m_copym(PNATState pData, struct mbuf *m, int off0, int len, int fWait) +#endif +{ + struct mbuf *n, **np; + int off = off0; + struct mbuf *top; + int copyhdr = 0; + + KASSERT(off >= 0, ("m_copym, negative off %d", off)); + KASSERT(len >= 0, ("m_copym, negative len %d", len)); + MBUF_CHECKSLEEP(fWait); + if (off == 0 && m->m_flags & M_PKTHDR) + copyhdr = 1; + while (off > 0) { + KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + np = ⊤ + top = 0; + while (len > 0) { + if (m == NULL) { + KASSERT(len == M_COPYALL, + ("m_copym, length > size of mbuf chain")); + break; + } + if (copyhdr) + MGETHDR(n, fWait, m->m_type); + else + MGET(n, fWait, m->m_type); + *np = n; + if (n == NULL) + goto nospace; + if (copyhdr) { + if (!m_dup_pkthdr(n, m, fWait)) + goto nospace; + if (len == M_COPYALL) + n->m_pkthdr.len -= off0; + else + n->m_pkthdr.len = len; + copyhdr = 0; + } + n->m_len = min(len, m->m_len - off); + if (m->m_flags & M_EXT) { + n->m_data = m->m_data + off; + mb_dupcl(n, m); + } else + bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), + (u_int)n->m_len); + if (len != M_COPYALL) + len -= n->m_len; + off = 0; + m = m->m_next; + np = &n->m_next; + } + if (top == NULL) + mbstat.m_mcfail++; /* XXX: No consistency. */ + + return (top); +nospace: +#ifndef VBOX + m_freem(top); +#else + m_freem(pData, top); +#endif + mbstat.m_mcfail++; /* XXX: No consistency. */ + return (NULL); +} + +/* + * Returns mbuf chain with new head for the prepending case. + * Copies from mbuf (chain) n from off for len to mbuf (chain) m + * either prepending or appending the data. + * The resulting mbuf (chain) m is fully writeable. 
+ * m is destination (is made writeable) + * n is source, off is offset in source, len is len from offset + * dir, 0 append, 1 prepend + * how, wait or nowait + */ + +static int +m_bcopyxxx(void *s, void *t, u_int len) +{ + bcopy(s, t, (size_t)len); + return 0; +} + +struct mbuf * +#ifndef VBOX +m_copymdata(struct mbuf *m, struct mbuf *n, int off, int len, + int prep, int how) +#else +m_copymdata(PNATState pData, struct mbuf *m, struct mbuf *n, int off, int len, + int prep, int how) +#endif +{ + struct mbuf *mm, *x, *z, *prev = NULL; + caddr_t p; + int i, nlen = 0; + caddr_t buf[MLEN]; + + KASSERT(m != NULL && n != NULL, ("m_copymdata, no target or source")); + KASSERT(off >= 0, ("m_copymdata, negative off %d", off)); + KASSERT(len >= 0, ("m_copymdata, negative len %d", len)); + KASSERT(prep == 0 || prep == 1, ("m_copymdata, unknown direction %d", prep)); + + mm = m; + if (!prep) { + while(mm->m_next) { + prev = mm; + mm = mm->m_next; + } + } + for (z = n; z != NULL; z = z->m_next) + nlen += z->m_len; + if (len == M_COPYALL) + len = nlen - off; + if (off + len > nlen || len < 1) + return NULL; + + if (!M_WRITABLE(mm)) { + /* XXX: Use proper m_xxx function instead. */ +#ifndef VBOX + x = m_getcl(how, MT_DATA, mm->m_flags); +#else + x = m_getcl(pData, how, MT_DATA, mm->m_flags); +#endif + if (x == NULL) + return NULL; + bcopy(mm->m_ext.ext_buf, x->m_ext.ext_buf, x->m_ext.ext_size); + p = x->m_ext.ext_buf + (mm->m_data - mm->m_ext.ext_buf); + x->m_data = p; + mm->m_next = NULL; + if (mm != m) + prev->m_next = x; +#ifndef VBOX + m_free(mm); +#else + m_free(pData, mm); +#endif + mm = x; + } + + /* + * Append/prepend the data. Allocating mbufs as necessary. + */ + /* Shortcut if enough free space in first/last mbuf. 
*/ + if (!prep && M_TRAILINGSPACE(mm) >= len) { + m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t) + + mm->m_len); + mm->m_len += len; + mm->m_pkthdr.len += len; + return m; + } + if (prep && M_LEADINGSPACE(mm) >= len) { + mm->m_data = mtod(mm, caddr_t) - len; + m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t)); + mm->m_len += len; + mm->m_pkthdr.len += len; + return mm; + } + + /* Expand first/last mbuf to cluster if possible. */ + if (!prep && !(mm->m_flags & M_EXT) && len > M_TRAILINGSPACE(mm)) { + bcopy(mm->m_data, &buf, mm->m_len); +#ifndef VBOX + m_clget(mm, how); +#else + m_clget(pData, mm, how); +#endif + if (!(mm->m_flags & M_EXT)) + return NULL; + bcopy(&buf, mm->m_ext.ext_buf, mm->m_len); + mm->m_data = mm->m_ext.ext_buf; + mm->m_pkthdr.header = NULL; + } + if (prep && !(mm->m_flags & M_EXT) && len > M_LEADINGSPACE(mm)) { + bcopy(mm->m_data, &buf, mm->m_len); +#ifndef VBOX + m_clget(mm, how); +#else + m_clget(pData, mm, how); +#endif + if (!(mm->m_flags & M_EXT)) + return NULL; + bcopy(&buf, (caddr_t *)mm->m_ext.ext_buf + + mm->m_ext.ext_size - mm->m_len, mm->m_len); + mm->m_data = (caddr_t)mm->m_ext.ext_buf + + mm->m_ext.ext_size - mm->m_len; + mm->m_pkthdr.header = NULL; + } + + /* Append/prepend as many mbuf (clusters) as necessary to fit len. */ + if (!prep && len > M_TRAILINGSPACE(mm)) { + if (!m_getm(mm, len - M_TRAILINGSPACE(mm), how, MT_DATA)) + return NULL; + } + if (prep && len > M_LEADINGSPACE(mm)) { + if (!(z = m_getm(NULL, len - M_LEADINGSPACE(mm), how, MT_DATA))) + return NULL; + i = 0; + for (x = z; x != NULL; x = x->m_next) { + i += x->m_flags & M_EXT ? x->m_ext.ext_size : + (x->m_flags & M_PKTHDR ? MHLEN : MLEN); + if (!x->m_next) + break; + } + z->m_data += i - len; + m_move_pkthdr(mm, z); + x->m_next = mm; + mm = z; + } + + /* Seek to start position in source mbuf. Optimization for long chains. */ + while (off > 0) { + if (off < n->m_len) + break; + off -= n->m_len; + n = n->m_next; + } + + /* Copy data into target mbuf. 
*/ + z = mm; + while (len > 0) { + KASSERT(z != NULL, ("m_copymdata, falling off target edge")); + i = M_TRAILINGSPACE(z); + m_apply(n, off, i, m_bcopyxxx, mtod(z, caddr_t) + z->m_len); + z->m_len += i; + /* fixup pkthdr.len if necessary */ + if ((prep ? mm : m)->m_flags & M_PKTHDR) + (prep ? mm : m)->m_pkthdr.len += i; + off += i; + len -= i; + z = z->m_next; + } + return (prep ? mm : m); +} + +/* + * Copy an entire packet, including header (which must be present). + * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. + * Note that the copy is read-only, because clusters are not copied, + * only their reference counts are incremented. + * Preserve alignment of the first mbuf so if the creator has left + * some room at the beginning (e.g. for inserting protocol headers) + * the copies still have the room available. + */ +struct mbuf * +#ifndef VBOX +m_copypacket(struct mbuf *m, int how) +#else +m_copypacket(PNATState pData, struct mbuf *m, int how) +#endif +{ + struct mbuf *top, *n, *o; + + MBUF_CHECKSLEEP(how); + MGET(n, how, m->m_type); + top = n; + if (n == NULL) + goto nospace; + + if (!m_dup_pkthdr(n, m, how)) + goto nospace; + n->m_len = m->m_len; + if (m->m_flags & M_EXT) { + n->m_data = m->m_data; + mb_dupcl(n, m); + } else { + n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); + bcopy(mtod(m, char *), mtod(n, char *), n->m_len); + } + + m = m->m_next; + while (m) { + MGET(o, how, m->m_type); + if (o == NULL) + goto nospace; + + n->m_next = o; + n = n->m_next; + + n->m_len = m->m_len; + if (m->m_flags & M_EXT) { + n->m_data = m->m_data; + mb_dupcl(n, m); + } else { + bcopy(mtod(m, char *), mtod(n, char *), n->m_len); + } + + m = m->m_next; + } + return top; +nospace: +#ifndef VBOX + m_freem(top); +#else + m_freem(pData, top); +#endif + mbstat.m_mcfail++; /* XXX: No consistency. */ + return (NULL); +} + +/* + * Copy data from an mbuf chain starting "off" bytes from the beginning, + * continuing for "len" bytes, into the indicated buffer. 
+ */ +void +m_copydata(const struct mbuf *m, int off, int len, caddr_t cp) +{ + u_int count; + + KASSERT(off >= 0, ("m_copydata, negative off %d", off)); + KASSERT(len >= 0, ("m_copydata, negative len %d", len)); + while (off > 0) { + KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); + count = min(m->m_len - off, len); + bcopy(mtod(m, caddr_t) + off, cp, count); + len -= count; + cp += count; + off = 0; + m = m->m_next; + } +} + +/* + * Copy a packet header mbuf chain into a completely new chain, including + * copying any mbuf clusters. Use this instead of m_copypacket() when + * you need a writable copy of an mbuf chain. + */ +struct mbuf * +#ifndef VBOX +m_dup(struct mbuf *m, int how) +#else +m_dup(PNATState pData, struct mbuf *m, int how) +#endif +{ + struct mbuf **p, *top = NULL; + int remain, moff, nsize; + + MBUF_CHECKSLEEP(how); + /* Sanity check */ + if (m == NULL) + return (NULL); + M_ASSERTPKTHDR(m); + + /* While there's more data, get a new mbuf, tack it on, and fill it */ + remain = m->m_pkthdr.len; + moff = 0; + p = ⊤ + while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ + struct mbuf *n; + + /* Get the next new mbuf */ + if (remain >= MINCLSIZE) { +#ifndef VBOX + n = m_getcl(how, m->m_type, 0); +#else + n = m_getcl(pData, how, m->m_type, 0); +#endif + nsize = MCLBYTES; + } else { +#ifndef VBOX + n = m_get(how, m->m_type); +#else + n = m_get(pData, how, m->m_type); +#endif + nsize = MLEN; + } + if (n == NULL) + goto nospace; + + if (top == NULL) { /* First one, must be PKTHDR */ + if (!m_dup_pkthdr(n, m, how)) { +#ifndef VBOX + m_free(n); +#else + m_free(pData, n); +#endif + goto nospace; + } + if ((n->m_flags & M_EXT) == 0) + nsize = MHLEN; + } + n->m_len = 0; + + /* Link it into the new chain */ + *p = n; + p = &n->m_next; + + /* Copy data from original mbuf(s) 
into new mbuf */ + while (n->m_len < nsize && m != NULL) { + int chunk = min(nsize - n->m_len, m->m_len - moff); + + bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); + moff += chunk; + n->m_len += chunk; + remain -= chunk; + if (moff == m->m_len) { + m = m->m_next; + moff = 0; + } + } + + /* Check correct total mbuf length */ + KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), + ("%s: bogus m_pkthdr.len", __func__)); + } + return (top); + +nospace: +#ifndef VBOX + m_freem(top); +#else + m_freem(pData, top); +#endif + mbstat.m_mcfail++; /* XXX: No consistency. */ + return (NULL); +} + +/* + * Concatenate mbuf chain n to m. + * Both chains must be of the same type (e.g. MT_DATA). + * Any m_pkthdr is not updated. + */ +void +#ifndef VBOX +m_cat(struct mbuf *m, struct mbuf *n) +#else +m_cat(PNATState pData, struct mbuf *m, struct mbuf *n) +#endif +{ + while (m->m_next) + m = m->m_next; + while (n) { + if (m->m_flags & M_EXT || + m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { + /* just join the two chains */ + m->m_next = n; + return; + } + /* splat the data from one into the other */ + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (u_int)n->m_len); + m->m_len += n->m_len; +#ifndef VBOX + n = m_free(n); +#else + n = m_free(pData, n); +#endif + } +} + +void +#ifndef VBOX +m_adj(struct mbuf *mp, int req_len) +#else +m_adj(PNATState pData, struct mbuf *mp, int req_len) +#endif +{ + int len = req_len; + struct mbuf *m; + int count; + + if ((m = mp) == NULL) + return; + if (len >= 0) { + /* + * Trim from head. + */ + while (m != NULL && len > 0) { + if (m->m_len <= len) { + len -= m->m_len; + m->m_len = 0; + m = m->m_next; + } else { + m->m_len -= len; + m->m_data += len; + len = 0; + } + } + m = mp; + if (mp->m_flags & M_PKTHDR) + m->m_pkthdr.len -= (req_len - len); + } else { + /* + * Trim from tail. Scan the mbuf chain, + * calculating its length and finding the last mbuf. 
+ * If the adjustment only affects this mbuf, then just + * adjust and return. Otherwise, rescan and truncate + * after the remaining size. + */ + len = -len; + count = 0; + for (;;) { + count += m->m_len; + if (m->m_next == (struct mbuf *)0) + break; + m = m->m_next; + } + if (m->m_len > len || (m->m_len == len && m == mp)) { + m->m_len -= len; + if (mp->m_flags & M_PKTHDR) + mp->m_pkthdr.len -= len; + return; + } + count -= len; + if (count < 0) + count = 0; + /* + * Correct length for chain is "count". + * Find the mbuf with last data, adjust its length, + * and toss data from remaining mbufs on chain. + */ + m = mp; + if (m->m_flags & M_PKTHDR) + m->m_pkthdr.len = count; + for (; m; m = m->m_next) { + if (m->m_len >= count) { + m->m_len = count; + if (m->m_next != NULL) { +#ifndef VBOX + m_freem(m->m_next); +#else + m_freem(pData, m->m_next); +#endif + m->m_next = NULL; + } + break; + } + count -= m->m_len; + } + } +} + +/* + * Rearange an mbuf chain so that len bytes are contiguous + * and in the data area of an mbuf (so that mtod and dtom + * will work for a structure of size len). Returns the resulting + * mbuf chain on success, frees it and returns null on failure. + * If there is room, it will add up to max_protohdr-len extra bytes to the + * contiguous region in an attempt to avoid being called next time. + */ +struct mbuf * +#ifndef VBOX +m_pullup(struct mbuf *n, int len) +#else +m_pullup(PNATState pData, struct mbuf *n, int len) +#endif +{ + struct mbuf *m; + int count; + int space; + + /* + * If first mbuf has no cluster, and has room for len bytes + * without shifting current data, pullup into it, + * otherwise allocate a new mbuf to prepend to the chain. 
+ */ + if ((n->m_flags & M_EXT) == 0 && + n->m_data + len < &n->m_dat[MLEN] && n->m_next) { + if (n->m_len >= len) + return (n); + m = n; + n = n->m_next; + len -= m->m_len; + } else { + if (len > MHLEN) + goto bad; + MGET(m, M_DONTWAIT, n->m_type); + if (m == NULL) + goto bad; + m->m_len = 0; + if (n->m_flags & M_PKTHDR) + M_MOVE_PKTHDR(m, n); + } + space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + do { + count = min(min(max(len, max_protohdr), space), n->m_len); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (u_int)count); + len -= count; + m->m_len += count; + n->m_len -= count; + space -= count; + if (n->m_len) + n->m_data += count; + else +#ifndef VBOX + n = m_free(n); +#else + n = m_free(pData, n); +#endif + } while (len > 0 && n); + if (len > 0) { +#ifndef VBOX + (void) m_free(m); +#else + (void) m_free(pData, m); +#endif + goto bad; + } + m->m_next = n; + return (m); +bad: +#ifndef VBOX + m_freem(n); +#else + m_freem(pData, n); +#endif + mbstat.m_mpfail++; /* XXX: No consistency. */ + return (NULL); +} + +/* + * Like m_pullup(), except a new mbuf is always allocated, and we allow + * the amount of empty space before the data in the new mbuf to be specified + * (in the event that the caller expects to prepend later). 
+ */ +int MSFail; + +struct mbuf * +#ifndef VBOX +m_copyup(struct mbuf *n, int len, int dstoff) +#else +m_copyup(PNATState pData, struct mbuf *n, int len, int dstoff) +#endif +{ + struct mbuf *m; + int count, space; + + if (len > (int)(MHLEN - dstoff)) + goto bad; + MGET(m, M_DONTWAIT, n->m_type); + if (m == NULL) + goto bad; + m->m_len = 0; + if (n->m_flags & M_PKTHDR) + M_MOVE_PKTHDR(m, n); + m->m_data += dstoff; + space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + do { + count = min(min(max(len, max_protohdr), space), n->m_len); + memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), + (unsigned)count); + len -= count; + m->m_len += count; + n->m_len -= count; + space -= count; + if (n->m_len) + n->m_data += count; + else +#ifndef VBOX + n = m_free(n); +#else + n = m_free(pData, n); +#endif + } while (len > 0 && n); + if (len > 0) { +#ifndef VBOX + (void) m_free(m); +#else + (void) m_free(pData, m); +#endif + goto bad; + } + m->m_next = n; + return (m); + bad: +#ifndef VBOX + m_freem(n); +#else + m_freem(pData, n); +#endif + MSFail++; + return (NULL); +} + +/* + * Partition an mbuf chain in two pieces, returning the tail -- + * all but the first len0 bytes. In case of failure, it returns NULL and + * attempts to restore the chain to its original state. + * + * Note that the resulting mbufs might be read-only, because the new + * mbuf can end up sharing an mbuf cluster with the original mbuf if + * the "breaking point" happens to lie within a cluster mbuf. Use the + * M_WRITABLE() macro to check for this case. 
+ */ +struct mbuf * +#ifndef VBOX +m_split(struct mbuf *m0, int len0, int fWait) +#else +m_split(PNATState pData, struct mbuf *m0, int len0, int fWait) +#endif +{ + struct mbuf *m, *n; + u_int len = len0, remain; + + MBUF_CHECKSLEEP(fWait); + for (m = m0; m && len > m->m_len; m = m->m_next) + len -= m->m_len; + if (m == NULL) + return (NULL); + remain = m->m_len - len; + if (m0->m_flags & M_PKTHDR) { + MGETHDR(n, fWait, m0->m_type); + if (n == NULL) + return (NULL); + n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; + n->m_pkthdr.len = m0->m_pkthdr.len - len0; + m0->m_pkthdr.len = len0; + if (m->m_flags & M_EXT) + goto extpacket; + if (remain > MHLEN) { + /* m can't be the lead packet */ + MH_ALIGN(n, 0); +#ifndef VBOX + n->m_next = m_split(m, len, fWait); +#else + n->m_next = m_split(pData, m, len, fWait); +#endif + if (n->m_next == NULL) { +#ifndef VBOX + (void) m_free(n); +#else + (void) m_free(pData, n); +#endif + return (NULL); + } else { + n->m_len = 0; + return (n); + } + } else + MH_ALIGN(n, remain); + } else if (remain == 0) { + n = m->m_next; + m->m_next = NULL; + return (n); + } else { + MGET(n, fWait, m->m_type); + if (n == NULL) + return (NULL); + M_ALIGN(n, remain); + } +extpacket: + if (m->m_flags & M_EXT) { + n->m_data = m->m_data + len; + mb_dupcl(n, m); + } else { + bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); + } + n->m_len = remain; + m->m_len = len; + n->m_next = m->m_next; + m->m_next = NULL; + return (n); +} +/* + * Routine to copy from device local memory into mbufs. + * Note that `off' argument is offset into first mbuf of target chain from + * which to begin copying the data to. 
+ */ +#ifndef VBOX +struct mbuf * +m_devget(char *buf, int totlen, int off, struct ifnet *ifp, + void (*copy)(char *from, caddr_t to, u_int len)) +{ + struct mbuf *m; + struct mbuf *top = NULL, **mp = ⊤ + int len; + + if (off < 0 || off > MHLEN) + return (NULL); + + while (totlen > 0) { + if (top == NULL) { /* First one, must be PKTHDR */ + if (totlen + off >= MINCLSIZE) { + m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + len = MCLBYTES; + } else { + m = m_gethdr(M_DONTWAIT, MT_DATA); + len = MHLEN; + + /* Place initial small packet/header at end of mbuf */ + if (m && totlen + off + max_linkhdr <= MLEN) { + m->m_data += max_linkhdr; + len -= max_linkhdr; + } + } + if (m == NULL) + return NULL; + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = totlen; + } else { + if (totlen + off >= MINCLSIZE) { + m = m_getcl(M_DONTWAIT, MT_DATA, 0); + len = MCLBYTES; + } else { + m = m_get(M_DONTWAIT, MT_DATA); + len = MLEN; + } + if (m == NULL) { + m_freem(top); + return NULL; + } + } + if (off) { + m->m_data += off; + len -= off; + off = 0; + } + m->m_len = len = min(totlen, len); + if (copy) + copy(buf, mtod(m, caddr_t), (u_int)len); + else + bcopy(buf, mtod(m, caddr_t), (u_int)len); + buf += len; + *mp = m; + mp = &m->m_next; + totlen -= len; + } + return (top); +} +#endif + +/* + * Copy data from a buffer back into the indicated mbuf chain, + * starting "off" bytes from the beginning, extending the mbuf + * chain if necessary. 
+ */ +void +#ifndef VBOX +m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp) +#else +m_copyback(PNATState pData, struct mbuf *m0, int off, int len, c_caddr_t cp) +#endif +{ + int mlen; + struct mbuf *m = m0, *n; + int totlen = 0; + + if (m0 == NULL) + return; + while (off > (mlen = m->m_len)) { + off -= mlen; + totlen += mlen; + if (m->m_next == NULL) { +#ifndef VBOX + n = m_get(M_DONTWAIT, m->m_type); +#else + n = m_get(pData, M_DONTWAIT, m->m_type); +#endif + if (n == NULL) + goto out; + bzero(mtod(n, caddr_t), MLEN); + n->m_len = min(MLEN, len + off); + m->m_next = n; + } + m = m->m_next; + } + while (len > 0) { + if (m->m_next == NULL && (len > m->m_len - off)) { + m->m_len += min(len - (m->m_len - off), + M_TRAILINGSPACE(m)); + } + mlen = min (m->m_len - off, len); + bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen); + cp += mlen; + len -= mlen; + mlen += off; + off = 0; + totlen += mlen; + if (len == 0) + break; + if (m->m_next == NULL) { +#ifndef VBOX + n = m_get(M_DONTWAIT, m->m_type); +#else + n = m_get(pData, M_DONTWAIT, m->m_type); +#endif + if (n == NULL) + break; + n->m_len = min(MLEN, len); + m->m_next = n; + } + m = m->m_next; + } +out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) + m->m_pkthdr.len = totlen; +} + +/* + * Append the specified data to the indicated mbuf chain, + * Extend the mbuf chain if the new data does not fit in + * existing space. + * + * Return 1 if able to complete the job; otherwise 0. + */ +int +#ifndef VBOX +m_append(struct mbuf *m0, int len, c_caddr_t cp) +#else +m_append(PNATState pData, struct mbuf *m0, int len, c_caddr_t cp) +#endif +{ + struct mbuf *m, *n; + int remainder, space; + + for (m = m0; m->m_next != NULL; m = m->m_next) + ; + remainder = len; + space = M_TRAILINGSPACE(m); + if (space > 0) { + /* + * Copy into available space. 
+ */ + if (space > remainder) + space = remainder; + bcopy(cp, mtod(m, caddr_t) + m->m_len, space); + m->m_len += space; + cp += space, remainder -= space; + } + while (remainder > 0) { + /* + * Allocate a new mbuf; could check space + * and allocate a cluster instead. + */ +#ifndef VBOX + n = m_get(M_DONTWAIT, m->m_type); +#else + n = m_get(pData, M_DONTWAIT, m->m_type); +#endif + if (n == NULL) + break; + n->m_len = min(MLEN, remainder); + bcopy(cp, mtod(n, caddr_t), n->m_len); + cp += n->m_len, remainder -= n->m_len; + m->m_next = n; + m = n; + } + if (m0->m_flags & M_PKTHDR) + m0->m_pkthdr.len += len - remainder; + return (remainder == 0); +} + +/* + * Apply function f to the data in an mbuf chain starting "off" bytes from + * the beginning, continuing for "len" bytes. + */ +int +m_apply(struct mbuf *m, int off, int len, + int (*f)(void *, void *, u_int), void *arg) +{ + u_int count; + int rval; + + KASSERT(off >= 0, ("m_apply, negative off %d", off)); + KASSERT(len >= 0, ("m_apply, negative len %d", len)); + while (off > 0) { + KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); + count = min(m->m_len - off, len); + rval = (*f)(arg, mtod(m, caddr_t) + off, count); + if (rval) + return (rval); + len -= count; + off = 0; + m = m->m_next; + } + return (0); +} + +/* + * Return a pointer to mbuf/offset of location in mbuf chain. + */ +struct mbuf * +m_getptr(struct mbuf *m, int loc, int *off) +{ + + while (loc >= 0) { + /* Normal end of search. */ + if (m->m_len > loc) { + *off = loc; + return (m); + } else { + loc -= m->m_len; + if (m->m_next == NULL) { + if (loc == 0) { + /* Point at the end of valid data. 
*/ + *off = m->m_len; + return (m); + } + return (NULL); + } + m = m->m_next; + } + } + return (NULL); +} + +void +m_print(const struct mbuf *m, int maxlen) +{ + int len; + int pdata; + const struct mbuf *m2; + + if (m->m_flags & M_PKTHDR) + len = m->m_pkthdr.len; + else + len = -1; + m2 = m; + while (m2 != NULL && (len == -1 || len)) { + pdata = m2->m_len; + if (maxlen != -1 && pdata > maxlen) + pdata = maxlen; + printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len, + m2->m_next, m2->m_flags, "\20\20freelist\17skipfw" + "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly" + "\3eor\2pkthdr\1ext", pdata ? "" : "\n"); + if (pdata) + printf(", %*D\n", pdata, (u_char *)m2->m_data, "-"); + if (len != -1) + len -= m2->m_len; + m2 = m2->m_next; + } + if (len > 0) + printf("%d bytes unaccounted for.\n", len); + return; +} + +u_int +m_fixhdr(struct mbuf *m0) +{ + u_int len; + + len = m_length(m0, NULL); + m0->m_pkthdr.len = len; + return (len); +} + +u_int +m_length(struct mbuf *m0, struct mbuf **last) +{ + struct mbuf *m; + u_int len; + + len = 0; + for (m = m0; m != NULL; m = m->m_next) { + len += m->m_len; + if (m->m_next == NULL) + break; + } + if (last != NULL) + *last = m; + return (len); +} + +/* + * Defragment a mbuf chain, returning the shortest possible + * chain of mbufs and clusters. If allocation fails and + * this cannot be completed, NULL will be returned, but + * the passed in chain will be unchanged. Upon success, + * the original chain will be freed, and the new chain + * will be returned. + * + * If a non-packet header is passed in, the original + * mbuf (chain?) will be returned unharmed. 
+ */ +struct mbuf * +#ifndef VBOX +m_defrag(struct mbuf *m0, int how) +#else +m_defrag(PNATState pData, struct mbuf *m0, int how) +#endif +{ + struct mbuf *m_new = NULL, *m_final = NULL; + int progress = 0, length; + + MBUF_CHECKSLEEP(how); + if (!(m0->m_flags & M_PKTHDR)) + return (m0); + + m_fixhdr(m0); /* Needed sanity check */ + +#ifdef MBUF_STRESS_TEST + if (m_defragrandomfailures) { + int temp = arc4random() & 0xff; + if (temp == 0xba) + goto nospace; + } +#endif + + if (m0->m_pkthdr.len > MHLEN) +#ifndef VBOX + m_final = m_getcl(how, MT_DATA, M_PKTHDR); +#else + m_final = m_getcl(pData, how, MT_DATA, M_PKTHDR); +#endif + else +#ifndef VBOX + m_final = m_gethdr(how, MT_DATA); +#else + m_final = m_gethdr(pData, how, MT_DATA); +#endif + + if (m_final == NULL) + goto nospace; + + if (m_dup_pkthdr(m_final, m0, how) == 0) + goto nospace; + + m_new = m_final; + + while (progress < m0->m_pkthdr.len) { + length = m0->m_pkthdr.len - progress; + if (length > MCLBYTES) + length = MCLBYTES; + + if (m_new == NULL) { + if (length > MLEN) +#ifndef VBOX + m_new = m_getcl(how, MT_DATA, 0); +#else + m_new = m_getcl(pData, how, MT_DATA, 0); +#endif + else +#ifndef VBOX + m_new = m_get(how, MT_DATA); +#else + m_new = m_get(pData, how, MT_DATA); +#endif + if (m_new == NULL) + goto nospace; + } + + m_copydata(m0, progress, length, mtod(m_new, caddr_t)); + progress += length; + m_new->m_len = length; + if (m_new != m_final) +#ifndef VBOX + m_cat(m_final, m_new); +#else + m_cat(pData, m_final, m_new); +#endif + m_new = NULL; + } +#ifdef MBUF_STRESS_TEST + if (m0->m_next == NULL) + m_defraguseless++; +#endif +#ifndef VBOX + m_freem(m0); +#else + m_freem(pData, m0); +#endif + m0 = m_final; +#ifdef MBUF_STRESS_TEST + m_defragpackets++; + m_defragbytes += m0->m_pkthdr.len; +#endif + return (m0); +nospace: +#ifdef MBUF_STRESS_TEST + m_defragfailure++; +#endif + if (m_final) +#ifndef VBOX + m_freem(m_final); +#else + m_freem(pData, m_final); +#endif + return (NULL); +} + +/* + * 
Defragment an mbuf chain, returning at most maxfrags separate + * mbufs+clusters. If this is not possible NULL is returned and + * the original mbuf chain is left in it's present (potentially + * modified) state. We use two techniques: collapsing consecutive + * mbufs and replacing consecutive mbufs by a cluster. + * + * NB: this should really be named m_defrag but that name is taken + */ +struct mbuf * +#ifndef VBOX +m_collapse(struct mbuf *m0, int how, int maxfrags) +#else +m_collapse(PNATState pData, struct mbuf *m0, int how, int maxfrags) +#endif +{ + struct mbuf *m, *n, *n2, **prev; + u_int curfrags; + + /* + * Calculate the current number of frags. + */ + curfrags = 0; + for (m = m0; m != NULL; m = m->m_next) + curfrags++; + /* + * First, try to collapse mbufs. Note that we always collapse + * towards the front so we don't need to deal with moving the + * pkthdr. This may be suboptimal if the first mbuf has much + * less data than the following. + */ + m = m0; +again: + for (;;) { + n = m->m_next; + if (n == NULL) + break; + if ((m->m_flags & M_RDONLY) == 0 && + n->m_len < M_TRAILINGSPACE(m)) { + bcopy(mtod(n, void *), mtod(m, char *) + m->m_len, + n->m_len); + m->m_len += n->m_len; + m->m_next = n->m_next; +#ifndef VBOX + m_free(n); +#else + m_free(pData, n); +#endif + if (--curfrags <= maxfrags) + return m0; + } else + m = n; + } + KASSERT(maxfrags > 1, + ("maxfrags %u, but normal collapse failed", maxfrags)); + /* + * Collapse consecutive mbufs to a cluster. 
+ */ + prev = &m0->m_next; /* NB: not the first mbuf */ + while ((n = *prev) != NULL) { + if ((n2 = n->m_next) != NULL && + n->m_len + n2->m_len < MCLBYTES) { +#ifndef VBOX + m = m_getcl(how, MT_DATA, 0); +#else + m = m_getcl(pData, how, MT_DATA, 0); +#endif + if (m == NULL) + goto bad; + bcopy(mtod(n, void *), mtod(m, void *), n->m_len); + bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len, + n2->m_len); + m->m_len = n->m_len + n2->m_len; + m->m_next = n2->m_next; + *prev = m; +#ifndef VBOX + m_free(n); + m_free(n2); +#else + m_free(pData, n); + m_free(pData, n2); +#endif + if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */ + return m0; + /* + * Still not there, try the normal collapse + * again before we allocate another cluster. + */ + goto again; + } + prev = &n->m_next; + } + /* + * No place where we can collapse to a cluster; punt. + * This can occur if, for example, you request 2 frags + * but the packet requires that both be clusters (we + * never reallocate the first mbuf to avoid moving the + * packet header). + */ +bad: + return NULL; +} + +#ifdef MBUF_STRESS_TEST + +/* + * Fragment an mbuf chain. There's no reason you'd ever want to do + * this in normal usage, but it's great for stress testing various + * mbuf consumers. + * + * If fragmentation is not possible, the original chain will be + * returned. 
+ * + * Possible length values: + * 0 no fragmentation will occur + * > 0 each fragment will be of the specified length + * -1 each fragment will be the same random value in length + * -2 each fragment's length will be entirely random + * (Random values range from 1 to 256) + */ +struct mbuf * +m_fragment(struct mbuf *m0, int how, int length) +{ + struct mbuf *m_new = NULL, *m_final = NULL; + int progress = 0; + + if (!(m0->m_flags & M_PKTHDR)) + return (m0); + + if ((length == 0) || (length < -2)) + return (m0); + + m_fixhdr(m0); /* Needed sanity check */ + + m_final = m_getcl(how, MT_DATA, M_PKTHDR); + + if (m_final == NULL) + goto nospace; + + if (m_dup_pkthdr(m_final, m0, how) == 0) + goto nospace; + + m_new = m_final; + + if (length == -1) + length = 1 + (arc4random() & 255); + + while (progress < m0->m_pkthdr.len) { + int fraglen; + + if (length > 0) + fraglen = length; + else + fraglen = 1 + (arc4random() & 255); + if (fraglen > m0->m_pkthdr.len - progress) + fraglen = m0->m_pkthdr.len - progress; + + if (fraglen > MCLBYTES) + fraglen = MCLBYTES; + + if (m_new == NULL) { + m_new = m_getcl(how, MT_DATA, 0); + if (m_new == NULL) + goto nospace; + } + + m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t)); + progress += fraglen; + m_new->m_len = fraglen; + if (m_new != m_final) + m_cat(m_final, m_new); + m_new = NULL; + } + m_freem(m0); + m0 = m_final; + return (m0); +nospace: + if (m_final) + m_freem(m_final); + /* Return the original chain on failure */ + return (m0); +} + +#endif + +/* + * Copy the contents of uio into a properly sized mbuf chain. + */ +#ifndef VBOX +struct mbuf * +m_uiotombuf(struct uio *uio, int how, int len, int align, int flags) +{ + struct mbuf *m, *mb; + int error, length, total; + int progress = 0; + + /* + * len can be zero or an arbitrary large value bound by + * the total data supplied by the uio. 
+ */ + if (len > 0) + total = min(uio->uio_resid, len); + else + total = uio->uio_resid; + + /* + * The smallest unit returned by m_getm2() is a single mbuf + * with pkthdr. We can't align past it. + */ + if (align >= MHLEN) + return (NULL); + + /* + * Give us the full allocation or nothing. + * If len is zero return the smallest empty mbuf. + */ + m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags); + if (m == NULL) + return (NULL); + m->m_data += align; + + /* Fill all mbufs with uio data and update header information. */ + for (mb = m; mb != NULL; mb = mb->m_next) { + length = min(M_TRAILINGSPACE(mb), total - progress); + + error = uiomove(mtod(mb, void *), length, uio); + if (error) { + m_freem(m); + return (NULL); + } + + mb->m_len = length; + progress += length; + if (flags & M_PKTHDR) + m->m_pkthdr.len += length; + } + KASSERT(progress == total, ("%s: progress != total", __func__)); + + return (m); +} +#endif + +/* + * Set the m_data pointer of a newly-allocated mbuf + * to place an object of the specified size at the + * end of the mbuf, longword aligned. + */ +void +m_align(struct mbuf *m, int len) +{ + int adjust; + + if (m->m_flags & M_EXT) + adjust = m->m_ext.ext_size - len; + else if (m->m_flags & M_PKTHDR) + adjust = MHLEN - len; + else + adjust = MLEN - len; + m->m_data += adjust &~ (sizeof(long)-1); +} + +/* + * Create a writable copy of the mbuf chain. While doing this + * we compact the chain with a goal of producing a chain with + * at most two mbufs. The second mbuf in this chain is likely + * to be a cluster. The primary purpose of this work is to create + * a writable packet for encryption, compression, etc. The + * secondary goal is to linearize the data so the data can be + * passed to crypto hardware in the most efficient manner possible. 
+ */ +struct mbuf * +#ifndef VBOX +m_unshare(struct mbuf *m0, int how) +#else +m_unshare(PNATState pData, struct mbuf *m0, int how) +#endif +{ /* m0 is the head of the chain to process; `how' is handed to every allocator call below. Returns the head of the resulting chain (a new mbuf when the first one had to be replaced). When an allocation fails the whole chain is freed and NULL is returned. */ + struct mbuf *m, *mprev; + struct mbuf *n, *mfirst, *mlast; + int len, off; + /* The loop advances through mprev->m_next, so every path through the body either sets mprev or unlinks m from behind mprev. */ + mprev = NULL; + for (m = m0; m != NULL; m = mprev->m_next) { + /* + * Regular mbufs are ignored unless there's a cluster + * in front of it that we can use to coalesce. We do + * the latter mainly so later clusters can be coalesced + * also w/o having to handle them specially (i.e. convert + * mbuf+cluster -> cluster). This optimization is heavily + * influenced by the assumption that we're running over + * Ethernet where MCLBYTES is large enough that the max + * packet size will permit lots of coalescing into a + * single cluster. This in turn permits efficient + * crypto operations, especially when using hardware. + */ + if ((m->m_flags & M_EXT) == 0) { + if (mprev && (mprev->m_flags & M_EXT) && + m->m_len <= M_TRAILINGSPACE(mprev)) { + /* XXX: this ignores mbuf types */ + memcpy(mtod(mprev, caddr_t) + mprev->m_len, + mtod(m, caddr_t), m->m_len); + mprev->m_len += m->m_len; + mprev->m_next = m->m_next; /* unlink from chain */ +#ifndef VBOX + m_free(m); /* reclaim mbuf */ +#else + m_free(pData, m); /* reclaim mbuf */ +#endif +#if 0 + newipsecstat.ips_mbcoalesced++; +#endif + } else { + mprev = m; + } + continue; + } + /* + * Writable mbufs are left alone (for now). + */ + if (M_WRITABLE(m)) { + mprev = m; + continue; + } + + /* + * Not writable, replace with a copy or coalesce with + * the previous mbuf if possible (since we have to copy + * it anyway, we try to reduce the number of mbufs and + * clusters so that future work is easier). 
+ */ + KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags)); + /* NB: we only coalesce into a cluster or larger */ + if (mprev != NULL && (mprev->m_flags & M_EXT) && + m->m_len <= M_TRAILINGSPACE(mprev)) { + /* XXX: this ignores mbuf types */ + memcpy(mtod(mprev, caddr_t) + mprev->m_len, + mtod(m, caddr_t), m->m_len); + mprev->m_len += m->m_len; + mprev->m_next = m->m_next; /* unlink from chain */ +#ifndef VBOX + m_free(m); /* reclaim mbuf */ +#else + m_free(pData, m); /* reclaim mbuf */ +#endif +#if 0 + newipsecstat.ips_clcoalesced++; +#endif + continue; + } + + /* + * Allocate new space to hold the copy... + */ + /* XXX why can M_PKTHDR be set past the first mbuf? */ + if (mprev == NULL && (m->m_flags & M_PKTHDR)) { + /* + * NB: if a packet header is present we must + * allocate the mbuf separately from any cluster + * because M_MOVE_PKTHDR will smash the data + * pointer and drop the M_EXT marker. + */ + MGETHDR(n, how, m->m_type); + if (n == NULL) { +#ifndef VBOX + m_freem(m0); +#else + m_freem(pData, m0); +#endif + return (NULL); + } + M_MOVE_PKTHDR(n, m); + MCLGET(n, how); + if ((n->m_flags & M_EXT) == 0) { /* no cluster could be attached: drop the new header mbuf and the chain */ +#ifndef VBOX + m_free(n); + m_freem(m0); +#else + m_free(pData, n); + m_freem(pData, m0); +#endif + return (NULL); + } + } else { +#ifndef VBOX + n = m_getcl(how, m->m_type, m->m_flags); +#else + n = m_getcl(pData, how, m->m_type, m->m_flags); +#endif + if (n == NULL) { +#ifndef VBOX + m_freem(m0); +#else + m_freem(pData, m0); +#endif + return (NULL); + } + } + /* + * ... and copy the data. We deal with jumbo mbufs + * (i.e. m_len > MCLBYTES) by splitting them into + * clusters. We could just malloc a buffer and make + * it external but too many device drivers don't know + * how to break up the non-contiguous memory when + * doing DMA. 
+ */ + len = m->m_len; + off = 0; + mfirst = n; + mlast = NULL; /* mfirst..mlast is the chain of clusters that will stand in for m; each pass below fills one cluster with at most MCLBYTES bytes */ + for (;;) { + int cc = min(len, MCLBYTES); + memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc); + n->m_len = cc; + if (mlast != NULL) + mlast->m_next = n; + mlast = n; +#if 0 + newipsecstat.ips_clcopied++; +#endif + + len -= cc; + if (len <= 0) + break; + off += cc; + +#ifndef VBOX + n = m_getcl(how, m->m_type, m->m_flags); +#else + n = m_getcl(pData, how, m->m_type, m->m_flags); +#endif + if (n == NULL) { /* the partial copy is not linked into m0 yet, so both are freed */ +#ifndef VBOX + m_freem(mfirst); + m_freem(m0); +#else + m_freem(pData, mfirst); + m_freem(pData, m0); +#endif + return (NULL); + } + } /* n is now the last cluster of the copy: hang the rest of the original chain behind it */ + n->m_next = m->m_next; + if (mprev == NULL) + m0 = mfirst; /* new head of chain */ + else + mprev->m_next = mfirst; /* replace old mbuf */ +#ifndef VBOX + m_free(m); /* release old mbuf */ +#else + m_free(pData, m); /* release old mbuf */ +#endif + mprev = mfirst; + } + return (m0); +} diff --git a/src/VBox/Devices/Network/slirp/bsd/kern/uipc_mbuf2.c b/src/VBox/Devices/Network/slirp/bsd/kern/uipc_mbuf2.c new file mode 100644 index 00000000..5f3abde8 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bsd/kern/uipc_mbuf2.c @@ -0,0 +1,539 @@ +/* $KAME: uipc_mbuf2.c,v 1.31 2001/11/28 11:08:53 itojun Exp $ */ +/* $NetBSD: uipc_mbuf.c,v 1.40 1999/04/01 00:23:25 thorpej Exp $ */ + +/*- + * Copyright (C) 1999 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/*- + * Copyright (c) 1982, 1986, 1988, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 + */ +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf2.c,v 1.33.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +/*#define PULLDOWN_DEBUG*/ + +#include "opt_mac.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> + +#include <security/mac/mac_framework.h> + +static MALLOC_DEFINE(M_PACKET_TAGS, MBUF_TAG_MEM_NAME, + "packet-attached information"); +#else +# include "slirp.h" +#endif + +/* can't call it m_dup(), as freebsd[34] uses m_dup() with different arg */ +#ifndef VBOX +static struct mbuf *m_dup1(struct mbuf *, int, int, int); +#else +static struct mbuf *m_dup1(PNATState, struct mbuf *, int, int, int); +#endif + +/* + * ensure that [off, off + len) is contiguous on the mbuf chain "m". + * packet chain before "off" is kept untouched. + * if offp == NULL, the target will start at <retval, 0> on resulting chain. + * if offp != NULL, the target will start at <retval, *offp> on resulting chain. + * + * on error return (NULL return value), original "m" will be freed. 
+ * + * XXX: M_TRAILINGSPACE/M_LEADINGSPACE only permitted on writable ext_buf. + */ +struct mbuf * +#ifndef VBOX +m_pulldown(struct mbuf *m, int off, int len, int *offp) +#else +m_pulldown(PNATState pData, struct mbuf *m, int off, int len, int *offp) +#endif +{ + struct mbuf *n, *o; + int hlen, tlen, olen; + int writable; + + /* check invalid arguments. */ + if (m == NULL) + panic("m == NULL in m_pulldown()"); + if (len > MCLBYTES) { +#ifndef VBOX + m_freem(m); +#else + m_freem(pData, m); +#endif + return NULL; /* impossible */ + } + +#ifdef PULLDOWN_DEBUG + { + struct mbuf *t; + printf("before:"); + for (t = m; t; t = t->m_next) + printf(" %d", t->m_len); + printf("\n"); + } +#endif + n = m; + while (n != NULL && off > 0) { + if (n->m_len > off) + break; + off -= n->m_len; + n = n->m_next; + } + /* be sure to point non-empty mbuf */ + while (n != NULL && n->m_len == 0) + n = n->m_next; + if (!n) { +#ifndef VBOX + m_freem(m); +#else + m_freem(pData, m); +#endif + return NULL; /* mbuf chain too short */ + } + + /* + * XXX: This code is flawed because it considers a "writable" mbuf + * data region to require all of the following: + * (i) mbuf _has_ to have M_EXT set; if it is just a regular + * mbuf, it is still not considered "writable." + * (ii) since mbuf has M_EXT, the ext_type _has_ to be + * EXT_CLUSTER. Anything else makes it non-writable. + * (iii) M_WRITABLE() must evaluate true. + * Ideally, the requirement should only be (iii). + * + * If we're writable, we're sure we're writable, because the ref. count + * cannot increase from 1, as that would require possession of mbuf + * n by someone else (which is impossible). However, if we're _not_ + * writable, we may eventually become writable (if the ref. count drops + * to 1), but we'll fail to notice it unless we re-evaluate + * M_WRITABLE(). For now, we only evaluate once at the beginning and + * live with this. + */ + /* + * XXX: This is dumb. 
If we're just a regular mbuf with no M_EXT, + * then we're not "writable," according to this code. + */ + writable = 0; + if ((n->m_flags & M_EXT) == 0 || + (n->m_ext.ext_type == EXT_CLUSTER && M_WRITABLE(n))) + writable = 1; + + /* + * the target data is on <n, off>. + * if we got enough data on the mbuf "n", we're done. + */ + if ((off == 0 || offp) && len <= n->m_len - off && writable) + goto ok; + + /* + * when len <= n->m_len - off and off != 0, it is a special case. + * len bytes from <n, off> sits in single mbuf, but the caller does + * not like the starting position (off). + * chop the current mbuf into two pieces, set off to 0. + */ + if (len <= n->m_len - off) { +#ifndef VBOX + o = m_dup1(n, off, n->m_len - off, M_DONTWAIT); +#else + o = m_dup1(pData, n, off, n->m_len - off, M_DONTWAIT); +#endif + if (o == NULL) { +#ifndef VBOX + m_freem(m); +#else + m_freem(pData, m); +#endif + return NULL; /* ENOBUFS */ + } + n->m_len = off; + o->m_next = n->m_next; + n->m_next = o; + n = n->m_next; + off = 0; + goto ok; + } + + /* + * we need to take hlen from <n, off> and tlen from <n->m_next, 0>, + * and construct contiguous mbuf with m_len == len. + * note that hlen + tlen == len, and tlen > 0. + */ + hlen = n->m_len - off; + tlen = len - hlen; + + /* + * ensure that we have enough trailing data on mbuf chain. + * if not, we can do nothing about the chain. + */ + olen = 0; + for (o = n->m_next; o != NULL; o = o->m_next) + olen += o->m_len; + if (hlen + olen < len) { +#ifndef VBOX + m_freem(m); +#else + m_freem(pData, m); +#endif + return NULL; /* mbuf chain too short */ + } + + /* + * easy cases first. + * we need to use m_copydata() to get data from <n->m_next, 0>. 
+ */ + if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen + && writable) { + m_copydata(n->m_next, 0, tlen, mtod(n, caddr_t) + n->m_len); + n->m_len += tlen; +#ifndef VBOX + m_adj(n->m_next, tlen); +#else + m_adj(pData, n->m_next, tlen); +#endif + goto ok; + } + if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen + && writable) { + n->m_next->m_data -= hlen; + n->m_next->m_len += hlen; + bcopy(mtod(n, caddr_t) + off, mtod(n->m_next, caddr_t), hlen); + n->m_len -= hlen; + n = n->m_next; + off = 0; + goto ok; + } + + /* + * now, we need to do the hard way. don't m_copy as there's no room + * on both end. + */ + if (len > MLEN) +#ifndef VBOX + o = m_getcl(M_DONTWAIT, m->m_type, 0); +#else + o = m_getcl(pData, M_DONTWAIT, m->m_type, 0); +#endif + else +#ifndef VBOX + o = m_get(M_DONTWAIT, m->m_type); +#else + o = m_get(pData, M_DONTWAIT, m->m_type); +#endif + if (!o) { +#ifndef VBOX + m_freem(m); +#else + m_freem(pData, m); +#endif + return NULL; /* ENOBUFS */ + } + /* get hlen from <n, off> into <o, 0> */ + o->m_len = hlen; + bcopy(mtod(n, caddr_t) + off, mtod(o, caddr_t), hlen); + n->m_len -= hlen; + /* get tlen from <n->m_next, 0> into <o, hlen> */ + m_copydata(n->m_next, 0, tlen, mtod(o, caddr_t) + o->m_len); + o->m_len += tlen; +#ifndef VBOX + m_adj(n->m_next, tlen); +#else + m_adj(pData, n->m_next, tlen); +#endif + o->m_next = n->m_next; + n->m_next = o; + n = o; + off = 0; + +ok: +#ifdef PULLDOWN_DEBUG + { + struct mbuf *t; + printf("after:"); + for (t = m; t; t = t->m_next) + printf("%c%d", t == n ? 
'*' : ' ', t->m_len); + printf(" (off=%d)\n", off); + } +#endif + if (offp) + *offp = off; + return n; +} + +static struct mbuf * +#ifndef VBOX +m_dup1(struct mbuf *m, int off, int len, int fWait) +#else +m_dup1(PNATState pData, struct mbuf *m, int off, int len, int fWait) +#endif +{ + struct mbuf *n; + int copyhdr; + + if (len > MCLBYTES) + return NULL; + if (off == 0 && (m->m_flags & M_PKTHDR) != 0) + copyhdr = 1; + else + copyhdr = 0; + if (len >= MINCLSIZE) { + if (copyhdr == 1) +#ifndef VBOX + n = m_getcl(fWait, m->m_type, M_PKTHDR); +#else + n = m_getcl(pData, fWait, m->m_type, M_PKTHDR); +#endif + else +#ifndef VBOX + n = m_getcl(fWait, m->m_type, 0); +#else + n = m_getcl(pData, fWait, m->m_type, 0); +#endif + } else { + if (copyhdr == 1) +#ifndef VBOX + n = m_gethdr(fWait, m->m_type); +#else + n = m_gethdr(pData, fWait, m->m_type); +#endif + else +#ifndef VBOX + n = m_get(fWait, m->m_type); +#else + n = m_get(pData, fWait, m->m_type); +#endif + } + if (!n) + return NULL; /* ENOBUFS */ + + if (copyhdr && !m_dup_pkthdr(n, m, fWait)) { +#ifndef VBOX + m_free(n); +#else + m_free(pData, n); +#endif + return NULL; + } + m_copydata(m, off, len, mtod(n, caddr_t)); + n->m_len = len; + return n; +} + +/* Free a packet tag. */ +void +m_tag_free_default(struct m_tag *t) +{ +#ifdef MAC + if (t->m_tag_id == PACKET_TAG_MACLABEL) + mac_destroy_mbuf_tag(t); +#endif +#ifndef VBOX + free(t, M_PACKET_TAGS); +#else + RTMemFree(t); +#endif +} + +/* Get a packet tag structure along with specified data following. */ +struct m_tag * +m_tag_alloc(u_int32_t cookie, int type, int len, int fWait) +{ + struct m_tag *t; + + MBUF_CHECKSLEEP(fWait); + if (len < 0) + return NULL; +#ifndef VBOX + t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, fWait); +#else + NOREF(fWait); + t = RTMemAllocZ(len + sizeof(struct m_tag)); +#endif + if (t == NULL) + return NULL; + m_tag_setup(t, cookie, type, len); + t->m_tag_free = m_tag_free_default; + return t; +} + +/* Unlink and free a packet tag. 
*/ +void +m_tag_delete(struct mbuf *m, struct m_tag *t) +{ + + KASSERT(m && t, ("m_tag_delete: null argument, m %p t %p", m, t)); + m_tag_unlink(m, t); + m_tag_free(t); +} + +/* Unlink and free a packet tag chain, starting from given tag. */ +void +m_tag_delete_chain(struct mbuf *m, struct m_tag *t) +{ + struct m_tag *p, *q; + + KASSERT(m, ("m_tag_delete_chain: null mbuf")); + if (t != NULL) + p = t; + else + p = SLIST_FIRST(&m->m_pkthdr.tags); + if (p == NULL) + return; + while ((q = SLIST_NEXT(p, m_tag_link)) != NULL) + m_tag_delete(m, q); + m_tag_delete(m, p); +} + +/* + * Strip off all tags that would normally vanish when + * passing through a network interface. Only persistent + * tags will exist after this; these are expected to remain + * so long as the mbuf chain exists, regardless of the + * path the mbufs take. + */ +void +m_tag_delete_nonpersistent(struct mbuf *m) +{ + struct m_tag *p, *q; + + SLIST_FOREACH_SAFE(p, &m->m_pkthdr.tags, m_tag_link, q) + if ((p->m_tag_id & MTAG_PERSISTENT) == 0) + m_tag_delete(m, p); +} + +/* Find a tag, starting from a given position. */ +struct m_tag * +m_tag_locate(struct mbuf *m, u_int32_t cookie, int type, struct m_tag *t) +{ + struct m_tag *p; + + KASSERT(m, ("m_tag_locate: null mbuf")); + if (t == NULL) + p = SLIST_FIRST(&m->m_pkthdr.tags); + else + p = SLIST_NEXT(t, m_tag_link); + while (p != NULL) { + if (p->m_tag_cookie == cookie && p->m_tag_id == type) + return p; + p = SLIST_NEXT(p, m_tag_link); + } + return NULL; +} + +/* Copy a single tag. */ +struct m_tag * +m_tag_copy(struct m_tag *t, int how) +{ + struct m_tag *p; + + MBUF_CHECKSLEEP(how); + KASSERT(t, ("m_tag_copy: null tag")); + p = m_tag_alloc(t->m_tag_cookie, t->m_tag_id, t->m_tag_len, how); + if (p == NULL) + return (NULL); +#ifdef MAC + /* + * XXXMAC: we should probably pass off the initialization, and + * copying here? can we hide that PACKET_TAG_MACLABEL is + * special from the mbuf code? 
+ */ + if (t->m_tag_id == PACKET_TAG_MACLABEL) { + if (mac_init_mbuf_tag(p, how) != 0) { + m_tag_free(p); + return (NULL); + } + mac_copy_mbuf_tag(t, p); + } else +#endif + bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */ + return p; +} + +/* + * Copy two tag chains. The destination mbuf (to) loses any attached + * tags even if the operation fails. This should not be a problem, as + * m_tag_copy_chain() is typically called with a newly-allocated + * destination mbuf. + */ +int +m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how) +{ + struct m_tag *p, *t, *tprev = NULL; + + MBUF_CHECKSLEEP(how); + KASSERT(to && from, + ("m_tag_copy_chain: null argument, to %p from %p", to, from)); + m_tag_delete_chain(to, NULL); + SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { + t = m_tag_copy(p, how); + if (t == NULL) { + m_tag_delete_chain(to, NULL); + return 0; + } + if (tprev == NULL) + SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link); + else + SLIST_INSERT_AFTER(tprev, t, m_tag_link); + tprev = t; + } + return 1; +} diff --git a/src/VBox/Devices/Network/slirp/bsd/sys/mbuf.h b/src/VBox/Devices/Network/slirp/bsd/sys/mbuf.h new file mode 100644 index 00000000..909f300b --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bsd/sys/mbuf.h @@ -0,0 +1,1177 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 + * $FreeBSD: src/sys/sys/mbuf.h,v 1.217.2.3.4.1 2009/04/15 03:14:26 kensmith Exp $ + */ + +#ifndef _SYS_MBUF_H_ +#define _SYS_MBUF_H_ + +#ifndef VBOX +/* XXX: These includes suck. Sorry! */ +#include <sys/queue.h> +#ifdef _KERNEL +#include <sys/systm.h> +#include <vm/uma.h> +#ifdef WITNESS +#include <sys/lock.h> +#endif +#endif +#else /* VBOX */ +# include <VBox/param.h> +# include "misc.h" +# include "ext.h" + +typedef const char *c_caddr_t; + +DECL_NO_RETURN(static void) panic (char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vbox_slirp_printV(fmt, args); + va_end(args); + AssertFatalFailed(); +} +/* for non-gnu compilers */ +# define __func__ RT_GCC_EXTENSION __FUNCTION__ +# ifndef __inline +# ifdef __GNUC__ +# define __inline __inline__ +# else +# define __inline +# endif +# endif + +# undef bzero +# define bzero(a1, len) memset((a1), 0, (len)) + +/* (vvl) some definitions from sys/param.h */ +/* + * Constants related to network buffer management. 
+ * MCLBYTES must be no larger than HOST_PAGE_SIZE. + */ +# ifndef MSIZE +# define MSIZE 256 /* size of an mbuf */ +# endif /* MSIZE */ + +# ifndef MCLSHIFT +# define MCLSHIFT 11 /* convert bytes to mbuf clusters */ +# endif /* MCLSHIFT */ + +# ifndef MCLBYTES +# define MCLBYTES (1 << MCLSHIFT) /* size of an mbuf cluster */ +# endif /*MCLBYTES*/ + +# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) +# define MJUMPAGESIZE HOST_PAGE_SIZE /* jumbo cluster 4k */ +# else +# define MJUMPAGESIZE (4 * 1024) /* jumbo cluster 4k */ +# endif +# define MJUM9BYTES (9 * 1024) /* jumbo cluster 9k */ +# define MJUM16BYTES (16 * 1024) /* jumbo cluster 16k */ +#endif /* VBOX */ + +/* + * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. + * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in + * sys/param.h), which has no additional overhead and is used instead of the + * internal data area; this is done when at least MINCLSIZE of data must be + * stored. Additionally, it is possible to allocate a separate buffer + * externally and attach it to the mbuf in a way similar to that of mbuf + * clusters. + */ +#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ +#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ +#define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */ +#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ + +#if defined(_KERNEL) || defined(VBOX) +/*- + * Macros for type conversion: + * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. + * dtom(x) -- Convert data pointer within mbuf to mbuf pointer (XXX). + */ +#define mtod(m, t) ((t)((m)->m_data)) +#define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) + +/* + * Argument structure passed to UMA routines during mbuf and packet + * allocations. 
+ */ +struct mb_args { + int flags; /* Flags for mbuf being allocated */ + short type; /* Type of mbuf being allocated */ +}; +#endif /* _KERNEL */ + +#if defined(__LP64__) +#define M_HDR_PAD 6 +#else +#define M_HDR_PAD 2 +#endif + +/* + * Header present at the beginning of every mbuf. + */ +struct m_hdr { + struct mbuf *mh_next; /* next buffer in chain */ + struct mbuf *mh_nextpkt; /* next chain in queue/record */ + caddr_t mh_data; /* location of data */ + int mh_len; /* amount of data in this mbuf */ + int mh_flags; /* flags; see below */ + short mh_type; /* type of data in this mbuf */ +#ifdef VBOX + struct socket *mh_so; /*socket associated with mbuf*/ + TAILQ_ENTRY(mbuf) mh_ifq; +#endif + uint8_t pad[M_HDR_PAD];/* word align */ +}; + +/* + * Packet tag structure (see below for details). + */ +struct m_tag { + SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ + u_int16_t m_tag_id; /* Tag ID */ + u_int16_t m_tag_len; /* Length of data */ + u_int32_t m_tag_cookie; /* ABI/Module ID */ + void (*m_tag_free)(struct m_tag *); +}; + +/* + * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. + */ +struct pkthdr { + struct ifnet *rcvif; /* rcv interface */ + /* variables for ip and tcp reassembly */ + void *header; /* pointer to packet header */ + int len; /* total packet length */ + /* variables for hardware checksum */ + int csum_flags; /* flags regarding checksum */ + int csum_data; /* data field used by csum routines */ + u_int16_t tso_segsz; /* TSO segment size */ + u_int16_t ether_vtag; /* Ethernet 802.1p+q vlan tag */ + SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ +}; + +/* + * Description of external storage mapped into mbuf; valid only if M_EXT is + * set. 
+ */ +struct m_ext { + caddr_t ext_buf; /* start of buffer */ + void (*ext_free) /* free routine if not the usual */ + (void *, void *); + void *ext_args; /* optional argument pointer */ + u_int ext_size; /* size of buffer, for ext_free */ +#ifdef VBOX + volatile uint32_t *ref_cnt; /* pointer to ref count info */ +#else + volatile u_int *ref_cnt; /* pointer to ref count info */ +#endif + int ext_type; /* type of external storage */ +}; + +/* + * The core of the mbuf object along with some shortcut defines for practical + * purposes. + */ +struct mbuf { + struct m_hdr m_hdr; + union { + struct { + struct pkthdr MH_pkthdr; /* M_PKTHDR set */ + union { + struct m_ext MH_ext; /* M_EXT set */ + char MH_databuf[MHLEN]; + } MH_dat; + } MH; + char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ + } M_dat; +}; +#define m_next m_hdr.mh_next +#define m_len m_hdr.mh_len +#define m_data m_hdr.mh_data +#define m_type m_hdr.mh_type +#define m_flags m_hdr.mh_flags +#define m_nextpkt m_hdr.mh_nextpkt +#define m_act m_nextpkt +#define m_pkthdr M_dat.MH.MH_pkthdr +#define m_ext M_dat.MH.MH_dat.MH_ext +#define m_pktdat M_dat.MH.MH_dat.MH_databuf +#define m_dat M_dat.M_databuf +#ifdef VBOX +# define m_so m_hdr.mh_so +# define ifq_so m_hdr.mh_so +# define m_ifq m_hdr.mh_ifq +#endif + +/* + * mbuf flags. 
+ */ +#define M_EXT 0x00000001 /* has associated external storage */ +#define M_PKTHDR 0x00000002 /* start of record */ +#define M_EOR 0x00000004 /* end of record */ +#define M_RDONLY 0x00000008 /* associated data is marked read-only */ +#define M_PROTO1 0x00000010 /* protocol-specific */ +#define M_PROTO2 0x00000020 /* protocol-specific */ +#define M_PROTO3 0x00000040 /* protocol-specific */ +#define M_PROTO4 0x00000080 /* protocol-specific */ +#define M_PROTO5 0x00000100 /* protocol-specific */ +#define M_BCAST 0x00000200 /* send/received as link-level broadcast */ +#define M_MCAST 0x00000400 /* send/received as link-level multicast */ +#define M_FRAG 0x00000800 /* packet is a fragment of a larger packet */ +#define M_FIRSTFRAG 0x00001000 /* packet is first fragment */ +#define M_LASTFRAG 0x00002000 /* packet is last fragment */ +#define M_SKIP_FIREWALL 0x00004000 /* skip firewall processing */ +#define M_FREELIST 0x00008000 /* mbuf is on the free list */ +#define M_VLANTAG 0x00010000 /* ether_vtag is valid */ +#define M_PROMISC 0x00020000 /* packet was not for us */ +#define M_NOFREE 0x00040000 /* do not free mbuf, embedded in cluster */ +#define M_PROTO6 0x00080000 /* protocol-specific */ +#define M_PROTO7 0x00100000 /* protocol-specific */ +#define M_PROTO8 0x00200000 /* protocol-specific */ +/* + * For RELENG_{6,7} steal these flags for limited multiple routing table + * support. In RELENG_8 and beyond, use just one flag and a tag. + */ +#define M_FIB 0xF0000000 /* steal some bits to store fib number. */ + +#define M_NOTIFICATION M_PROTO5 /* SCTP notification */ + +/* + * Flags to purge when crossing layers. + */ +#define M_PROTOFLAGS \ + (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8) + +/* + * Flags preserved when copying m_pkthdr. 
+ */ +#define M_COPYFLAGS \ + (M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\ + M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB) + +/* + * External buffer types: identify ext_buf type. + */ +#define EXT_CLUSTER 1 /* mbuf cluster */ +#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ +#define EXT_JUMBOP 3 /* jumbo cluster 4096 bytes */ +#define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ +#define EXT_JUMBO16 5 /* jumbo cluster 16384 bytes */ +#define EXT_PACKET 6 /* mbuf+cluster from packet zone */ +#define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */ +#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ +#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ +#define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ +#define EXT_EXTREF 400 /* has externally maintained ref_cnt ptr */ + +/* + * Flags indicating hw checksum support and sw checksum requirements. This + * field can be directly tested against if_data.ifi_hwassist. + */ +#define CSUM_IP 0x0001 /* will csum IP */ +#define CSUM_TCP 0x0002 /* will csum TCP */ +#define CSUM_UDP 0x0004 /* will csum UDP */ +#define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */ +#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ +#define CSUM_TSO 0x0020 /* will do TSO */ + +#define CSUM_IP_CHECKED 0x0100 /* did csum IP */ +#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */ +#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ +#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ + +#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) +#define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */ + +/* + * mbuf types. + */ +#define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! 
Object is not mbuf */ +#define MT_DATA 1 /* dynamic (data) allocation */ +#define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ +#define MT_SONAME 8 /* socket name */ +#define MT_CONTROL 14 /* extra-data protocol message */ +#define MT_OOBDATA 15 /* expedited data */ +#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ + +#define MT_NOINIT 255 /* Not a type but a flag to allocate + a non-initialized mbuf */ + +#define MB_NOTAGS 0x1UL /* no tags attached to mbuf */ + +/* + * General mbuf allocator statistics structure. + * + * Many of these statistics are no longer used; we instead track many + * allocator statistics through UMA's built in statistics mechanism. + */ +struct mbstat { + u_long m_mbufs; /* XXX */ + u_long m_mclusts; /* XXX */ + + u_long m_drain; /* times drained protocols for space */ + u_long m_mcfail; /* XXX: times m_copym failed */ + u_long m_mpfail; /* XXX: times m_pullup failed */ + u_long m_msize; /* length of an mbuf */ + u_long m_mclbytes; /* length of an mbuf cluster */ + u_long m_minclsize; /* min length of data to allocate a cluster */ + u_long m_mlen; /* length of data in an mbuf */ + u_long m_mhlen; /* length of data in a header mbuf */ + + /* Number of mbtypes (gives # elems in mbtypes[] array) */ + short m_numtypes; + + /* XXX: Sendfile stats should eventually move to their own struct */ + u_long sf_iocnt; /* times sendfile had to do disk I/O */ + u_long sf_allocfail; /* times sfbuf allocation failed */ + u_long sf_allocwait; /* times sfbuf allocation had to wait */ +}; + +/* + * Flags specifying how an allocation should be made. + * + * The flag to use is as follows: + * - M_DONTWAIT or M_NOWAIT from an interrupt handler to not block allocation. + * - M_WAIT or M_WAITOK or M_TRYWAIT from wherever it is safe to block. 
+ * + * M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly and + * if we cannot allocate immediately we may return NULL, whereas + * M_WAIT/M_WAITOK/M_TRYWAIT means that if we cannot allocate resources we + * will block until they are available, and thus never return NULL. + * + * XXX Eventually just phase this out to use M_WAITOK/M_NOWAIT. + */ +#define MBTOM(how) (how) +#ifndef VBOX +#define M_DONTWAIT M_NOWAIT +#define M_TRYWAIT M_WAITOK +#define M_WAIT M_WAITOK +#else +/* @todo (r=vvl) not sure we can do it in NAT */ +# define M_WAITOK 0 +# define M_NOWAIT 0 +# define M_DONTWAIT 0 +# define M_TRYWAI 0 +# define M_WAIT 0 +#endif + +/* + * String names of mbuf-related UMA(9) and malloc(9) types. Exposed to + * !_KERNEL so that monitoring tools can look up the zones with + * libmemstat(3). + */ +#define MBUF_MEM_NAME "mbuf" +#define MBUF_CLUSTER_MEM_NAME "mbuf_cluster" +#define MBUF_PACKET_MEM_NAME "mbuf_packet" +#define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_pagesize" +#define MBUF_JUMBO9_MEM_NAME "mbuf_jumbo_9k" +#define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k" +#define MBUF_TAG_MEM_NAME "mbuf_tag" +#define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" + +#if defined(_KERNEL) || defined(VBOX) + +#ifdef WITNESS +#define MBUF_CHECKSLEEP(how) do { \ + if (how == M_WAITOK) \ + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ + "Sleeping in \"%s\"", __func__); \ +} while (0) +#else +#define MBUF_CHECKSLEEP(how) +#endif + +/* + * Network buffer allocation API + * + * The rest of it is defined in kern/kern_mbuf.c + */ + +#ifndef VBOX +extern uma_zone_t zone_mbuf; +extern uma_zone_t zone_clust; +extern uma_zone_t zone_pack; +extern uma_zone_t zone_jumbop; +extern uma_zone_t zone_jumbo9; +extern uma_zone_t zone_jumbo16; +extern uma_zone_t zone_ext_refcnt; +#endif + +#ifndef VBOX +static __inline struct mbuf *m_getcl(int how, short type, int flags); +static __inline struct mbuf *m_get(int how, short type); +static __inline struct mbuf *m_gethdr(int how, short 
type); +static __inline struct mbuf *m_getjcl(int how, short type, int flags, + int size); +static __inline struct mbuf *m_getclr(int how, short type); /* XXX */ +static __inline struct mbuf *m_free(struct mbuf *m); +static __inline void m_clget(struct mbuf *m, int how); +static __inline void *m_cljget(struct mbuf *m, int how, int size); +void mb_free_ext(struct mbuf *); +#else +static __inline struct mbuf *m_getcl(PNATState pData, int how, short type, int flags); +static __inline struct mbuf *m_get(PNATState pData, int how, short type); +static __inline struct mbuf *m_gethdr(PNATState pData, int how, short type); +static __inline struct mbuf *m_getjcl(PNATState pData, int how, + short type, int flags, int size); +static __inline struct mbuf *m_getclr(PNATState pData, int how, short type); /* XXX */ +static __inline struct mbuf *m_free(PNATState pData, struct mbuf *m); +static __inline void m_clget(PNATState pData, struct mbuf *m, int how); +static __inline void *m_cljget(PNATState pData, struct mbuf *m, int how, int size); +void mb_free_ext(PNATState, struct mbuf *); +#endif +static __inline void m_chtype(struct mbuf *m, short new_type); +static __inline struct mbuf *m_last(struct mbuf *m); + +static __inline int +m_gettype(int size) +{ + int type; + + switch (size) { + case MSIZE: + type = EXT_MBUF; + break; + case MCLBYTES: + type = EXT_CLUSTER; + break; +#if MJUMPAGESIZE != MCLBYTES + case MJUMPAGESIZE: + type = EXT_JUMBOP; + break; +#endif + case MJUM9BYTES: + type = EXT_JUMBO9; + break; + case MJUM16BYTES: + type = EXT_JUMBO16; + break; + default: + panic("%s: m_getjcl: invalid cluster size", __func__); + } + + return (type); +} + +static __inline uma_zone_t +#ifndef VBOX +m_getzone(int size) +#else +m_getzone(PNATState pData, int size) +#endif +{ + uma_zone_t zone; + + switch (size) { + case MSIZE: + zone = zone_mbuf; + break; + case MCLBYTES: + zone = zone_clust; + break; +#if MJUMPAGESIZE != MCLBYTES + case MJUMPAGESIZE: + zone = zone_jumbop; + break; 
+#endif + case MJUM9BYTES: + zone = zone_jumbo9; + break; + case MJUM16BYTES: + zone = zone_jumbo16; + break; + default: + panic("%s: m_getjcl: invalid cluster type", __func__); + } + + return (zone); +} + +static __inline struct mbuf * +#ifndef VBOX +m_get(int how, short type) +#else +m_get(PNATState pData, int how, short type) +#endif +{ + struct mb_args args; + + args.flags = 0; + args.type = type; + return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how))); +} + +/* + * XXX This should be deprecated, very little use. + */ +static __inline struct mbuf * +#ifndef VBOX +m_getclr(int how, short type) +#else +m_getclr(PNATState pData, int how, short type) +#endif +{ + struct mbuf *m; + struct mb_args args; + + args.flags = 0; + args.type = type; + m = uma_zalloc_arg(zone_mbuf, &args, how); + if (m != NULL) + bzero(m->m_data, MLEN); + return (m); +} + +static __inline struct mbuf * +#ifndef VBOX +m_gethdr(int how, short type) +#else +m_gethdr(PNATState pData, int how, short type) +#endif +{ + struct mb_args args; + + args.flags = M_PKTHDR; + args.type = type; + return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how))); +} + +static __inline struct mbuf * +#ifndef VBOX +m_getcl(int how, short type, int flags) +#else +m_getcl(PNATState pData, int how, short type, int flags) +#endif +{ + struct mb_args args; + + args.flags = flags; + args.type = type; + return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how))); +} + +/* + * m_getjcl() returns an mbuf with a cluster of the specified size attached. + * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. + * + * XXX: This is rather large, should be real function maybe. 
+ */ +static __inline struct mbuf * +#ifndef VBOX +m_getjcl(int how, short type, int flags, int size) +#else +m_getjcl(PNATState pData, int how, short type, int flags, int size) +#endif +{ + struct mb_args args; + struct mbuf *m, *n; + uma_zone_t zone; + + args.flags = flags; + args.type = type; + + m = uma_zalloc_arg(zone_mbuf, &args, how); + if (m == NULL) + return (NULL); + +#ifndef VBOX + zone = m_getzone(size); +#else + zone = m_getzone(pData, size); +#endif + n = uma_zalloc_arg(zone, m, how); + if (n == NULL) { + uma_zfree(zone_mbuf, m); + return (NULL); + } + return (m); +} + +#ifndef VBOX +static __inline void +m_free_fast(struct mbuf *m) +{ + KASSERT(SLIST_EMPTY(&m->m_pkthdr.tags), ("doing fast free of mbuf with tags")); + + uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS); +} +#else +static __inline void +m_free_fast(PNATState pData, struct mbuf *m) +{ + AssertMsg(SLIST_EMPTY(&m->m_pkthdr.tags), ("doing fast free of mbuf with tags")); + + uma_zfree_arg(zone_mbuf, m, (void *)(uintptr_t)MB_NOTAGS); +} +#endif + +static __inline struct mbuf * +#ifndef VBOX +m_free(struct mbuf *m) +#else +m_free(PNATState pData, struct mbuf *m) +#endif +{ + struct mbuf *n = m->m_next; + + if (m->m_flags & M_EXT) +#ifndef VBOX + mb_free_ext(m); +#else + mb_free_ext(pData, m); +#endif + else if ((m->m_flags & M_NOFREE) == 0) + uma_zfree(zone_mbuf, m); + return (n); +} + +static __inline void +#ifndef VBOX +m_clget(struct mbuf *m, int how) +#else +m_clget(PNATState pData, struct mbuf *m, int how) +#endif +{ + + if (m->m_flags & M_EXT) + printf("%s: %p mbuf already has cluster\n", __func__, m); + m->m_ext.ext_buf = (char *)NULL; + uma_zalloc_arg(zone_clust, m, how); + /* + * On a cluster allocation failure, drain the packet zone and retry, + * we might be able to loosen a few clusters up on the drain. 
+ */ + if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { + zone_drain(zone_pack); + uma_zalloc_arg(zone_clust, m, how); + } +} + +/* + * m_cljget() is different from m_clget() as it can allocate clusters without + * attaching them to an mbuf. In that case the return value is the pointer + * to the cluster of the requested size. If an mbuf was specified, it gets + * the cluster attached to it and the return value can be safely ignored. + * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. + */ +static __inline void * +#ifndef VBOX +m_cljget(struct mbuf *m, int how, int size) +#else +m_cljget(PNATState pData, struct mbuf *m, int how, int size) +#endif +{ + uma_zone_t zone; + + if (m && m->m_flags & M_EXT) + printf("%s: %p mbuf already has cluster\n", __func__, m); + if (m != NULL) + m->m_ext.ext_buf = NULL; + +#ifndef VBOX + zone = m_getzone(size); +#else + zone = m_getzone(pData, size); +#endif + return (uma_zalloc_arg(zone, m, how)); +} + +static __inline void +#ifndef VBOX +m_cljset(struct mbuf *m, void *cl, int type) +#else +m_cljset(PNATState pData, struct mbuf *m, void *cl, int type) +#endif +{ + uma_zone_t zone; + int size; + + switch (type) { + case EXT_CLUSTER: + size = MCLBYTES; + zone = zone_clust; + break; +#if MJUMPAGESIZE != MCLBYTES + case EXT_JUMBOP: + size = MJUMPAGESIZE; + zone = zone_jumbop; + break; +#endif + case EXT_JUMBO9: + size = MJUM9BYTES; + zone = zone_jumbo9; + break; + case EXT_JUMBO16: + size = MJUM16BYTES; + zone = zone_jumbo16; + break; + default: + panic("unknown cluster type"); + break; + } + + m->m_data = m->m_ext.ext_buf = cl; +#ifdef VBOX + m->m_ext.ext_free = (void (*)(void *, void *))0; + m->m_ext.ext_args = NULL; +#else + m->m_ext.ext_free = m->m_ext.ext_args = NULL; +#endif + m->m_ext.ext_size = size; + m->m_ext.ext_type = type; + m->m_ext.ref_cnt = uma_find_refcnt(zone, cl); + m->m_flags |= M_EXT; + +} + +static __inline void +m_chtype(struct mbuf *m, short new_type) +{ + + m->m_type = new_type; +} + 
+/* + * Return the last mbuf of the chain that starts at m, following the m_next + * links. m is dereferenced unconditionally and therefore must not be NULL. + */ +static __inline struct mbuf * +m_last(struct mbuf *m) +{ + + while (m->m_next) + m = m->m_next; + return (m); +} + +/* + * mbuf, cluster, and external object allocation macros (for compatibility + * purposes). + * + * The VBOX variants expand to calls that pass a variable named pData, which + * must be in scope at the point of use. + */ +#define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) +#ifndef VBOX +#define MGET(m, how, type) ((m) = m_get((how), (type))) +#define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) +#define MCLGET(m, how) m_clget((m), (how)) +#define MEXTADD(m, buf, size, free, args, flags, type) \ + m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type)) +#define m_getm(m, len, how, type) \ + m_getm2((m), (len), (how), (type), M_PKTHDR) +#else /* VBOX */ +#define MGET(m, how, type) ((m) = m_get(pData, (how), (type))) +#define MGETHDR(m, how, type) ((m) = m_gethdr(pData, (how), (type))) +#define MCLGET(m, how) m_clget(pData, (m), (how)) +#define MEXTADD(m, buf, size, free, args, flags, type) \ + m_extadd(pData, (m), (caddr_t)(buf), (size), (free), (args), (flags), (type)) +#define m_getm(m, len, how, type) \ + m_getm2(pData, (m), (len), (how), (type), M_PKTHDR) +#endif + +/* + * Evaluate TRUE if it's safe to write to the mbuf m's data region (this can + * be both the local data payload, or an external buffer area, depending on + * whether M_EXT is set). + */ +#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ + (!(((m)->m_flags & M_EXT)) || \ + (*((m)->m_ext.ref_cnt) == 1)) ) + +/* + * Check if the supplied mbuf has a packet header, or else panic. The + * argument is parenthesized so that an expression argument expands with the + * intended precedence. + */ +#define M_ASSERTPKTHDR(m) \ + KASSERT((m) != NULL && ((m)->m_flags & M_PKTHDR), \ + ("%s: no mbuf packet header!", __func__)) + +/* + * Ensure that the supplied mbuf is a valid, non-free mbuf. + * + * XXX: Broken at the moment. Need some UMA magic to make it work again. 
+ */ +#define M_ASSERTVALID(m) \ + KASSERT((((struct mbuf *)m)->m_flags & 0) == 0, \ + ("%s: attempted use of a free mbuf!", __func__)) + +/* + * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an + * object of the specified size at the end of the mbuf, longword aligned. + */ +#define M_ALIGN(m, len) do { \ + KASSERT(!((m)->m_flags & (M_PKTHDR|M_EXT)), \ + ("%s: M_ALIGN not normal mbuf", __func__)); \ + KASSERT((m)->m_data == (m)->m_dat, \ + ("%s: M_ALIGN not a virgin mbuf", __func__)); \ + (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \ +} while (0) + +/* + * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by + * M_DUP/MOVE_PKTHDR. + */ +#define MH_ALIGN(m, len) do { \ + KASSERT((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT), \ + ("%s: MH_ALIGN not PKTHDR mbuf", __func__)); \ + KASSERT((m)->m_data == (m)->m_pktdat, \ + ("%s: MH_ALIGN not a virgin mbuf", __func__)); \ + (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \ +} while (0) + +/* + * Compute the amount of space available before the current start of data in + * an mbuf. + * + * The M_WRITABLE() is a temporary, conservative safety measure: the burden + * of checking writability of the mbuf data area rests solely with the caller. + */ +#define M_LEADINGSPACE(m) \ + ((m)->m_flags & M_EXT ? \ + (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \ + (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \ + (m)->m_data - (m)->m_dat) + +/* + * Compute the amount of space available after the end of data in an mbuf. + * + * The M_WRITABLE() is a temporary, conservative safety measure: the burden + * of checking writability of the mbuf data area rests solely with the caller. + */ +#define M_TRAILINGSPACE(m) \ + ((m)->m_flags & M_EXT ? \ + (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size \ + - ((m)->m_data + (m)->m_len) : 0) : \ + &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len)) + +/* + * Arrange to prepend space of size plen to mbuf m. 
If a new mbuf must be + * allocated, how specifies whether to wait. If the allocation fails, the + * original mbuf chain is freed and m is set to NULL. + */ +#define M_PREPEND(m, plen, how) do { \ + struct mbuf **_mmp = &(m); \ + struct mbuf *_mm = *_mmp; \ + int _mplen = (plen); \ + int __mhow = (how); \ + \ + MBUF_CHECKSLEEP(how); \ + if (M_LEADINGSPACE(_mm) >= _mplen) { \ + _mm->m_data -= _mplen; \ + _mm->m_len += _mplen; \ + } else \ + _mm = m_prepend(_mm, _mplen, __mhow); \ + if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ + _mm->m_pkthdr.len += _mplen; \ + *_mmp = _mm; \ +} while (0) + +/* + * Change mbuf to new type. This is a relatively expensive operation and + * should be avoided. + */ +#define MCHTYPE(m, t) m_chtype((m), (t)) + +/* Length to m_copy to copy all. */ +#define M_COPYALL 1000000000 + +/* Compatibility with 4.3. */ +#define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT) + +extern int max_datalen; /* MHLEN - max_hdr */ +extern int max_hdr; /* Largest link + protocol header */ +extern int max_linkhdr; /* Largest link-level header */ +extern int max_protohdr; /* Largest protocol header */ +extern struct mbstat mbstat; /* General mbuf stats/infos */ +extern int nmbclusters; /* Maximum number of clusters */ + +struct uio; + +void m_align(struct mbuf *, int); +int m_apply(struct mbuf *, int, int, + int (*)(void *, void *, u_int), void *); +#ifndef VBOX +void m_adj(struct mbuf *, int); +int m_append(struct mbuf *, int, c_caddr_t); +struct mbuf *m_defrag(struct mbuf *, int); +struct mbuf *m_dup(struct mbuf *, int); +void m_cat(struct mbuf *, struct mbuf *); +struct mbuf *m_collapse(struct mbuf *, int, int); +void m_copyback(struct mbuf *, int, int, c_caddr_t); +struct mbuf *m_copym(struct mbuf *, int, int, int); +struct mbuf *m_copymdata(struct mbuf *, struct mbuf *, + int, int, int, int); +struct mbuf *m_copypacket(struct mbuf *, int); +struct mbuf *m_copyup(struct mbuf *n, int len, int dstoff); +void m_extadd(struct mbuf *, caddr_t, u_int, + void 
(*)(void *, void *), void *, int, int); +#else +void m_adj(PNATState, struct mbuf *, int); +int m_append(PNATState pData, struct mbuf *, int, c_caddr_t); +struct mbuf *m_defrag(PNATState, struct mbuf *, int); +struct mbuf *m_dup(PNATState, struct mbuf *, int); +void m_cat(PNATState, struct mbuf *, struct mbuf *); +struct mbuf *m_collapse(PNATState, struct mbuf *, int, int); +void m_copyback(PNATState, struct mbuf *, int, int, c_caddr_t); +struct mbuf *m_copym(PNATState, struct mbuf *, int, int, int); +struct mbuf *m_copymdata(PNATState, struct mbuf *, struct mbuf *, + int, int, int, int); +struct mbuf *m_copypacket(PNATState, struct mbuf *, int); +struct mbuf *m_copyup(PNATState, struct mbuf *n, int len, int dstoff); +void m_extadd(PNATState pData, struct mbuf *, caddr_t, u_int, + void (*)(void *, void *), void *, int, int); +#endif +void m_copydata(const struct mbuf *, int, int, caddr_t); +void m_copy_pkthdr(struct mbuf *, struct mbuf *); +void m_demote(struct mbuf *, int); +struct mbuf *m_devget(char *, int, int, struct ifnet *, + void (*)(char *, caddr_t, u_int)); +int m_dup_pkthdr(struct mbuf *, struct mbuf *, int); +u_int m_fixhdr(struct mbuf *); +struct mbuf *m_fragment(struct mbuf *, int, int); +#ifndef VBOX +void m_freem(struct mbuf *); +struct mbuf *m_getm2(struct mbuf *, int, int, short, int); +struct mbuf *m_prepend(struct mbuf *, int, int); +struct mbuf *m_pulldown(struct mbuf *, int, int, int *); +struct mbuf *m_pullup(struct mbuf *, int); +int m_sanity(struct mbuf *, int); +struct mbuf *m_split(struct mbuf *, int, int); +struct mbuf *m_unshare(struct mbuf *, int how); +#else +void m_freem(PNATState pData, struct mbuf *); +struct mbuf *m_getm2(PNATState pData, struct mbuf *, int, int, short, int); +struct mbuf *m_prepend(PNATState, struct mbuf *, int, int); +struct mbuf *m_pulldown(PNATState, struct mbuf *, int, int, int *); +struct mbuf *m_pullup(PNATState, struct mbuf *, int); +int m_sanity(PNATState, struct mbuf *, int); +struct mbuf 
*m_split(PNATState, struct mbuf *, int, int); +struct mbuf *m_unshare(PNATState, struct mbuf *, int how); +#endif +struct mbuf *m_getptr(struct mbuf *, int, int *); +u_int m_length(struct mbuf *, struct mbuf **); +void m_move_pkthdr(struct mbuf *, struct mbuf *); +void m_print(const struct mbuf *, int); +struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); + +/*- + * Network packets may have annotations attached by affixing a list of + * "packet tags" to the pkthdr structure. Packet tags are dynamically + * allocated semi-opaque data structures that have a fixed header + * (struct m_tag) that specifies the size of the memory block and a + * <cookie,type> pair that identifies it. The cookie is a 32-bit unique + * unsigned value used to identify a module or ABI. By convention this value + * is chosen as the date+time that the module is created, expressed as the + * number of seconds since the epoch (e.g., using date -u +'%s'). The type + * value is an ABI/module-specific value that identifies a particular + * annotation and is private to the module. For compatibility with systems + * like OpenBSD that define packet tags w/o an ABI/module cookie, the value + * PACKET_ABI_COMPAT is used to implement m_tag_get and m_tag_find + * compatibility shim functions and several tag types are defined below. + * Users that do not require compatibility should use a private cookie value + * so that packet tag-related definitions can be maintained privately. + * + * Note that the packet tag returned by m_tag_alloc has the default memory + * alignment implemented by malloc. To reference private data one can use a + * construct like: + * + * struct m_tag *mtag = m_tag_alloc(...); + * struct foo *p = (struct foo *)(mtag+1); + * + * if the alignment of struct m_tag is sufficient for referencing members of + * struct foo. 
Otherwise it is necessary to embed struct m_tag within the + * private data structure to insure proper alignment; e.g., + * + * struct foo { + * struct m_tag tag; + * ... + * }; + * struct foo *p = (struct foo *) m_tag_alloc(...); + * struct m_tag *mtag = &p->tag; + */ + +/* + * Persistent tags stay with an mbuf until the mbuf is reclaimed. Otherwise + * tags are expected to ``vanish'' when they pass through a network + * interface. For most interfaces this happens normally as the tags are + * reclaimed when the mbuf is free'd. However in some special cases + * reclaiming must be done manually. An example is packets that pass through + * the loopback interface. Also, one must be careful to do this when + * ``turning around'' packets (e.g., icmp_reflect). + * + * To mark a tag persistent bit-or this flag in when defining the tag id. + * The tag will then be treated as described above. + */ +#define MTAG_PERSISTENT 0x800 + +#define PACKET_TAG_NONE 0 /* Nadda */ + +/* Packet tags for use with PACKET_ABI_COMPAT. */ +#define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ +#define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ +#define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ +#define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ +#define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ +#define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ +#define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ +#define PACKET_TAG_GIF 8 /* GIF processing done */ +#define PACKET_TAG_GRE 9 /* GRE processing done */ +#define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ +#define PACKET_TAG_ENCAP 11 /* Encap. 
processing */ +#define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ +#define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ +#define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ +#define PACKET_TAG_DUMMYNET 15 /* dummynet info */ +#define PACKET_TAG_DIVERT 17 /* divert info */ +#define PACKET_TAG_IPFORWARD 18 /* ipforward info */ +#define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ +#define PACKET_TAG_PF 21 /* PF + ALTQ information */ +#define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ +#define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ +#define PACKET_TAG_CARP 28 /* CARP info */ +#ifdef VBOX +# define PACKET_TAG_ALIAS 0xab01 +# define PACKET_TAG_ETHER 0xab02 +# define PACKET_SERVICE 0xab03 +#endif + +/* Specific cookies and tags. */ + +/* Packet tag routines. */ +struct m_tag *m_tag_alloc(u_int32_t, int, int, int); +void m_tag_delete(struct mbuf *, struct m_tag *); +void m_tag_delete_chain(struct mbuf *, struct m_tag *); +void m_tag_free_default(struct m_tag *); +struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); +struct m_tag *m_tag_copy(struct m_tag *, int); +int m_tag_copy_chain(struct mbuf *, struct mbuf *, int); +void m_tag_delete_nonpersistent(struct mbuf *); + +/* + * Initialize the list of tags associated with an mbuf. + */ +static __inline void +m_tag_init(struct mbuf *m) +{ + + SLIST_INIT(&m->m_pkthdr.tags); +} + +/* + * Set up the contents of a tag. Note that this does not fill in the free + * method; the caller is expected to do that. + * + * XXX probably should be called m_tag_init, but that was already taken. + */ +static __inline void +m_tag_setup(struct m_tag *t, u_int32_t cookie, int type, int len) +{ + + t->m_tag_id = type; + t->m_tag_len = len; + t->m_tag_cookie = cookie; +} + +/* + * Reclaim resources associated with a tag. + */ +static __inline void +m_tag_free(struct m_tag *t) +{ + + (*t->m_tag_free)(t); +} + +/* + * Return the first tag associated with an mbuf. 
+ */ +static __inline struct m_tag * +m_tag_first(struct mbuf *m) +{ + + return (SLIST_FIRST(&m->m_pkthdr.tags)); +} + +/* + * Return the next tag in the list of tags associated with an mbuf. + * The mbuf argument is unused; only the tag's own list link is followed. + */ +static __inline struct m_tag * +m_tag_next(struct mbuf *m, struct m_tag *t) +{ + NOREF(m); + return (SLIST_NEXT(t, m_tag_link)); +} + +/* + * Prepend a tag to the list of tags associated with an mbuf. + */ +static __inline void +m_tag_prepend(struct mbuf *m, struct m_tag *t) +{ + + SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); +} + +/* + * Unlink a tag from the list of tags associated with an mbuf. + */ +static __inline void +m_tag_unlink(struct mbuf *m, struct m_tag *t) +{ + + SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); +} + +/* These are for OpenBSD compatibility. */ +#define MTAG_ABI_COMPAT 0 /* compatibility ABI */ + +/* Allocate a tag of the given type and length under the compatibility cookie. */ +static __inline struct m_tag * +m_tag_get(int type, int length, int fWait) +{ + return (m_tag_alloc(MTAG_ABI_COMPAT, type, length, fWait)); +} + +/* + * Look up a compatibility-cookie tag of the given type on mbuf m. Returns + * NULL without calling m_tag_locate() when the mbuf's tag list is empty. + */ +static __inline struct m_tag * +m_tag_find(struct mbuf *m, int type, struct m_tag *start) +{ + return (SLIST_EMPTY(&m->m_pkthdr.tags) ? (struct m_tag *)NULL : + m_tag_locate(m, MTAG_ABI_COMPAT, type, start)); +} + +/* XXX temporary FIB methods probably eventually use tags.*/ +#define M_FIBSHIFT 28 +#define M_FIBMASK 0x0F + +/* get the fib from an mbuf and if it is not set, return the default */ +#define M_GETFIB(_m) \ + ((((_m)->m_flags & M_FIB) >> M_FIBSHIFT) & M_FIBMASK) + +/* + * Store _fib in the FIB bits of the mbuf's m_flags, replacing any previous + * value. _m is evaluated twice, so it must not have side effects; it is + * parenthesized (as in M_GETFIB) so that an expression argument expands with + * the intended precedence. + */ +#define M_SETFIB(_m, _fib) do { \ + (_m)->m_flags &= ~M_FIB; \ + (_m)->m_flags |= (((_fib) << M_FIBSHIFT) & M_FIB); \ +} while (0) + +#endif /* _KERNEL */ + +#endif /* !_SYS_MBUF_H_ */ diff --git a/src/VBox/Devices/Network/slirp/bsd/sys/sbuf.h b/src/VBox/Devices/Network/slirp/bsd/sys/sbuf.h new file mode 100644 index 00000000..3f59c46f --- /dev/null +++ b/src/VBox/Devices/Network/slirp/bsd/sys/sbuf.h @@ -0,0 +1,95 @@ +/*- + * Copyright (c) 2000 Poul-Henning Kamp and Dag-Erling Coïdan Smørgrav + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/sys/sbuf.h,v 1.14.18.1.2.1 2009/04/15 03:14:26 kensmith Exp $ + */ + +#ifndef _SYS_SBUF_H_ +#define _SYS_SBUF_H_ + +#ifndef VBOX +#include <sys/_types.h> +#else +# include <iprt/types.h> +#endif + +/* + * Structure definition + */ +struct sbuf { + char *s_buf; /* storage buffer */ + void *s_unused; /* binary compatibility. 
*/ + int s_size; /* size of storage buffer */ + int s_len; /* current length of string */ +#define SBUF_FIXEDLEN 0x00000000 /* fixed length buffer (default) */ +#define SBUF_AUTOEXTEND 0x00000001 /* automatically extend buffer */ +#define SBUF_USRFLAGMSK 0x0000ffff /* mask of flags the user may specify */ +#define SBUF_DYNAMIC 0x00010000 /* s_buf must be freed */ +#define SBUF_FINISHED 0x00020000 /* set by sbuf_finish() */ +#define SBUF_OVERFLOWED 0x00040000 /* sbuf overflowed */ +#define SBUF_DYNSTRUCT 0x00080000 /* sbuf must be freed */ + int s_flags; /* flags */ +}; + +__BEGIN_DECLS +/* + * API functions + */ +struct sbuf *sbuf_new(struct sbuf *, char *, int, int); +#define sbuf_new_auto() \ + sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND) +void sbuf_clear(struct sbuf *); +int sbuf_setpos(struct sbuf *, int); +int sbuf_bcat(struct sbuf *, const void *, size_t); +int sbuf_bcpy(struct sbuf *, const void *, size_t); +int sbuf_cat(struct sbuf *, const char *); +int sbuf_cpy(struct sbuf *, const char *); +#ifndef VBOX +int sbuf_printf(struct sbuf *, const char *, ...) 
__printflike(2, 3); +int sbuf_vprintf(struct sbuf *, const char *, __va_list) __printflike(2, 0); +#else +int sbuf_printf(struct sbuf *, const char *, ...); +int sbuf_vprintf(struct sbuf *, const char *, va_list); +#endif +int sbuf_putc(struct sbuf *, int); +int sbuf_trim(struct sbuf *); +int sbuf_overflowed(struct sbuf *); +void sbuf_finish(struct sbuf *); +char *sbuf_data(struct sbuf *); +int sbuf_len(struct sbuf *); +int sbuf_done(struct sbuf *); +void sbuf_delete(struct sbuf *); + +#ifdef _KERNEL +struct uio; +struct sbuf *sbuf_uionew(struct sbuf *, struct uio *, int *); +int sbuf_bcopyin(struct sbuf *, const void *, size_t); +int sbuf_copyin(struct sbuf *, const void *, size_t); +#endif +__END_DECLS + +#endif diff --git a/src/VBox/Devices/Network/slirp/cksum.c b/src/VBox/Devices/Network/slirp/cksum.c new file mode 100644 index 00000000..9e15fd89 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/cksum.c @@ -0,0 +1,174 @@ +/* $Id: cksum.c $ */ +/** @file + * NAT - IP checksum generation. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp + */ + +#include <slirp.h> + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + * + * XXX Since we will never span more than 1 mbuf, we can optimise this + */ + +#define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) +#define REDUCE { l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum); } + +int cksum(struct mbuf *m, int len) +{ + register u_int16_t *w; + register int sum = 0; + register int mlen = 0; + int byte_swapped = 0; + + union + { + u_int8_t c[2]; + u_int16_t s; + } s_util; + union + { + u_int16_t s[2]; + u_int32_t l; + } l_util; + + if (m->m_len == 0) + goto cont; + w = mtod(m, u_int16_t *); + + mlen = m->m_len; + + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to even boundary. + */ + if ((1 & (long) w) && (mlen > 0)) + { + REDUCE; + sum <<= 8; + s_util.c[0] = *(u_int8_t *)w; + w = (u_int16_t *)((int8_t *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. + */ + while ((mlen -= 32) >= 0) + { + sum += w[ 0]; sum += w[ 1]; sum += w[ 2]; sum += w[ 3]; + sum += w[ 4]; sum += w[ 5]; sum += w[ 6]; sum += w[ 7]; + sum += w[ 8]; sum += w[ 9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) + { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + goto cont; + REDUCE; + while ((mlen -= 2) >= 0) + { + sum += *w++; + } + + if (byte_swapped) + { + REDUCE; + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) + { + s_util.c[1] = *(u_int8_t *)w; + sum += s_util.s; + mlen = 0; + } + else + mlen = -1; + } + else if (mlen == -1) + s_util.c[0] = *(u_int8_t *)w; + +cont: +#ifdef DEBUG + if (len) + Log(("cksum: out of data: len = %d\n", len)); +#endif + if (mlen == -1) + { + /* The last mbuf has odd # of bytes. 
Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + return (~sum & 0xffff); +} diff --git a/src/VBox/Devices/Network/slirp/counters.h b/src/VBox/Devices/Network/slirp/counters.h new file mode 100644 index 00000000..7caae948 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/counters.h @@ -0,0 +1,149 @@ +/** $Id: counters.h $ */ +/** @file + * Counters macro invocation template. + * + * This is included with different PROFILE_COUNTER and COUNTING_COUNTER + * implementations to instantiate data members, create function prototypes and + * implement these prototypes. + */ + +/* + * Copyright (C) 2007-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * COUNTERS_INIT is used before using counters.h to declare helping macro + * definitions for (de-)registering counters + */ +#ifndef COUNTERS_H +# define COUNTERS_H +# if defined(VBOX_WITH_STATISTICS) +# define REGISTER_COUNTER(name, storage, type, units, dsc) \ + do { \ + PDMDrvHlpSTAMRegisterF(pDrvIns, \ + &(storage)->Stat ## name, \ + type, \ + STAMVISIBILITY_ALWAYS, \ + units, \ + dsc, \ + "/Drivers/NAT%u/" #name, \ + pDrvIns->iInstance); \ + } while (0) +# define DEREGISTER_COUNTER(name, storage) PDMDrvHlpSTAMDeregister(pDrvIns, &(storage)->Stat ## name) +# else +# define REGISTER_COUNTER(name, storage, type, units, dsc) do {} while (0) +# define DEREGISTER_COUNTER(name, storage) do {} while (0) +# endif +#else +# undef COUNTERS_INIT +#endif + +#ifndef COUNTERS_INIT +# if !defined(PROFILE_COUNTER) && !defined(DRV_PROFILE_COUNTER) +# error (DRV_)PROFILE_COUNTER is not defied +# endif +# if !defined(COUNTING_COUNTER) && !defined(DRV_COUNTING_COUNTER) +# error (DRV_)COUNTING_COUNTER is not defined +# endif + +/* + * DRV_ prefixed are counters used in DrvNAT the rest are used in Slirp + */ + +# if defined(PROFILE_COUNTER) || defined(COUNTING_COUNTER) +PROFILE_COUNTER(Fill, "Profiling slirp fills"); +PROFILE_COUNTER(Poll, "Profiling slirp polls"); +PROFILE_COUNTER(FastTimer, "Profiling slirp fast timer"); +PROFILE_COUNTER(SlowTimer, "Profiling slirp slow timer"); +PROFILE_COUNTER(IOwrite, "Profiling IO sowrite"); +PROFILE_COUNTER(IOread, "Profiling IO soread"); + +COUNTING_COUNTER(TCP, "TCP sockets"); +COUNTING_COUNTER(TCPHot, "TCP sockets active"); +COUNTING_COUNTER(UDP, "UDP sockets"); +COUNTING_COUNTER(UDPHot, "UDP sockets active"); + +COUNTING_COUNTER(IORead_in_1, "SB IORead_in_1"); +COUNTING_COUNTER(IORead_in_1_bytes, "SB IORead_in_1_bytes"); +COUNTING_COUNTER(IORead_in_2, "SB IORead_in_2"); +COUNTING_COUNTER(IORead_in_2_1st_bytes, "SB IORead_in_2_1st_bytes"); +COUNTING_COUNTER(IORead_in_2_2nd_bytes, 
"SB IORead_in_2_2nd_bytes"); +COUNTING_COUNTER(IOWrite_in_1, "SB IOWrite_in_1"); +COUNTING_COUNTER(IOWrite_in_1_bytes, "SB IOWrite_in_1_bytes"); +COUNTING_COUNTER(IOWrite_in_2, "SB IOWrite_in_2"); +COUNTING_COUNTER(IOWrite_in_2_1st_bytes, "SB IOWrite_in_2_1st_bytes"); +COUNTING_COUNTER(IOWrite_in_2_2nd_bytes, "SB IOWrite_in_2_2nd_bytes"); +COUNTING_COUNTER(IOWrite_no_w, "SB IOWrite_no_w"); +COUNTING_COUNTER(IOWrite_rest, "SB IOWrite_rest"); +COUNTING_COUNTER(IOWrite_rest_bytes, "SB IOWrite_rest_bytes"); + +PROFILE_COUNTER(IOSBAppend_pf, "Profiling sbuf::append common"); +PROFILE_COUNTER(IOSBAppend_pf_wa, "Profiling sbuf::append all writen in network"); +PROFILE_COUNTER(IOSBAppend_pf_wf, "Profiling sbuf::append writen fault"); +PROFILE_COUNTER(IOSBAppend_pf_wp, "Profiling sbuf::append writen partly"); +COUNTING_COUNTER(IOSBAppend, "SB: Append total"); +COUNTING_COUNTER(IOSBAppend_wa, "SB: Append all is written to network "); +COUNTING_COUNTER(IOSBAppend_wf, "SB: Append nothing is written"); +COUNTING_COUNTER(IOSBAppend_wp, "SB: Append is written partly"); +COUNTING_COUNTER(IOSBAppend_zm, "SB: Append mbuf is zerro or less"); + +COUNTING_COUNTER(IOSBAppendSB, "SB: AppendSB total"); +COUNTING_COUNTER(IOSBAppendSB_w_l_r, "SB: AppendSB (sb_wptr < sb_rptr)"); +COUNTING_COUNTER(IOSBAppendSB_w_ge_r, "SB: AppendSB (sb_wptr >= sb_rptr)"); +COUNTING_COUNTER(IOSBAppendSB_w_alter, "SB: AppendSB (altering of sb_wptr)"); +COUNTING_COUNTER(MBufAllocation,"MBUF::shows number of mbufs in used list"); + +COUNTING_COUNTER(TCP_retransmit, "TCP::retransmit"); + +PROFILE_COUNTER(TCP_reassamble, "TCP::reasamble"); +PROFILE_COUNTER(TCP_input, "TCP::input"); +PROFILE_COUNTER(IP_input, "IP::input"); +PROFILE_COUNTER(IP_output, "IP::output"); +PROFILE_COUNTER(IF_encap, "IF::encap"); +PROFILE_COUNTER(ALIAS_input, "ALIAS::input"); +PROFILE_COUNTER(ALIAS_output, "ALIAS::output"); + +# else +/*DrvNAT.cpp*/ +DRV_COUNTING_COUNTER(NATRecvWakeups, "counting wakeups of NAT RX thread"); 
+DRV_PROFILE_COUNTER(NATRecv,"Time spent in NATRecv worker"); +DRV_PROFILE_COUNTER(NATRecvWait,"Time spent in NATRecv worker in waiting of free RX buffers"); +DRV_COUNTING_COUNTER(QueuePktSent, "counting packet sent via PDM Queue"); +DRV_COUNTING_COUNTER(QueuePktDropped, "counting packet drops by PDM Queue"); +DRV_COUNTING_COUNTER(ConsumerFalse, "counting consumer's reject number to process the queue's item"); +# endif +#endif /*!COUNTERS_INIT*/ + +#ifdef DRV_COUNTING_COUNTER +# undef DRV_COUNTING_COUNTER +#endif + +#ifdef DRV_PROFILE_COUNTER +# undef DRV_PROFILE_COUNTER +#endif + +#ifdef COUNTING_COUNTER +# undef COUNTING_COUNTER +#endif + +#ifdef PROFILE_COUNTER +# undef PROFILE_COUNTER +#endif diff --git a/src/VBox/Devices/Network/slirp/ctl.h b/src/VBox/Devices/Network/slirp/ctl.h new file mode 100644 index 00000000..70fc4d3f --- /dev/null +++ b/src/VBox/Devices/Network/slirp/ctl.h @@ -0,0 +1,52 @@ +/* $Id: ctl.h $ */ +/** @file + * NAT - IP subnet constants. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef _SLIRP_CTL_H_ +#define _SLIRP_CTL_H_ + +#define CTL_CMD 0 +#define CTL_EXEC 1 +#define CTL_ALIAS 2 +#define CTL_DNS 3 +#define CTL_TFTP 4 +#define CTL_GUEST 15 +#define CTL_BROADCAST 255 + + +#define CTL_CHECK_NETWORK(x) (((x) & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr) + +#define CTL_CHECK(x, ctl) ( ((RT_N2H_U32((x)) & ~pData->netmask) == (ctl)) \ + && CTL_CHECK_NETWORK(x)) + +#define CTL_CHECK_MINE(x) ( CTL_CHECK(x, CTL_ALIAS) \ + || CTL_CHECK(x, CTL_DNS) \ + || CTL_CHECK(x, CTL_TFTP)) + +#define CTL_CHECK_BROADCAST(x) CTL_CHECK((x), ~pData->netmask) + + +#endif /* _SLIRP_CTL_H_ */ diff --git a/src/VBox/Devices/Network/slirp/debug.c b/src/VBox/Devices/Network/slirp/debug.c new file mode 100644 index 00000000..81426436 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/debug.c @@ -0,0 +1,679 @@ +/* $Id: debug.c $ */ +/** @file + * NAT - debug helpers. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. + * Portions copyright (c) 2000 Kelly Price. 
+ * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> +#include <iprt/string.h> +#include <iprt/stream.h> +#include <iprt/critsect.h> +#include "zone.h" + +#ifdef DEBUG +void dump_packet(void *, int); +#endif + +#ifndef STRINGIFY +# define STRINGIFY(x) #x +#endif + +static char *g_apszTcpStates[TCP_NSTATES] = +{ + STRINGIFY(TCPS_CLOSED), + STRINGIFY(TCPS_LISTEN), + STRINGIFY(TCPS_SYN_SENT), + STRINGIFY(TCPS_SYN_RECEIVED), + STRINGIFY(TCPS_ESTABLISHED), + STRINGIFY(TCPS_CLOSE_WAIT), + STRINGIFY(TCPS_FIN_WAIT_1), + STRINGIFY(TCPS_CLOSING), + STRINGIFY(TCPS_LAST_ACK), + STRINGIFY(TCPS_FIN_WAIT_2), + STRINGIFY(TCPS_TIME_WAIT) +}; + +typedef struct DEBUGSTRSOCKETSTATE +{ + uint32_t u32SocketState; + const char *pcszSocketStateName; +} DEBUGSTRSOCKETSTATE; + +#define DEBUGSTRSOCKETSTATE_HELPER(x) {(x), #x} + +static DEBUGSTRSOCKETSTATE g_apszSocketStates[8] = +{ + DEBUGSTRSOCKETSTATE_HELPER(SS_NOFDREF), + DEBUGSTRSOCKETSTATE_HELPER(SS_ISFCONNECTING), + DEBUGSTRSOCKETSTATE_HELPER(SS_ISFCONNECTED), + DEBUGSTRSOCKETSTATE_HELPER(SS_FCANTRCVMORE), + DEBUGSTRSOCKETSTATE_HELPER(SS_FCANTSENDMORE), + DEBUGSTRSOCKETSTATE_HELPER(SS_FWDRAIN), + DEBUGSTRSOCKETSTATE_HELPER(SS_FACCEPTCONN), + DEBUGSTRSOCKETSTATE_HELPER(SS_FACCEPTONCE), +}; + +static DEBUGSTRSOCKETSTATE g_aTcpFlags[] = +{ + DEBUGSTRSOCKETSTATE_HELPER(TH_FIN), + DEBUGSTRSOCKETSTATE_HELPER(TH_SYN), + DEBUGSTRSOCKETSTATE_HELPER(TH_RST), + DEBUGSTRSOCKETSTATE_HELPER(TH_PUSH), + DEBUGSTRSOCKETSTATE_HELPER(TH_ACK), + DEBUGSTRSOCKETSTATE_HELPER(TH_URG), +}; + +/* + * Dump a packet in the same format as tcpdump -x + */ +#ifdef DEBUG +void +dump_packet(void *dat, int n) +{ + Log(("nat: PACKET DUMPED:\n%.*Rhxd\n", n, dat)); +} +#endif + +#ifdef LOG_ENABLED +static void +lprint(const char *pszFormat, ...) 
+{ + va_list args; + va_start(args, pszFormat); + RTLogPrintfV(pszFormat, args); + va_end(args); +} + +void +ipstats(PNATState pData) +{ + lprint("\n"); + + lprint("IP stats:\n"); + lprint(" %6d total packets received (%d were unaligned)\n", + ipstat.ips_total, ipstat.ips_unaligned); + lprint(" %6d with incorrect version\n", ipstat.ips_badvers); + lprint(" %6d with bad header checksum\n", ipstat.ips_badsum); + lprint(" %6d with length too short (len < sizeof(iphdr))\n", ipstat.ips_tooshort); + lprint(" %6d with length too small (len < ip->len)\n", ipstat.ips_toosmall); + lprint(" %6d with bad header length\n", ipstat.ips_badhlen); + lprint(" %6d with bad packet length\n", ipstat.ips_badlen); + lprint(" %6d fragments received\n", ipstat.ips_fragments); + lprint(" %6d fragments dropped\n", ipstat.ips_fragdropped); + lprint(" %6d fragments timed out\n", ipstat.ips_fragtimeout); + lprint(" %6d packets reassembled ok\n", ipstat.ips_reassembled); + lprint(" %6d outgoing packets fragmented\n", ipstat.ips_fragmented); + lprint(" %6d total outgoing fragments\n", ipstat.ips_ofragments); + lprint(" %6d with bad protocol field\n", ipstat.ips_noproto); + lprint(" %6d total packets delivered\n", ipstat.ips_delivered); +} + +void +tcpstats(PNATState pData) +{ + lprint("\n"); + + lprint("TCP stats:\n"); + + lprint(" %6d packets sent\n", tcpstat.tcps_sndtotal); + lprint(" %6d data packets (%d bytes)\n", + tcpstat.tcps_sndpack, tcpstat.tcps_sndbyte); + lprint(" %6d data packets retransmitted (%d bytes)\n", + tcpstat.tcps_sndrexmitpack, tcpstat.tcps_sndrexmitbyte); + lprint(" %6d ack-only packets (%d delayed)\n", + tcpstat.tcps_sndacks, tcpstat.tcps_delack); + lprint(" %6d URG only packets\n", tcpstat.tcps_sndurg); + lprint(" %6d window probe packets\n", tcpstat.tcps_sndprobe); + lprint(" %6d window update packets\n", tcpstat.tcps_sndwinup); + lprint(" %6d control (SYN/FIN/RST) packets\n", tcpstat.tcps_sndctrl); + lprint(" %6d times tcp_output did nothing\n", tcpstat.tcps_didnuttin); 
+ + lprint(" %6d packets received\n", tcpstat.tcps_rcvtotal); + lprint(" %6d acks (for %d bytes)\n", + tcpstat.tcps_rcvackpack, tcpstat.tcps_rcvackbyte); + lprint(" %6d duplicate acks\n", tcpstat.tcps_rcvdupack); + lprint(" %6d acks for unsent data\n", tcpstat.tcps_rcvacktoomuch); + lprint(" %6d packets received in sequence (%d bytes)\n", + tcpstat.tcps_rcvpack, tcpstat.tcps_rcvbyte); + lprint(" %6d completely duplicate packets (%d bytes)\n", + tcpstat.tcps_rcvduppack, tcpstat.tcps_rcvdupbyte); + + lprint(" %6d packets with some duplicate data (%d bytes duped)\n", + tcpstat.tcps_rcvpartduppack, tcpstat.tcps_rcvpartdupbyte); + lprint(" %6d out-of-order packets (%d bytes)\n", + tcpstat.tcps_rcvoopack, tcpstat.tcps_rcvoobyte); + lprint(" %6d packets of data after window (%d bytes)\n", + tcpstat.tcps_rcvpackafterwin, tcpstat.tcps_rcvbyteafterwin); + lprint(" %6d window probes\n", tcpstat.tcps_rcvwinprobe); + lprint(" %6d window update packets\n", tcpstat.tcps_rcvwinupd); + lprint(" %6d packets received after close\n", tcpstat.tcps_rcvafterclose); + lprint(" %6d discarded for bad checksums\n", tcpstat.tcps_rcvbadsum); + lprint(" %6d discarded for bad header offset fields\n", + tcpstat.tcps_rcvbadoff); + + lprint(" %6d connection requests\n", tcpstat.tcps_connattempt); + lprint(" %6d connection accepts\n", tcpstat.tcps_accepts); + lprint(" %6d connections established (including accepts)\n", tcpstat.tcps_connects); + lprint(" %6d connections closed (including %d drop)\n", + tcpstat.tcps_closed, tcpstat.tcps_drops); + lprint(" %6d embryonic connections dropped\n", tcpstat.tcps_conndrops); + lprint(" %6d segments we tried to get rtt (%d succeeded)\n", + tcpstat.tcps_segstimed, tcpstat.tcps_rttupdated); + lprint(" %6d retransmit timeouts\n", tcpstat.tcps_rexmttimeo); + lprint(" %6d connections dropped by rxmt timeout\n", + tcpstat.tcps_timeoutdrop); + lprint(" %6d persist timeouts\n", tcpstat.tcps_persisttimeo); + lprint(" %6d keepalive timeouts\n", tcpstat.tcps_keeptimeo); 
+ lprint(" %6d keepalive probes sent\n", tcpstat.tcps_keepprobe); + lprint(" %6d connections dropped by keepalive\n", tcpstat.tcps_keepdrops); + lprint(" %6d correct ACK header predictions\n", tcpstat.tcps_predack); + lprint(" %6d correct data packet header predictions\n", tcpstat.tcps_preddat); + lprint(" %6d TCP cache misses\n", tcpstat.tcps_socachemiss); + +/* lprint(" Packets received too short: %d\n", tcpstat.tcps_rcvshort); */ +/* lprint(" Segments dropped due to PAWS: %d\n", tcpstat.tcps_pawsdrop); */ + +} + +void +udpstats(PNATState pData) +{ + lprint("\n"); + + lprint("UDP stats:\n"); + lprint(" %6d datagrams received\n", udpstat.udps_ipackets); + lprint(" %6d with packets shorter than header\n", udpstat.udps_hdrops); + lprint(" %6d with bad checksums\n", udpstat.udps_badsum); + lprint(" %6d with data length larger than packet\n", udpstat.udps_badlen); + lprint(" %6d UDP socket cache misses\n", udpstat.udpps_pcbcachemiss); + lprint(" %6d datagrams sent\n", udpstat.udps_opackets); +} + +void +icmpstats(PNATState pData) +{ + lprint("\n"); + lprint("ICMP stats:\n"); + lprint(" %6d ICMP packets received\n", icmpstat.icps_received); + lprint(" %6d were too short\n", icmpstat.icps_tooshort); + lprint(" %6d with bad checksums\n", icmpstat.icps_checksum); + lprint(" %6d with type not supported\n", icmpstat.icps_notsupp); + lprint(" %6d with bad type feilds\n", icmpstat.icps_badtype); + lprint(" %6d ICMP packets sent in reply\n", icmpstat.icps_reflect); +} + +void +mbufstats(PNATState pData) +{ + /* + * (vvl) this static code can't work with mbuf zone anymore + * @todo: make statistic correct + */ + NOREF(pData); +} + +void +sockstats(PNATState pData) +{ + char buff[256]; + size_t n; + struct socket *so, *so_next; + + lprint("\n"); + + lprint( + "Proto[state] Sock Local Address, Port Remote Address, Port RecvQ SendQ\n"); + + QSOCKET_FOREACH(so, so_next, tcp) + /* { */ + n = RTStrPrintf(buff, sizeof(buff), "tcp[%s]", 
so->so_tcpcb?tcpstates[so->so_tcpcb->t_state]:"NONE"); + while (n < 17) + buff[n++] = ' '; + buff[17] = 0; + lprint("%s %3d %15s %5d ", + buff, so->s, inet_ntoa(so->so_laddr), RT_N2H_U16(so->so_lport)); + lprint("%15s %5d %5d %5d\n", + inet_ntoa(so->so_faddr), RT_N2H_U16(so->so_fport), + SBUF_LEN(&so->so_rcv), SBUF_LEN(&so->so_snd)); + LOOP_LABEL(tcp, so, so_next); + } + + QSOCKET_FOREACH(so, so_next, udp) + /* { */ + n = RTStrPrintf(buff, sizeof(buff), "udp[%d sec]", (so->so_expire - curtime) / 1000); + while (n < 17) + buff[n++] = ' '; + buff[17] = 0; + lprint("%s %3d %15s %5d ", + buff, so->s, inet_ntoa(so->so_laddr), RT_N2H_U16(so->so_lport)); + lprint("%15s %5d %5d %5d\n", + inet_ntoa(so->so_faddr), RT_N2H_U16(so->so_fport), + SBUF_LEN(&so->so_rcv), SBUF_LEN(&so->so_snd)); + LOOP_LABEL(udp, so, so_next); + } +} +#endif + +static DECLCALLBACK(size_t) +printSocket(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + struct socket *so = (struct socket*)pvValue; + PNATState pData = (PNATState)pvUser; + size_t cb = 0; + + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + Assert(pData); + + AssertReturn(strcmp(pszType, "natsock") == 0, 0); + + if (so == NULL) + return RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + "socket is null"); + if (so->s == -1) + return RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + "socket(%d)", so->s); + + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + "socket %d", so->s); + + if (so->so_type == IPPROTO_TCP) + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + " (tcp)"); + else if (so->so_type == IPPROTO_UDP) + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + " (udp)"); + else + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + " (proto %u)", so->so_type); + + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + " exp. 
in %d" + " state=%R[natsockstate]" + "%s" /* fUnderPolling */ + "%s" /* fShouldBeRemoved */ + " f_(addr:port)=%RTnaipv4:%d" + " l_(addr:port)=%RTnaipv4:%d", + so->so_expire ? so->so_expire - curtime : 0, + so->so_state, + so->fUnderPolling ? " fUnderPolling" : "", + so->fShouldBeRemoved ? " fShouldBeRemoved" : "", + so->so_faddr.s_addr, + RT_N2H_U16(so->so_fport), + so->so_laddr.s_addr, + RT_N2H_U16(so->so_lport)); + + if (so->s != -1) + { + struct sockaddr addr; + socklen_t socklen; + int status; + + socklen = sizeof(addr); + status = getsockname(so->s, &addr, &socklen); + + if (status != 0) + { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + " (getsockname failed)"); + } + else if (addr.sa_family != AF_INET) + { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + " (unexpected address family %d)", + addr.sa_family); + } + else + { + struct sockaddr_in *in_addr = (struct sockaddr_in *)&addr; + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, + " name=%RTnaipv4:%d", + in_addr->sin_addr.s_addr, + RT_N2H_U16(in_addr->sin_port)); + } + } + return cb; +} + +static DECLCALLBACK(size_t) +printNATSocketState(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + uint32_t u32SocketState = (uint32_t)(uintptr_t)pvValue; + int idxNATState = 0; + bool fFirst = true; + size_t cbReturn = 0; + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + AssertReturn(strcmp(pszType, "natsockstate") == 0, 0); + + for (idxNATState = 0; idxNATState < RT_ELEMENTS(g_apszSocketStates); ++idxNATState) + { + if (u32SocketState & g_apszSocketStates[idxNATState].u32SocketState) + { + if (fFirst) + { + cbReturn += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "%s", g_apszSocketStates[idxNATState].pcszSocketStateName); /* name is data, never the format string */ + fFirst = false; + } + else + cbReturn += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "|%s", g_apszSocketStates[idxNATState].pcszSocketStateName); + } 
+ } + + if (!cbReturn) + return RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "[unknown state %RX32]", u32SocketState); + + return cbReturn; +} + +/** + * Print callback dumping TCP Control Block in terms of RFC 793. + */ +static DECLCALLBACK(size_t) +printTcpcbRfc793(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + size_t cb = 0; + const struct tcpcb *tp = (const struct tcpcb *)pvValue; + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + AssertReturn(RTStrCmp(pszType, "tcpcb793") == 0, 0); + if (tp) + { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "TCB793[ state:%R[tcpstate] SND(UNA: %x, NXT: %x, UP: %x, WND: %x, WL1:%x, WL2:%x, ISS:%x), ", + tp->t_state, tp->snd_una, tp->snd_nxt, tp->snd_up, tp->snd_wnd, tp->snd_wl1, tp->snd_wl2, tp->iss); + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "RCV(WND: %x, NXT: %x, UP: %x, IRS:%x)]", tp->rcv_wnd, tp->rcv_nxt, tp->rcv_up, tp->irs); + } + else + { + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "TCB793[ NULL ]"); + } + return cb; +} +/* + * Prints TCP segment in terms of RFC 793. 
+ */ +static DECLCALLBACK(size_t) +printTcpSegmentRfc793(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + size_t cb = 0; + const struct tcpiphdr *ti = (const struct tcpiphdr *)pvValue; + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + AssertReturn(RTStrCmp(pszType, "tcpseg793") == 0 && ti, 0); + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "SEG[ACK: %x, SEQ: %x, LEN: %x, WND: %x, UP: %x]", + ti->ti_ack, ti->ti_seq, ti->ti_len, ti->ti_win, ti->ti_urp); + return cb; +} + +/* + * Prints TCP state + */ +static DECLCALLBACK(size_t) +printTcpState(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + size_t cb = 0; + const int idxTcpState = (int)(uintptr_t)pvValue; + char *pszTcpStateName = (idxTcpState >= 0 && idxTcpState < TCP_NSTATES) ? g_apszTcpStates[idxTcpState] : "TCPS_INVALIDE_STATE"; + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + AssertReturn(RTStrCmp(pszType, "tcpstate") == 0, 0); + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "%s", pszTcpStateName); + return cb; +} + +/* + * Prints TCP flags + */ +static DECLCALLBACK(size_t) +printTcpFlags(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + size_t cbPrint = 0; + uint32_t u32TcpFlags = (uint32_t)(uintptr_t)pvValue; + bool fSingleValue = true; + int idxTcpFlags = 0; + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + AssertReturn(RTStrCmp(pszType, "tcpflags") == 0, 0); + cbPrint += RTStrFormat(pfnOutput, + pvArgOutput, + NULL, + 0, + "tcpflags: %RX8 [", (uint8_t)u32TcpFlags); + for (idxTcpFlags = 0; idxTcpFlags < RT_ELEMENTS(g_aTcpFlags); ++idxTcpFlags) + { + if (u32TcpFlags & 
g_aTcpFlags[idxTcpFlags].u32SocketState) + { + cbPrint += RTStrFormat(pfnOutput, + pvArgOutput, + NULL, + 0, + fSingleValue ? "%s(%RX8)" : "|%s(%RX8)", + g_aTcpFlags[idxTcpFlags].pcszSocketStateName, + (uint8_t)g_aTcpFlags[idxTcpFlags].u32SocketState); + fSingleValue = false; + } + } + cbPrint += RTStrFormat(pfnOutput, + pvArgOutput, + NULL, + 0, + "]"); + return cbPrint; +} + +/* + * Prints sbuf state + */ +static DECLCALLBACK(size_t) +printSbuf(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + size_t cb = 0; + const struct sbuf *sb = (struct sbuf *)pvValue; + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + AssertReturn(RTStrCmp(pszType, "sbuf") == 0, 0); + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "[sbuf:%p cc:%d, datalen:%d, wprt:%p, rptr:%p data:%p]", + sb, sb->sb_cc, sb->sb_datalen, sb->sb_wptr, sb->sb_rptr, sb->sb_data); + return cb; +} + +/* + * Prints zone state + */ +static DECLCALLBACK(size_t) +printMbufZone(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + size_t cb = 0; + const uma_zone_t zone = (const uma_zone_t)pvValue; + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + AssertReturn(RTStrCmp(pszType, "mzone") == 0, 0); + if (!zone) + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "[zone:NULL]"); + else + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "[zone:%p name:%s, master_zone:%R[mzone]]", + zone, zone->name, zone->master_zone); + return cb; +} + +/* + * Prints zone's item state + */ +static DECLCALLBACK(size_t) +printMbufZoneItem(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + size_t cb = 0; + const struct item *it = (const struct item *)pvValue; 
+ NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + AssertReturn(RTStrCmp(pszType, "mzoneitem") == 0, 0); + if (!it) + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "[item:NULL]"); + else + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "[iptem:%p ref_count:%d, zone:%R[mzone]]", + it, it->ref_count, it->zone); + return cb; +} + +static DECLCALLBACK(size_t) +print_networkevents(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char *pszType, void const *pvValue, + int cchWidth, int cchPrecision, unsigned fFlags, + void *pvUser) +{ + size_t cb = 0; +#ifdef RT_OS_WINDOWS + WSANETWORKEVENTS *pNetworkEvents = (WSANETWORKEVENTS*)pvValue; + bool fDelim = false; +#endif + + NOREF(cchWidth); + NOREF(cchPrecision); + NOREF(fFlags); + NOREF(pvUser); + +#ifdef RT_OS_WINDOWS + AssertReturn(strcmp(pszType, "natwinnetevents") == 0, 0); + + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, "events=%02x (", + pNetworkEvents->lNetworkEvents); +# define DO_BIT(bit) \ + if (pNetworkEvents->lNetworkEvents & FD_ ## bit) \ + { \ + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, \ + "%s" #bit "(%d)", fDelim ? 
"," : "", \ + pNetworkEvents->iErrorCode[FD_ ## bit ## _BIT]); \ + fDelim = true; \ + } + DO_BIT(READ); + DO_BIT(WRITE); + DO_BIT(OOB); + DO_BIT(ACCEPT); + DO_BIT(CONNECT); + DO_BIT(CLOSE); + DO_BIT(QOS); +# undef DO_BIT + cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, 0, ")"); +#else + NOREF(pfnOutput); + NOREF(pvArgOutput); + NOREF(pszType); + NOREF(pvValue); +#endif + return cb; +} + +#if 0 +/* + * Debugging + */ +int errno_func(const char *file, int line) +{ + int err = WSAGetLastError(); + LogRel(("errno=%d (%s:%d)\n", err, file, line)); + return err; +} +#endif + +int +debug_init(PNATState pData) +{ + int rc = VINF_SUCCESS; + + static int g_fFormatRegistered; + + if (!g_fFormatRegistered) + { + + rc = RTStrFormatTypeRegister("natsock", printSocket, pData); AssertRC(rc); + rc = RTStrFormatTypeRegister("natsockstate", printNATSocketState, NULL); AssertRC(rc); + rc = RTStrFormatTypeRegister("natwinnetevents", + print_networkevents, NULL); AssertRC(rc); + rc = RTStrFormatTypeRegister("tcpcb793", printTcpcbRfc793, NULL); AssertRC(rc); + rc = RTStrFormatTypeRegister("tcpseg793", printTcpSegmentRfc793, NULL); AssertRC(rc); + rc = RTStrFormatTypeRegister("tcpstate", printTcpState, NULL); AssertRC(rc); + rc = RTStrFormatTypeRegister("tcpflags", printTcpFlags, NULL); AssertRC(rc); + rc = RTStrFormatTypeRegister("sbuf", printSbuf, NULL); AssertRC(rc); + rc = RTStrFormatTypeRegister("mzone", printMbufZone, NULL); AssertRC(rc); + rc = RTStrFormatTypeRegister("mzoneitem", printMbufZoneItem, NULL); AssertRC(rc); + g_fFormatRegistered = 1; + } + + return rc; +} diff --git a/src/VBox/Devices/Network/slirp/debug.h b/src/VBox/Devices/Network/slirp/debug.h new file mode 100644 index 00000000..b0b08b5d --- /dev/null +++ b/src/VBox/Devices/Network/slirp/debug.h @@ -0,0 +1,77 @@ +/* $Id: debug.h $ */ +/** @file + * NAT - debug helpers (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. 
+ * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef _DEBUG_H_ +#define _DEBUG_H_ + +#include <VBox/log.h> +/* we've excluded stdio.h */ +#define FILE void + +int debug_init (PNATState); +void ipstats (PNATState); +void tcpstats (PNATState); +void udpstats (PNATState); +void icmpstats (PNATState); +void mbufstats (PNATState); +void sockstats (PNATState); + +#ifdef LOG_ENABLED +# define TCP_STATE_SWITCH_TO(tp, new_tcp_state) \ + do { \ + Log2(("%R[tcpcb793] switch to %R[tcpstate] -> %R[tcpstate]\n", (tp), ((tp)->t_state), (new_tcp_state))); \ + if ((tp)->t_socket) \ + Log2(("%R[tcpcb793] %R[natsock]\n", (tp), (tp)->t_socket)); \ + (tp)->t_state = (new_tcp_state); \ + } while (0) +#else +# define TCP_STATE_SWITCH_TO(tp, new_tcp_state) do { (tp)->t_state = (new_tcp_state); } while (0) /* same statement shape as the logging variant */ +#endif + +/* TCP CB state validity macro definitions + * we need to be sure that TCP is in right state. 
+ * TCP_ACCEPTABLE_STATEX(tp, (X-states here)) + */ +#ifdef DEBUG_vvl +# define TCP_ACCEPTABLE_STATE1(tp, tcp_state1) Assert((tp)->t_state == (tcp_state1)) +# define TCP_ACCEPTABLE_STATE2(tp, tcp_state1, tcp_state2) \ + Assert( (tp)->t_state == (tcp_state1) \ + || (tp)->t_state == (tcp_state2) ) +#else +# define TCP_ACCEPTABLE_STATE1(tp, tcp_state1) do { } while(0) +# define TCP_ACCEPTABLE_STATE2(tp, tcp_state1, tcp_state2) do { } while(0) +#endif +#endif diff --git a/src/VBox/Devices/Network/slirp/dnsproxy/dnsproxy.c b/src/VBox/Devices/Network/slirp/dnsproxy/dnsproxy.c new file mode 100644 index 00000000..0e478b10 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/dnsproxy/dnsproxy.c @@ -0,0 +1,764 @@ +/* $Id: dnsproxy.c $ */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * Copyright (c) 2003,2004,2005 Armin Wolfermann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef VBOX +#include <config.h> +#include <errno.h> +#include <pwd.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#else +#include "slirp.h" +#endif + +#ifndef VBOX +#define GLOBALS 1 +#include "dnsproxy.h" + +#define RD(x) (*(x + 2) & 0x01) +#define MAX_BUFSPACE 512 + +static unsigned short queryid = 0; +#define QUERYID queryid++ + +static struct sockaddr_in authoritative_addr; +static struct sockaddr_in recursive_addr; +static int sock_query; +static int sock_answer; +static int dnsproxy_sig; + +extern int event_gotsig; +extern int (*event_sigcb)(void); + +#ifdef DEBUG +char *malloc_options = "AGZ"; +#endif + +/* signal_handler -- Native signal handler. Set external flag for libevent + * and store type of signal. 
Real signal handling is done in signal_event. + */ + +RETSIGTYPE +signal_handler(int sig) +{ + event_gotsig = 1; + dnsproxy_sig = sig; +} + +/* signal_event -- Called by libevent to deliver a signal. + */ + +int +signal_event(void) +{ + fatal("exiting on signal %d", dnsproxy_sig); + return 0; +} + +#else /* VBOX */ + +# define RD(x) (*(x + 2) & 0x01) + +# define QUERYID queryid++ + +#endif +/* timeout -- Called by the event loop when a query times out. Removes the + * query from the queue. + */ +/* ARGSUSED */ +#ifndef VBOX +static void +timeout(int fd, short event, void *arg) +{ + /* here we should check if we reached the end of the DNS server list */ + hash_remove_request(pData, (struct request *)arg); + free((struct request *)arg); + ++removed_queries; +} +#else /* VBOX */ +static void +timeout(PNATState pData, struct socket *so, void *arg) +{ + struct request *req = (struct request *)arg; + struct dns_entry *de; + /* be paranoid */ + AssertPtrReturnVoid(arg); + + if ( req->dnsgen != pData->dnsgen + || req->dns_server == NULL + || (de = TAILQ_PREV(req->dns_server, dns_list_head, de_list)) == NULL) + { + if (req->dnsgen != pData->dnsgen) + { + /* XXX: Log2 */ + LogRel(("NAT: dnsproxy: timeout: req %p dnsgen %u != %u on %R[natsock]\n", + req, req->dnsgen, pData->dnsgen, so)); + } + hash_remove_request(pData, req); + RTMemFree(req); + ++removed_queries; + /* the rest of clean up at the end of the method. 
*/ + } + else + { + struct ip *ip; + struct udphdr *udp; + int iphlen; + struct mbuf *m = NULL; + char *data; + + m = slirpDnsMbufAlloc(pData); + if (m == NULL) + { + LogRel(("NAT: Can't allocate mbuf\n")); + goto socket_clean_up; + } + + /* mbuf initialization */ + m->m_data += if_maxlinkhdr; + + ip = mtod(m, struct ip *); + udp = (struct udphdr *)&ip[1]; /* UDP header follows the IP header */ + data = (char *)&udp[1]; + iphlen = sizeof(struct ip); + + m->m_len += sizeof(struct ip); + m->m_len += sizeof(struct udphdr); + m->m_len += req->nbyte; + + ip->ip_src.s_addr = so->so_laddr.s_addr; + ip->ip_dst.s_addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_DNS); + udp->uh_dport = htons(53); /* host-order constant -> network order */ + udp->uh_sport = so->so_lport; + + memcpy(data, req->byte, req->nbyte); /* copying initial req */ + + /* req points to so->so_timeout_arg */ + req->dns_server = de; + + /* expiration will be bumped in dnsproxy_query */ + + dnsproxy_query(pData, so, m, iphlen); + /* should we free so->so_m ? */ + return; + } + + socket_clean_up: + /* This socket (so) will be detached, so we need to remove timeout(&_arg) references + * before leaving + */ + so->so_timeout = NULL; + so->so_timeout_arg = NULL; + return; + +} +#endif /* VBOX */ + +/* do_query -- Called by the event loop when a packet arrives at our + * listening socket. Read the packet, create a new query, append it to the + * queue and send it to the correct server. + * + * Slirp: this routine should be called from udp_input + * socket is Slirp's construction (here we should set expiration time for socket) + * mbuf points at the IP header so that source and destination information is easy to fetch. 
+ * iphlen - len of ip header + */ + +/* ARGSUSED */ +#ifndef VBOX +static void +do_query(int fd, short event, void *arg) +#else +void +dnsproxy_query(PNATState pData, struct socket *so, struct mbuf *m, int iphlen) +#endif +{ +#ifndef VBOX + char buf[MAX_BUFSPACE]; + unsigned int fromlen = sizeof(fromaddr); + struct timeval tv; +#else + struct ip *ip; + char *buf; + int retransmit; + struct udphdr *udp; +#endif + struct sockaddr_in addr; + struct request *req = NULL; +#ifndef VBOX + struct sockaddr_in fromaddr; +#else + struct sockaddr_in fromaddr = { 0, }; +#endif + int byte = 0; + + ++all_queries; + +#ifndef VBOX + /* Reschedule event */ + event_add((struct event *)arg, NULL); + + /* read packet from socket */ + if ((byte = recvfrom(fd, buf, sizeof(buf), 0, + (struct sockaddr *)&fromaddr, &fromlen)) == -1) { + LogRel(("recvfrom failed: %s\n", strerror(errno))); + ++dropped_queries; + return; + } + + /* check for minimum dns packet length */ + if (byte < 12) { + LogRel(("query too short from %s\n", inet_ntoa(fromaddr.sin_addr))); + ++dropped_queries; + return; + } + + /* allocate new request */ + if ((req = calloc(1, sizeof(struct request))) == NULL) { + LogRel(("calloc failed\n")); + ++dropped_queries; + return; + } + + req->id = QUERYID; + memcpy(&req->client, &fromaddr, sizeof(struct sockaddr_in)); + memcpy(&req->clientid, &buf[0], 2); + + /* where is this query coming from? 
*/ + if (is_internal(pData, fromaddr.sin_addr)) { + req->recursion = RD(buf); + DPRINTF(("Internal query RD=%d\n", req->recursion)); + } else { + /* no recursion for foreigners */ + req->recursion = 0; + DPRINTF(("External query RD=%d\n", RD(buf))); + } + + /* insert it into the hash table */ + hash_add_request(pData, req); + + /* overwrite the original query id */ + memcpy(&buf[0], &req->id, 2); + + if (req->recursion) { + + /* recursive queries timeout in 90s */ + event_set(&req->timeout, -1, 0, timeout, req); + tv.tv_sec=recursive_timeout; tv.tv_usec=0; + event_add(&req->timeout, &tv); + + /* send it to our recursive server */ + if ((byte = sendto(sock_answer, buf, (unsigned int)byte, 0, + (struct sockaddr *)&recursive_addr, + sizeof(struct sockaddr_in))) == -1) { + LogRel(("sendto failed: %s\n", strerror(errno))); + ++dropped_queries; + return; + } + + ++recursive_queries; + + } else { + + /* authoritative queries timeout in 10s */ + event_set(&req->timeout, -1, 0, timeout, req); + tv.tv_sec=authoritative_timeout; tv.tv_usec=0; + event_add(&req->timeout, &tv); + + /* send it to our authoritative server */ + if ((byte = sendto(sock_answer, buf, (unsigned int)byte, 0, + (struct sockaddr *)&authoritative_addr, + sizeof(struct sockaddr_in))) == -1) { + LogRel(("sendto failed: %s\n", strerror(errno))); + ++dropped_queries; + return; + } + ++authoritative_queries; + } + +#else /* VBOX */ + AssertPtr(pData); + + /* m->m_data points to IP header */ +#if 0 + /* XXX: for some reason it make gdb ill, + * it good to have this assert here with assumption above. 
+ */ + M_ASSERTPKTHDR(m); +#endif + + ip = mtod(m, struct ip *); + udp = (struct udphdr *)(m->m_data + iphlen); + + fromaddr.sin_addr.s_addr = ip->ip_src.s_addr; + fromaddr.sin_port = udp->uh_sport; + fromaddr.sin_family = AF_INET; + + /* iphlen equals to lenght of ip header */ + Assert(iphlen == sizeof(struct ip)); + iphlen += sizeof (struct udphdr); + + byte = m->m_len - iphlen; + buf = m->m_data + iphlen; + + /* check for minimum dns packet length */ + if (byte < 12) { + LogRel(("NAT: Query too short from %RTnaipv4\n", fromaddr.sin_addr)); + ++dropped_queries; + return; + } + + req = so->so_timeout_arg; + + if (!req) + { + + Assert(!so->so_timeout_arg); + + if ((req = RTMemAllocZ(sizeof(struct request) + byte)) == NULL) + { + LogRel(("NAT: calloc failed\n")); + ++dropped_queries; + return; + } + + req->id = QUERYID; + memcpy(&req->client, &fromaddr, sizeof(struct sockaddr_in)); + memcpy(&req->clientid, &buf[0], 2); + req->dns_server = TAILQ_LAST(&pData->pDnsList, dns_list_head); + req->dnsgen = pData->dnsgen; + if (req->dns_server == NULL) + { + RTMemFree(req); + return; + } + retransmit = 0; + so->so_timeout = timeout; + so->so_timeout_arg = req; + req->nbyte = byte; + memcpy(req->byte, buf, byte); /* copying original request */ + } + else + { + if (req->dnsgen != pData->dnsgen) + { + /* XXX: Log2 */ + LogRel(("NAT: dnsproxy: query: req %p dnsgen %u != %u on %R[natsock]\n", + req, req->dnsgen, pData->dnsgen, so)); + /* + * XXX: TODO: this probably requires more cleanup. + * Cf. XXX comment for sendto() failure below, but that + * error leg is probably untested since ~never taken. 
+ */ + ++dropped_queries; + return; + } + retransmit = 1; + } + + req->recursion = 0; + + DPRINTF(("External query RD=%d\n", RD(buf))); + + if (retransmit == 0) + hash_add_request(pData, req); + + + /* overwrite the original query id */ + memcpy(&buf[0], &req->id, 2); + + /* let's slirp to care about expiration */ + so->so_expire = curtime + recursive_timeout * 1000; + + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_family = AF_INET; + if (req->dns_server->de_addr.s_addr == (pData->special_addr.s_addr | RT_H2N_U32_C(CTL_ALIAS))) { + /* undo loopback remapping done in get_dns_addr_domain() */ + addr.sin_addr.s_addr = RT_N2H_U32_C(INADDR_LOOPBACK); + } + else { + addr.sin_addr.s_addr = req->dns_server->de_addr.s_addr; + } + addr.sin_port = htons(53); + + /* send it to our authoritative server */ + Log2(("NAT: request will be %ssent to %RTnaipv4 on %R[natsock]\n", + retransmit ? "re" : "", addr.sin_addr, so)); + + byte = sendto(so->s, buf, (unsigned int)byte, 0, + (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)); + if (byte == -1) + { + /* XXX: is it really enough? */ + LogRel(("NAT: sendto failed: %s\n", strerror(errno))); + ++dropped_queries; + return; + } + + so->so_state = SS_ISFCONNECTED; /* now it's selected */ + Log2(("NAT: request was %ssent to %RTnaipv4 on %R[natsock]\n", + retransmit ? "re" : "", addr.sin_addr, so)); + + ++authoritative_queries; + +# if 0 + /* XXX: this stuff for _debugging_ only, + * first enforce guest to send next request + * and second for faster getting timeout callback + * other option is adding couple entries in resolv.conf with + * invalid nameservers. + * + * For testing purposes could be used + * namebench -S -q 10000 -m random or -m chunk + */ + /* RTThreadSleep(3000); */ + /* curtime += 300; */ +# endif +#endif /* VBOX */ +} + +/* do_answer -- Process a packet coming from our authoritative or recursive + * server. Find the corresponding query and send answer back to querying + * host. 
+ * + * Slirp: we call this from the routine from socrecvfrom routine handling UDP responses. + * So at the moment of call response already has been readed and packed into the mbuf + */ + +/* ARGSUSED */ +#ifndef VBOX +static void +do_answer(int fd, short event, void *arg) +#else +void +dnsproxy_answer(PNATState pData, struct socket *so, struct mbuf *m) +#endif +{ +#ifndef VBOX + char buf[MAX_BUFSPACE]; + int byte = 0; + struct request *query = NULL; + + /* Reschedule event */ + event_add((struct event *)arg, NULL); + + /* read packet from socket */ + if ((byte = recvfrom(fd, buf, sizeof(buf), 0, NULL, NULL)) == -1) { + LogRel(("recvfrom failed: %s\n", strerror(errno))); + ++dropped_answers; + return; + } + + /* check for minimum dns packet length */ + if (byte < 12) { + LogRel(("answer too short\n")); + ++dropped_answers; + return; + } + + /* find corresponding query */ + if ((query = hash_find_request(pData, *((unsigned short *)&buf))) == NULL) { + ++late_answers; + return; + } + event_del(&query->timeout); + + hash_remove_request(pData, query); + + /* restore original query id */ + memcpy(&buf[0], &query->clientid, 2); + + if (sendto(sock_query, buf, (unsigned int)byte, 0, + (struct sockaddr *)&query->client, + sizeof(struct sockaddr_in)) == -1) { + LogRel(("sendto failed: %s\n", strerror(errno))); + ++dropped_answers; + } + else + ++answered_queries; + + free(query); +#else /* VBOX */ + + char *buf = NULL; + int byte = 0; + struct request *query = NULL; + + AssertPtr(pData); + + /* XXX: mbuf->data points to ??? */ + byte = m->m_len; + buf = mtod(m, char *); + + /* check for minimum dns packet length */ + if (byte < 12) { + LogRel(("NAT: Answer too short\n")); + ++dropped_answers; + return; + } + + /* find corresponding query (XXX: but see below) */ + query = hash_find_request(pData, *((unsigned short *)buf)); + + if (query == NULL) + { + /* XXX: if we haven't found anything for this request ... + * What we are expecting later? 
+ */ + ++late_answers; + so->so_expire = curtime + SO_EXPIREFAST; + Log2(("NAT: query wasn't found\n")); + return; + } + + /* + * XXX: The whole hash thing is pretty meaningless right now since + * we use a separate socket for each request, so we already know + * the answer. + * + * If the answer is not what we expect it to be, then it's + * probably a stray or malicious reply and we'd better not free a + * query owned by some other socket - that would cause + * use-after-free later on. + */ + if (query != so->so_timeout_arg) + return; + + so->so_timeout = NULL; + so->so_timeout_arg = NULL; + + hash_remove_request(pData, query); + + /* restore original query id */ + memcpy(&buf[0], &query->clientid, 2); + + ++answered_queries; + + RTMemFree(query); +#endif /* VBOX */ +} + + +#ifdef VBOX +int +dnsproxy_init(PNATState pData) +{ + /* globals initialization */ + authoritative_port = 53; + authoritative_timeout = 10; + recursive_port = 53; + recursive_timeout = 2; + stats_timeout = 3600; + dns_port = 53; + return 0; +} +#else /* !VBOX */ +/* main -- dnsproxy main function + */ +int +main(int argc, char *argv[]) +{ + int ch; + struct passwd *pw = NULL; + struct sockaddr_in addr; + struct event evq, eva; + const char *config = "/etc/dnsproxy.conf"; + int daemonize = 0; + + /* Process commandline arguments */ + while ((ch = getopt(argc, argv, "c:dhV")) != -1) { + switch (ch) { + case 'c': + config = optarg; + break; + case 'd': + daemonize = 1; + break; + case 'V': + fprintf(stderr, PACKAGE_STRING "\n"); + exit(0); + RT_FALL_THRU(); + case 'h': + default: + fprintf(stderr, + "usage: dnsproxy [-c file] [-dhV]\n" \ + "\t-c file Read configuration from file\n" \ + "\t-d Detach and run as a daemon\n" \ + "\t-h This help text\n" \ + "\t-V Show version information\n"); + exit(1); + } + } + + /* Parse configuration and check required parameters */ + if (!parse(config)) + fatal("unable to parse configuration"); + + if (!authoritative || !recursive) + fatal("No authoritative or 
recursive server defined"); + + if (!listenat) + listenat = strdup("0.0.0.0"); + + /* Create and bind query socket */ + if ((sock_query = socket(AF_INET, SOCK_DGRAM, 0)) == -1) + fatal("unable to create socket: %s", strerror(errno)); + + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_addr.s_addr = inet_addr(listenat); + addr.sin_port = htons(port); + addr.sin_family = AF_INET; + + if (bind(sock_query, (struct sockaddr *)&addr, sizeof(addr)) != 0) + fatal("unable to bind socket: %s", strerror(errno)); + + /* Create and bind answer socket */ + if ((sock_answer = socket(AF_INET, SOCK_DGRAM, 0)) == -1) + fatal("unable to create socket: %s", strerror(errno)); + + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_family = AF_INET; + + if (bind(sock_answer, (struct sockaddr *)&addr, sizeof(addr)) != 0) + fatal("unable to bind socket: %s", strerror(errno)); + + /* Fill sockaddr_in structs for both servers */ + memset(&authoritative_addr, 0, sizeof(struct sockaddr_in)); + authoritative_addr.sin_addr.s_addr = inet_addr(authoritative); + authoritative_addr.sin_port = htons(authoritative_port); + authoritative_addr.sin_family = AF_INET; + + memset(&recursive_addr, 0, sizeof(struct sockaddr_in)); + recursive_addr.sin_addr.s_addr = inet_addr(recursive); + recursive_addr.sin_port = htons(recursive_port); + recursive_addr.sin_family = AF_INET; + + /* Daemonize if requested and switch to syslog */ + if (daemonize) { + if (daemon(0, 0) == -1) + fatal("unable to daemonize"); + log_syslog("dnsproxy"); + } + + /* Find less privileged user */ + if (user) { + pw = getpwnam(user); + if (!pw) + fatal("unable to find user %s", user); + } + + /* Do a chroot if requested */ + if (chrootdir) { + if (chdir(chrootdir) || chroot(chrootdir)) + fatal("unable to chroot to %s", chrootdir); + chdir("/"); + } + + /* Drop privileges */ + if (user) { + if (setgroups(1, &pw->pw_gid) < 0) + fatal("setgroups: %s", strerror(errno)); +#if defined(HAVE_SETRESGID) + if (setresgid(pw->pw_gid, 
pw->pw_gid, pw->pw_gid) < 0) + fatal("setresgid: %s", strerror(errno)); +#elif defined(HAVE_SETREGID) + if (setregid(pw->pw_gid, pw->pw_gid) < 0) + fatal("setregid: %s", strerror(errno)); +#else + if (setegid(pw->pw_gid) < 0) + fatal("setegid: %s", strerror(errno)); + if (setgid(pw->pw_gid) < 0) + fatal("setgid: %s", strerror(errno)); +#endif +#if defined(HAVE_SETRESUID) + if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) < 0) + fatal("setresuid: %s", strerror(errno)); +#elif defined(HAVE_SETREUID) + if (setreuid(pw->pw_uid, pw->pw_uid) < 0) + fatal("setreuid: %s", strerror(errno)); +#else + if (seteuid(pw->pw_uid) < 0) + fatal("seteuid: %s", strerror(errno)); + if (setuid(pw->pw_uid) < 0) + fatal("setuid: %s", strerror(errno)); +#endif + } + + /* Init event handling */ + event_init(); + + event_set(&evq, sock_query, EV_READ, do_query, &evq); + event_add(&evq, NULL); + + event_set(&eva, sock_answer, EV_READ, do_answer, &eva); + event_add(&eva, NULL); + + /* Zero counters and start statistics timer */ + statistics_start(); + + /* Take care of signals */ + if (signal(SIGINT, signal_handler) == SIG_ERR) + fatal("unable to mask signal SIGINT: %s", strerror(errno)); + + if (signal(SIGTERM, signal_handler) == SIG_ERR) + fatal("unable to mask signal SIGTERM: %s", strerror(errno)); + + if (signal(SIGHUP, SIG_IGN) == SIG_ERR) + fatal("unable to mask signal SIGHUP: %s", strerror(errno)); + + event_sigcb = signal_event; + + /* Start libevent main loop */ + event_dispatch(); + + return 0; + +} +#endif diff --git a/src/VBox/Devices/Network/slirp/dnsproxy/dnsproxy.h b/src/VBox/Devices/Network/slirp/dnsproxy/dnsproxy.h new file mode 100644 index 00000000..584f25dd --- /dev/null +++ b/src/VBox/Devices/Network/slirp/dnsproxy/dnsproxy.h @@ -0,0 +1,157 @@ +/* $Id: dnsproxy.h $ */ +/* + * Copyright (c) 2003,2004,2005 Armin Wolfermann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DNSPROXY_H_ +#define _DNSPROXY_H_ + +/* LONGLONG */ +#include <sys/types.h> + +#if TIME_WITH_SYS_TIME +# include <sys/time.h> +# include <time.h> +#else +# if HAVE_SYS_TIME_H +# include <sys/time.h> +# else +# include <time.h> +# endif +#endif + +#ifndef VBOX +#include <sys/socket.h> +#include <netinet/in.h> +#ifdef HAVE_ARPA_INET_H +# include <arpa/inet.h> +#endif +#include <stdarg.h> + +#include <event.h> + +#ifdef DEBUG +#define DPRINTF(x) do { printf x ; } while (0) +#else +#define DPRINTF(x) +#endif + +#ifdef GLOBALS +#define GLOBAL(a) a +#define GLOBAL_INIT(a,b) a = b +#else +#define GLOBAL(a) extern a +#define GLOBAL_INIT(a,b) extern a +#endif +#endif + +struct request { + unsigned short id; + + struct sockaddr_in client; + unsigned short clientid; + unsigned char recursion; + +#ifndef VBOX + struct event timeout; +#endif + + struct request **prev; + struct request *next; +#ifdef VBOX + /* this field used for saving last attempt + * to connect server, timeout function should change + * it's value on next server. 
And dnsproxy_query should + * initializate with first server in the list + * + * dnsgen is a generation number - a copy of pData->dnsgen at the + * time of request creation (poor man's weak reference). + * dns_server must not be used if pData->dnsgen changed. + */ + struct dns_entry *dns_server; + uint32_t dnsgen; + int nbyte; /* length of dns request */ + char byte[1]; /* copy of original request */ +#endif +}; + +#ifndef VBOX +GLOBAL_INIT(unsigned int authoritative_port, 53); +GLOBAL_INIT(unsigned int authoritative_timeout, 10); +GLOBAL_INIT(unsigned int recursive_port, 53); +GLOBAL_INIT(unsigned int recursive_timeout, 90); +GLOBAL_INIT(unsigned int stats_timeout, 3600); +GLOBAL_INIT(unsigned int port, 53); + +GLOBAL(char *authoritative); +GLOBAL(char *chrootdir); +GLOBAL(char *listenat); +GLOBAL(char *recursive); +GLOBAL(char *user); + +GLOBAL(unsigned long active_queries); +GLOBAL(unsigned long all_queries); +GLOBAL(unsigned long authoritative_queries); +GLOBAL(unsigned long recursive_queries); +GLOBAL(unsigned long removed_queries); +GLOBAL(unsigned long dropped_queries); +GLOBAL(unsigned long answered_queries); +GLOBAL(unsigned long dropped_answers); +GLOBAL(unsigned long late_answers); +GLOBAL(unsigned long hash_collisions); + +/* dnsproxy.c */ +RETSIGTYPE signal_handler(int); +int signal_event(void); + +/* daemon.c */ +int daemon(int, int); +#endif + +/* hash.c */ +void hash_add_request(PNATState, struct request *); +void hash_remove_request(PNATState, struct request *); +struct request *hash_find_request(PNATState, unsigned short); + +/* internal.c */ +int add_internal(PNATState, char *); +int is_internal(PNATState, struct in_addr); + +#ifndef VBOX +/* log.c */ +void log_syslog(const char *); +void info(const char *, ...); +void error(const char *, ...); +void fatal(const char *, ...); + +/* parse.c */ +int parse(const char *); + +/* statistics.c */ +void statistics_start(void); +#else +# define DPRINTF Log2 +int dnsproxy_init(PNATState pData); +void 
dnsproxy_query(PNATState pData, struct socket *so, struct mbuf *m, int iphlen); +void dnsproxy_answer(PNATState pData, struct socket *so, struct mbuf *m); +#endif + +#endif /* _DNSPROXY_H_ */ diff --git a/src/VBox/Devices/Network/slirp/dnsproxy/hash.c b/src/VBox/Devices/Network/slirp/dnsproxy/hash.c new file mode 100644 index 00000000..0de6cbea --- /dev/null +++ b/src/VBox/Devices/Network/slirp/dnsproxy/hash.c @@ -0,0 +1,77 @@ +/* $Id: hash.c $ */ +/* + * Copyright (c) 2003,2004 Armin Wolfermann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef VBOX +#include <config.h> +#include "dnsproxy.h" + +#define HASHSIZE 10 +#define HASH(id) (id & ((1 << HASHSIZE) - 1)) + +static struct request *request_hash[1 << HASHSIZE]; +#else /* VBOX */ +# include "slirp.h" +#endif + +void +hash_add_request(PNATState pData, struct request *req) +{ + struct request **p = &request_hash[HASH(req->id)]; + Log2(("NAT: hash req id %d has been added \n", req->id)); + + if ((req->next = *p) != NULL) { + (*p)->prev = &req->next; + ++hash_collisions; + } + *p = req; + req->prev = p; + + ++active_queries; +} + +void +hash_remove_request(PNATState pData, struct request *req) +{ + if (!req->prev) return; + if (req->next) + req->next->prev = req->prev; + *req->prev = req->next; + req->prev = NULL; + + --active_queries; +} + +struct request * +hash_find_request(PNATState pData, unsigned short id) +{ + struct request *req = request_hash[HASH(id)]; + Log2(("NAT: hash try to find req by id %d \n", id)); + + for (;;) { + if (!req) break; + if (req->id == id) break; + req = req->next; + } + + return req; +} diff --git a/src/VBox/Devices/Network/slirp/ext.h b/src/VBox/Devices/Network/slirp/ext.h new file mode 100644 index 00000000..e034496d --- /dev/null +++ b/src/VBox/Devices/Network/slirp/ext.h @@ -0,0 +1,105 @@ +/** $Id: ext.h $ */ +/** @file + * NAT - some externals helpers + */ + +/* + * Copyright (C) 2007-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef IN_BSD +# define zone_mbuf slirp_zone_mbuf(pData) +# define zone_clust slirp_zone_clust(pData) +# define zone_pack slirp_zone_pack(pData) +# define zone_jumbop slirp_zone_jumbop(pData) +# define zone_jumbo9 slirp_zone_jumbo9(pData) +# define zone_jumbo16 slirp_zone_jumbo16(pData) +# define zone_ext_refcnt slirp_zone_ext_refcnt(pData) +static inline uma_zone_t slirp_zone_mbuf(PNATState); +static inline uma_zone_t slirp_zone_clust(PNATState); +static inline uma_zone_t slirp_zone_pack(PNATState); +static inline uma_zone_t slirp_zone_jumbop(PNATState); +static inline uma_zone_t slirp_zone_jumbo9(PNATState); +static inline uma_zone_t slirp_zone_jumbo16(PNATState); +static inline uma_zone_t slirp_zone_ext_refcnt(PNATState); +#else +# undef zone_mbuf +# undef zone_clust +# undef zone_pack +# undef zone_jumbop +# undef zone_jumbo9 +# undef zone_jumbo16 +# undef zone_ext_refcnt + +# define zone_mbuf pData->zone_mbuf +# define zone_clust pData->zone_clust +# define zone_pack pData->zone_pack +# define zone_jumbop pData->zone_jumbop +# define zone_jumbo9 pData->zone_jumbo9 +# define zone_jumbo16 pData->zone_jumbo16 +# define zone_ext_refcnt pData->zone_ext_refcnt +#endif + +#ifndef _EXT_H_ +#define _EXT_H_ + +# define fprintf vbox_slirp_fprintf +# define printf vbox_slirp_printf + +# ifndef vbox_slirp_printfV +DECLINLINE(void) vbox_slirp_printV(char *format, va_list args) +{ + char buffer[1024]; + memset(buffer, 0, 1024); + RTStrPrintfV(buffer, 1024, format, args); + + LogRel(("NAT:EXT: %s\n", buffer)); +} +# endif + +# ifndef vbox_slirp_printf +DECLINLINE(void) vbox_slirp_printf(char *format, ...) 
+{ + va_list args; + va_start(args, format); + vbox_slirp_printV(format, args); + va_end(args); +} +# endif + +# ifndef vbox_slirp_fprintf +DECLINLINE(void) vbox_slirp_fprintf(void *ignored, char *format, ...) +{ +# ifdef LOG_ENABLED + va_list args; + NOREF(ignored); + va_start(args, format); + vbox_slirp_printV(format, args); + va_end(args); +# else + NOREF(format); + NOREF(ignored); +# endif +} +# endif + +#endif + diff --git a/src/VBox/Devices/Network/slirp/hostres.c b/src/VBox/Devices/Network/slirp/hostres.c new file mode 100644 index 00000000..1ad55439 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/hostres.c @@ -0,0 +1,1529 @@ +/* $Id: hostres.c $ */ +/** @file + * Host resolver + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef RT_OS_WINDOWS +# include <netdb.h> +#endif +#include <iprt/assert.h> +#include <iprt/ctype.h> +#include <iprt/errcore.h> +#include <slirp.h> + +#define isdigit(ch) RT_C_IS_DIGIT(ch) +#define isalpha(ch) RT_C_IS_ALPHA(ch) + +#define DNS_CONTROL_PORT_NUMBER 53 +/* see RFC 1035(4.1.1) */ +struct dnsmsg_header +{ + uint16_t id; + +#ifdef RT_OS_WINDOWS + /* size of the type forces alignment */ +# define U16_BIT_FIELD_T uint16_t +#else + /* gcc -pedantic complains about implementaion-defined types */ +# define U16_BIT_FIELD_T unsigned int +#endif + + /* XXX: endianness */ + U16_BIT_FIELD_T rd:1; + U16_BIT_FIELD_T tc:1; + U16_BIT_FIELD_T aa:1; + U16_BIT_FIELD_T opcode:4; + U16_BIT_FIELD_T qr:1; + U16_BIT_FIELD_T rcode:4; + U16_BIT_FIELD_T Z:3; + U16_BIT_FIELD_T ra:1; + + uint16_t qdcount; + uint16_t ancount; + uint16_t nscount; + uint16_t arcount; +}; +AssertCompileSize(struct dnsmsg_header, 12); + +#define QR_Query 0 +#define QR_Response 1 + +#define OpCode_Query 0 + +#define RCode_NoError 0 +#define RCode_FormErr 1 +#define RCode_ServFail 2 +#define RCode_NXDomain 3 +#define RCode_NotImp 4 +#define RCode_Refused 5 + +#define Type_A 1 +#define Type_CNAME 5 +#define Type_PTR 12 +#define Type_ANY 255 + +#define Class_IN 1 +#define Class_ANY 255 + +/* compressed label encoding */ +#define DNS_LABEL_PTR 0xc0 + +#define DNS_MAX_UDP_LEN 512 +#define DNS_MAX_LABEL_LEN 63 +#define DNS_MAX_NAME_LEN 255 + + +/* + * A tree of labels. + * + * rfc1035#section-3.1 + * rfc1035#section-4.1.4 + */ +struct label +{ + const uint8_t *buf; + ssize_t off; + struct label *children; + struct label *sibling; +}; + + +/* + * A structure to build DNS response. 
+ */ +struct response +{ + PNATState pData; + + uint32_t src; + uint16_t sport; + + struct label *labels; /* already encoded in buf */ + size_t qlen; /* original question */ + size_t end; /* of data in buf */ + + /* continuous buffer to build the response */ + uint8_t buf[DNS_MAX_UDP_LEN]; +}; + + +static int verify_header(PNATState pData, struct mbuf **pMBuf); +static struct mbuf *refuse_mbuf(struct mbuf *m, unsigned int rcode); + +static int respond(struct response *res); +static int resolve(struct response *res, uint16_t qtype, size_t qname); +static int resolve_reverse(struct response *res, uint16_t qtype, size_t qname, + struct in_addr addr); + +static int refuse(struct response *res, unsigned int rcode); + + +static ssize_t append_a(struct response *res, const char *name, struct in_addr addr); +static ssize_t append_cname(struct response *res, const char *name, const char *cname); +static ssize_t append_ptr(struct response *res, const char *inaddrname, const char *name); +static ssize_t append_name_rr(struct response *res, const char *question, int type, const char *answer); +static ssize_t append_rrhdr(struct response *res, const char *name, uint16_t type, uint32_t ttl); +static ssize_t append_name(struct response *res, const char *name); +static ssize_t append_u32(struct response *res, uint32_t value); +static ssize_t append_u16(struct response *res, uint16_t value); +static ssize_t append_u8(struct response *res, uint8_t value); +static ssize_t append_bytes(struct response *res, uint8_t *p, size_t size); +static ssize_t check_space(struct response *res, size_t size); + +static int get_in_addr_arpa(struct in_addr *paddr, struct label *root); +static int labelstrcmp(struct label *l, const char *s); +static void strnlabels(char *namebuf, size_t nbuflen, const uint8_t *msg, size_t off); + +/*static void LogLabelsTree(const char *before, struct label *l, const char *after); - unused */ +static void free_labels(struct label *root); + +#ifdef 
VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER +static void alterHostentWithDataFromDNSMap(PNATState pData, struct hostent *h); +static PDNSMAPPINGENTRY getDNSMapByName(PNATState pData, const char *name); +static PDNSMAPPINGENTRY getDNSMapByAddr(PNATState pData, const uint32_t *pu32IpAddress); +#endif + +#if 1 /* XXX */ +# define LogErr(args) Log2(args) +# define LogDbg(args) Log3(args) +#else +# define LogErr(args) LogRel(args) +# define LogDbg(args) LogRel(args) +#endif + + +static void hostres_async(struct response *res); +static void hostres_slirp_reply(struct response *res); + + +/* + * Host resolver is called on slirp thread from udp.c + */ +struct mbuf * +hostresolver(PNATState pData, struct mbuf *m, uint32_t src, uint16_t sport) +{ + struct response *res; + u_int mlen; + int rc; + + rc = verify_header(pData, &m); + if (RT_FAILURE(rc)) + return m; + + res = RTMemAllocZ(sizeof(*res)); + if (res == NULL) + return refuse_mbuf(m, RCode_ServFail); + + res->pData = pData; + res->src = src; + res->sport = sport; + + mlen = m_length(m, NULL); + m_copydata(m, 0, mlen, (char *)res->buf); + res->end = res->qlen = mlen; + + rc = slirp_call_hostres(pData->pvUser, NULL, 0, + RTREQFLAGS_VOID | RTREQFLAGS_NO_WAIT, + (PFNRT)hostres_async, 1, res); + + if (RT_FAILURE(rc)) + { + LogErr(("NAT: hostres: failed to post async request: %Rrc\n", rc)); + RTMemFree(res); + return refuse_mbuf(m, RCode_ServFail); + } + + m_freem(pData, m); + return NULL; +} + + +/* + * Do quick sanity-checks on the request before doing async + * resolution. If we don't like it, immediately drop or convert to + * response in place and bounce back the mbuf. 
+ */ +static int +verify_header(PNATState pData, struct mbuf **pMBuf) +{ + struct mbuf *m; + struct dnsmsg_header *pHdr; + size_t mlen; + + m = *pMBuf; + mlen = m_length(m, NULL); + + /* + * In theory we should have called + * + * m = m_pullup(m, sizeof(struct dnsmsg_header)); + * + * here first (which should have been a nop), but the way mbufs + * are used in NAT will always cause a copy that will have no + * leading space. We can use m_copyup() instead, but if we are + * peeking under the hood anyway, we might as well just rely on + * the fact that this header will be contiguous. + */ + pHdr = mtod(m, struct dnsmsg_header *); + + if (RT_UNLIKELY(mlen < sizeof(*pHdr))) + { + LogErr(("NAT: hostres: packet too small: %zu bytes\n", mlen)); + goto drop; /* can't even refuse it */ + } + + if (RT_UNLIKELY(mlen > DNS_MAX_UDP_LEN)) + { + LogErr(("NAT: hostres: packet too large: %zu bytes\n", mlen)); + goto drop; /* don't echo back huge packets */ + } + + if (RT_UNLIKELY(pHdr->qr != QR_Query)) + { + LogErr(("NAT: hostres: unexpected response\n")); + goto drop; /* ignore */ + } + + if (RT_UNLIKELY(pHdr->opcode != OpCode_Query)) + { + LogErr(("NAT: hostres: unsupported opcode %d\n", pHdr->opcode)); + refuse_mbuf(m, RCode_NotImp); + return VERR_PARSE_ERROR; + } + + if (RT_UNLIKELY(pHdr->qdcount != RT_H2N_U16_C(1))) + { + LogErr(("NAT: hostres: multiple questions\n")); + refuse_mbuf(m, RCode_FormErr); + return VERR_PARSE_ERROR; + } + + if (RT_UNLIKELY(pHdr->ancount != 0)) + { + LogErr(("NAT: hostres: answers in query\n")); + refuse_mbuf(m, RCode_FormErr); + return VERR_PARSE_ERROR; + } + + /* XXX: let it fail when we parse it? 
+ */ + if (RT_UNLIKELY(mlen < sizeof(*pHdr) + + /* qname */ 1 + + /* qtype */ 2 + + /* qclass */ 2)) + { + LogErr(("NAT: hostres: packet too small: %zu bytes\n", mlen)); + refuse_mbuf(m, RCode_FormErr); + return VERR_PARSE_ERROR; + } + + return VINF_SUCCESS; + + drop: + if (m != NULL) + m_freem(pData, m); + *pMBuf = NULL; + return VERR_PARSE_ERROR; +} + + +/* + * Turn the request in mbuf into an error response. This is used on + * slirp thread for pre-checks before we do async resolution. + * + * Only the QR, RCODE, RA and AA header fields are rewritten (QR becomes + * a response, RA is set, AA is cleared); the same mbuf is returned. + */ +static struct mbuf * +refuse_mbuf(struct mbuf *m, unsigned int rcode) +{ + struct dnsmsg_header *pHdr; + + pHdr = mtod(m, struct dnsmsg_header *); + pHdr->qr = QR_Response; + pHdr->rcode = rcode; + pHdr->ra = 1; + pHdr->aa = 0; + + return m; +} + + +/* + * Actual resolution runs on the dedicated host resolver thread. + * + * respond() builds the reply in res->buf[], the label list collected + * while building it is released, and hostres_slirp_reply() is then + * posted through slirp_call() with res as its argument. If the request + * cannot be posted, res is freed here. + */ +static void +hostres_async(struct response *res) +{ + int rc; + + /* build reply in res->buf[] */ + respond(res); + + free_labels(res->labels); + + rc = slirp_call(res->pData->pvUser, NULL, 0, + RTREQFLAGS_VOID | RTREQFLAGS_NO_WAIT, + (PFNRT)hostres_slirp_reply, 1, res); + + if (RT_FAILURE(rc)) + { + LogErr(("NAT: hostres: failed to post async reply: %Rrc\n", rc)); + RTMemFree(res); + } +} + + +/* + * We are back to the slirp thread to send the reply. 
+ */ +static void +hostres_slirp_reply(struct response *res) +{ + PNATState pData = res->pData; + struct sockaddr_in src, dst; + struct mbuf *m = NULL; + size_t mlen; + int ok; + + mlen = if_maxlinkhdr + sizeof(struct ip) + sizeof(struct udphdr); + mlen += res->end; + + if (mlen <= MHLEN) + { + m = m_gethdr(pData, M_NOWAIT, MT_HEADER); + } + else + { + void *pvBuf; /* ignored */ + size_t cbBuf; + + m = slirp_ext_m_get(pData, mlen, &pvBuf, &cbBuf); + } + + if (m == NULL) + goto out; + + /* reserve leading space for ethernet header */ + m->m_data += if_maxlinkhdr; + + /* reserve leading space for protocol headers */ + m->m_pkthdr.header = mtod(m, void *); + m->m_data += sizeof(struct ip) + sizeof(struct udphdr); + + m->m_len = 0; + ok = m_append(pData, m, (int)res->end, (c_caddr_t)res->buf); + if (!ok) + { + m_freem(pData, m); + goto out; + } + + src.sin_addr.s_addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_DNS); + src.sin_port = RT_H2N_U16_C(53); + dst.sin_addr.s_addr = res->src; + dst.sin_port = res->sport; + + udp_output2(pData, NULL, m, &src, &dst, IPTOS_LOWDELAY); + + out: + RTMemFree(res); +} + + +static int +respond(struct response *res) +{ + struct dnsmsg_header *pHdr; + size_t off; + size_t qname; + uint16_t qtype, qclass; + struct in_addr in_addr_arpa; + struct label *l; + + /* convert header to response */ + pHdr = (struct dnsmsg_header *)res->buf; + pHdr->qr = QR_Response; + pHdr->rcode = RCode_NoError; + pHdr->ra = 1; /* the host provides recursion */ + pHdr->aa = 0; /* we are not authoritative */ + pHdr->Z = 0; /* clear rfc2535 dnssec bits */ + + off = sizeof(*pHdr); + qname = off; + + /* + * Parse/verify QNAME and collect the suffixes to be used for + * compression in the answer. + */ + while (off < res->qlen) { + size_t loff, llen; + uint8_t c; + + c = res->buf[off]; + + /* + * There's just one question with just one name, so there are + * no other labels it can point to. 
Thus all well-formed + * names with a pointer can only be infinite loops. + */ + if ((c & DNS_LABEL_PTR) == DNS_LABEL_PTR) + { + LogErr(("NAT: hostres: label pointer in the qname\n")); + return refuse(res, RCode_FormErr); + } + + if ((c & DNS_LABEL_PTR) != 0) + { + LogErr(("NAT: hostres: unexpected high bits\n")); + return refuse(res, RCode_FormErr); + } + + /* + * label of "llen" chars starts at offset "loff". + */ + loff = off; + llen = c; + ++off; + + if (loff + 1 + llen > res->qlen) + { + LogErr(("NAT: hostres: length byte points beyound packet boundary\n")); + return refuse(res, RCode_FormErr); + } + + if (llen == 0) /* end of the label list */ + { + break; + } + + /* do only minimal verification of the label */ + while (off < loff + 1 + llen) + { + c = res->buf[off]; + ++off; + + if (c == '.') + { + LogErr(("NAT: hostres: dot inside label\n")); + return refuse(res, RCode_FormErr); + } + + if (c == '\0') + { + LogErr(("NAT: hostres: nul byte inside label\n")); + return refuse(res, RCode_FormErr); + } + } + + /* + * Remember the label (by offset into res->buf) so that answers can + * be compressed against it. The new node is pushed on the front of + * res->labels. An allocation failure is answered with ServFail + * instead of dereferencing NULL; labels pushed so far remain on + * res->labels. + */ + l = RTMemAllocZ(sizeof(*l)); + if (RT_UNLIKELY(l == NULL)) + { + LogErr(("NAT: hostres: out of memory for qname label\n")); + return refuse(res, RCode_ServFail); + } + l->buf = res->buf; + l->off = loff; + l->children = res->labels; + res->labels = l; + } + + /* + * QTYPE and QCLASS + */ + if (RT_UNLIKELY(off + 4 > res->qlen)) + { + LogErr(("NAT: hostres: question too short\n")); + return refuse(res, RCode_FormErr); + } + + memcpy(&qtype, &res->buf[off], sizeof(qtype)); + qtype = RT_N2H_U16(qtype); + off += sizeof(qtype); + + memcpy(&qclass, &res->buf[off], sizeof(qclass)); + qclass = RT_N2H_U16(qclass); + off += sizeof(qclass); + + if ( qclass != Class_IN + && qclass != Class_ANY) + { + LogErr(("NAT: hostres: unsupported qclass %d\n", qclass)); + return refuse(res, RCode_NoError); + } + + if ( qtype != Type_A + && qtype != Type_CNAME + && qtype != Type_PTR + && qtype != Type_ANY) + { + LogErr(("NAT: hostres: unsupported qtype %d\n", qtype)); + return refuse(res, RCode_NoError); + } + + + /** + * Check if there's anything after the question. 
If query says it + * has authority or additional records, ignore and drop them + * without parsing. + * + * We have already rejected queries with answer(s) before. We + * have ensured that qname in the question doesn't contain + * pointers, so truncating the buffer is safe. + */ + if (off < res->qlen) + { + /* number of bytes following the question; ssize_t, so log it with %zd */ + ssize_t trailer = res->qlen - off; + + LogDbg(("NAT: hostres: question %zu < mlen %zu\n", off, res->qlen)); + + if (pHdr->nscount == 0 && pHdr->arcount == 0) + { + LogErr(("NAT: hostres: unexpected %zd bytes after the question\n", trailer)); + return refuse(res, RCode_FormErr); + } + + LogDbg(("NAT: hostres: ignoring %zd bytes of %s%s%s records\n", + trailer, + pHdr->nscount != 0 ? "authority" : "", + pHdr->nscount != 0 && pHdr->arcount != 0 ? " and " : "", + pHdr->arcount != 0 ? "additional" : "")); + + /* cut the message back to header + question and clear the counts */ + res->qlen -= trailer; + res->end = res->qlen; + + pHdr->nscount = 0; + pHdr->arcount = 0; + } + + + /* + * Check for IN-ADDR.ARPA. Use the fact that res->labels at this + * point contains only the qname, so we have easy top-down access + * to its components. 
+ */ + if (get_in_addr_arpa(&in_addr_arpa, res->labels)) + return resolve_reverse(res, qtype, qname, in_addr_arpa); + else + return resolve(res, qtype, qname); +} + + +static int +resolve(struct response *res, uint16_t qtype, size_t qname) +{ + struct dnsmsg_header *pHdr; + struct hostent *h; + struct hostent hostent; + char *h_aliases[1]; + char *h_addr_list[2]; + size_t oend; + size_t nanswers; + ssize_t nbytes; + int i; + + char name[DNS_MAX_NAME_LEN+1]; + + pHdr = (struct dnsmsg_header *)res->buf; + nanswers = 0; + oend = res->end; + + strnlabels(name, sizeof(name), res->buf, qname); + LogDbg(("NAT: hostres: qname=\"%s\"\n", name)); + + if (qtype != Type_A && qtype != Type_CNAME && qtype != Type_ANY) + { + goto out; /* NB: RCode_NoError without an answer, not RCode_NXDomain */ + } + + h = NULL; +#ifdef VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER + { + PDNSMAPPINGENTRY pDNSMapingEntry = getDNSMapByName(res->pData, name); + if (pDNSMapingEntry != NULL) + { + LogDbg(("NAT: hostres: %s resolved from %s%s\n", + name, + pDNSMapingEntry->fPattern ? "pattern " : "mapping", + pDNSMapingEntry->fPattern ? pDNSMapingEntry->pszName : "")); + + if (qtype == Type_CNAME) + { + goto out; + } + + hostent.h_name = name; + hostent.h_aliases = h_aliases; + h_aliases[0] = NULL; + hostent.h_addrtype = AF_INET; + hostent.h_length = sizeof(RTNETADDRIPV4); + hostent.h_addr_list = h_addr_list; + h_addr_list[0] = (char *)&pDNSMapingEntry->u32IpAddress; + h_addr_list[1] = NULL; + + h = &hostent; + } + } +#endif + + if (h == NULL) + { + h = gethostbyname(name); + } + + if (h == NULL) + { + /* LogErr: h_errno */ + return refuse(res, RCode_NXDomain); + } + + if (h->h_length != sizeof(RTNETADDRIPV4)) + { + /* Log: what kind of address did we get?! 
*/ + goto out; + } + + if ( h->h_addr_list == NULL + || h->h_addr_list[0] == NULL) + { + /* Log: shouldn't happen */ + goto out; + } + +#ifdef VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER + alterHostentWithDataFromDNSMap(res->pData, h); +#endif + + /* + * Emit CNAME record if canonical name differs from the qname. + */ + if ( h->h_name != NULL + && RTStrICmp(h->h_name, name) != 0) + { + LogDbg(("NAT: hostres: %s CNAME %s\n", name, h->h_name)); + nbytes = append_cname(res, name, h->h_name); + if (nbytes > 0) + { + ++nanswers; + } + else + { + LogErr(("NAT: hostres: failed to add %s CNAME %s\n", + name, h->h_name)); + if (nbytes < 0) + return refuse(res, RCode_ServFail); + else + { + pHdr->tc = 1; + goto out; + } + } + + /* + * rfc1034#section-3.6.2 - ... a type CNAME or * query should + * return just the CNAME. + */ + if (qtype == Type_CNAME || qtype == Type_ANY) + goto out; + } + else if (qtype == Type_CNAME) + { + LogDbg(("NAT: hostres: %s is already canonical\n", name)); + goto out; /* NB: RCode_NoError without an answer, not RCode_NXDomain */ + } + + /* + * Emit A records. + */ + for (i = 0; h->h_addr_list[i] != NULL; ++i) + { + const char *cname = h->h_name ? h->h_name : name; + struct in_addr addr; + + addr.s_addr = *(uint32_t *)h->h_addr_list[i]; + nbytes = append_a(res, cname, addr); + + if (nbytes > 0) + { + ++nanswers; + } + else + { + LogErr(("NAT: hostres: failed to add %s A %RTnaipv4\n", + cname, addr.s_addr)); + if (nbytes < 0) + return refuse(res, RCode_ServFail); + else + { + pHdr->tc = 1; + goto out; + } + } + } + +#if 0 + /* + * It's not clear what to do with h_aliases. + * + * For names from the DNS it seems to contain the chain of CNAMEs, + * starting with the original qname from the question. So for + * them we'd need to reply with a chain of: + * + * h_aliases[i] CNAME h_aliases[i+1] + * + * OTOH, for the names from the hosts file it seems to contain all + * the names except the first one (which is considered primary and + * is reported as h_name). 
In which case the reply should be: + * + * h_aliases[i] CNAME h_name + * + * Obviously, we have no idea how the name was resolved, so we + * generate at most one CNAME for h_name (if it differs) and ignore + * aliases altogether. + */ + for (i = 0; h->h_aliases[i] != NULL; ++i) + { + LogDbg(("NAT: hostres: ... %s\n", h->h_aliases[i])); + } +#endif + + out: + pHdr->ancount = RT_H2N_U16((uint16_t)nanswers); + return VINF_SUCCESS; +} + + +static int +resolve_reverse(struct response *res, uint16_t qtype, size_t qname, + struct in_addr in_addr_arpa) +{ + struct dnsmsg_header *pHdr; + struct hostent *h; + struct hostent hostent; + char *h_aliases[1]; + char *h_addr_list[2]; + size_t oend; + size_t nanswers; + ssize_t nbytes; + + pHdr = (struct dnsmsg_header *)res->buf; + nanswers = 0; + oend = res->end; + + LogDbg(("NAT: hostres: %RTnaipv4\n", in_addr_arpa.s_addr)); + + if (qtype != Type_PTR && qtype != Type_ANY) + { + /* can't answer CNAME to PTR queries using gethostby* */ + goto out; /* NB: RCode_NoError without an answer, not RCode_NXDomain */ + } + + h = NULL; +#ifdef VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER + /* + * If the address in the question is unknown to the real resolver + * but has a mapping, and if we do the real lookup first, then the + * guest will time out before our lookup times out and even though + * we reply with the answer from the map, the answer will be lost. 
+ */ + { + PDNSMAPPINGENTRY pReverseMapping = getDNSMapByAddr(res->pData, (const uint32_t *)&in_addr_arpa.s_addr); + if (pReverseMapping != NULL) + { + LogDbg(("NAT: hostres: %RTnaipv4 resolved from mapping\n", + in_addr_arpa.s_addr)); + + hostent.h_name = pReverseMapping->pszName; + hostent.h_aliases = h_aliases; + h_aliases[0] = NULL; + hostent.h_addrtype = AF_INET; + hostent.h_length = sizeof(RTNETADDRIPV4); + hostent.h_addr_list = h_addr_list; + h_addr_list[0] = (char *)&in_addr_arpa.s_addr; + h_addr_list[1] = NULL; + + h = &hostent; + } + } +#endif + + if (h == NULL) + { +#ifdef RT_OS_WINDOWS + h = gethostbyaddr((const char *)&in_addr_arpa, sizeof(struct in_addr), AF_INET); +#else + h = gethostbyaddr(&in_addr_arpa, sizeof(struct in_addr), AF_INET); +#endif + } + + if (h == NULL) + { + /* LogErr: h_errno */ + return refuse(res, RCode_NXDomain); + } + + if (h->h_name != NULL) + { + char name[DNS_MAX_NAME_LEN+1]; + strnlabels(name, sizeof(name), res->buf, qname); + + LogDbg(("NAT: hostres: %s PTR %s\n", name, h->h_name)); + nbytes = append_ptr(res, name, h->h_name); + if (nbytes > 0) + { + ++nanswers; + } + else + { + LogErr(("NAT: hostres: failed to add %s PTR %s\n", + name, h->h_name)); + if (nbytes < 0) + return refuse(res, RCode_ServFail); + else + { + pHdr->tc = 1; + goto out; + } + } + } + + out: + pHdr->ancount = RT_H2N_U16((uint16_t)nanswers); + return VINF_SUCCESS; +} + + +static int +refuse(struct response *res, unsigned int rcode) +{ + struct dnsmsg_header *pHdr = (struct dnsmsg_header *)res->buf; + pHdr->rcode = rcode; + + return VINF_SUCCESS; +} + + +#define APPEND_PROLOGUE() \ + ssize_t size = -1; \ + size_t oend = res->end; \ + ssize_t nbytes; \ + do {} while (0) + +#define CHECKED(_append) \ + do { \ + nbytes = (_append); \ + if (RT_UNLIKELY(nbytes <= 0)) \ + { \ + if (nbytes == 0) \ + size = 0; \ + goto out; \ + } \ + } while (0) + +#define APPEND_EPILOGUE() \ + do { \ + size = res->end - oend; \ + out: \ + if (RT_UNLIKELY(size <= 0)) \ + 
res->end = oend; \ + return size; \ + } while (0) + + +/* + * A RR - rfc1035#section-3.4.1 + */ +static ssize_t +append_a(struct response *res, const char *name, struct in_addr addr) +{ + APPEND_PROLOGUE(); + + CHECKED( append_rrhdr(res, name, Type_A, 3600) ); + CHECKED( append_u16(res, RT_H2N_U16_C(sizeof(addr))) ); + CHECKED( append_u32(res, addr.s_addr) ); + + APPEND_EPILOGUE(); +} + + +/* + * CNAME RR - rfc1035#section-3.3.1 + */ +static ssize_t +append_cname(struct response *res, const char *name, const char *cname) +{ + return append_name_rr(res, name, Type_CNAME, cname); +} + + +/* + * PTR RR - rfc1035#section-3.3.12 + */ +static ssize_t +append_ptr(struct response *res, const char *inaddrname, const char *name) +{ + return append_name_rr(res, inaddrname, Type_PTR, name); +} + + +static ssize_t +append_name_rr(struct response *res, const char *question, + int type, const char *answer) +{ + size_t rdlpos; + uint16_t rdlength; + + APPEND_PROLOGUE(); + + CHECKED( append_rrhdr(res, question, type, 3600) ); + + rdlpos = res->end; + CHECKED( append_u16(res, 0) ); /* RDLENGTH placeholder */ + + CHECKED( append_name(res, answer) ); + + rdlength = RT_H2N_U16(nbytes); + memcpy(&res->buf[rdlpos], &rdlength, sizeof(rdlength)); + + APPEND_EPILOGUE(); +} + + +/* + * Append common RR header, up to but not including RDLENGTH and RDATA + * proper (rfc1035#section-3.2.1). 
+ */ +static ssize_t +append_rrhdr(struct response *res, const char *name, uint16_t type, uint32_t ttl) +{ + APPEND_PROLOGUE(); + + CHECKED( append_name(res, name) ); + CHECKED( append_u16(res, RT_H2N_U16(type)) ); + CHECKED( append_u16(res, RT_H2N_U16_C(Class_IN)) ); + CHECKED( append_u32(res, RT_H2N_U32(ttl)) ); + + APPEND_EPILOGUE(); +} + + +static ssize_t +append_name(struct response *res, const char *name) +{ + ssize_t size, nbytes; + struct label *root; + struct label *haystack, *needle; + struct label *head, **neck; + struct label *tail, **graft; + uint8_t *buf; + size_t wr, oend; + const char *s; + + size = -1; + oend = res->end; + + /** + * Split new name into a list of labels encoding it into the + * temporary buffer. + */ + root = NULL; + + buf = RTMemAllocZ(strlen(name) + 1); + if (buf == NULL) + return -1; + wr = 0; + + s = name; + while (*s != '\0') { + const char *part; + size_t poff, plen; + struct label *l; + + part = s; + while (*s != '\0' && *s != '.') + ++s; + + plen = s - part; + + if (plen > DNS_MAX_LABEL_LEN) + { + LogErr(("NAT: hostres: name component too long\n")); + goto out; + } + + if (*s == '.') + { + if (plen == 0) + { + LogErr(("NAT: hostres: empty name component\n")); + goto out; + } + + ++s; + } + + poff = wr; + + buf[poff] = (uint8_t)plen; /* length byte */ + ++wr; + + memcpy(&buf[wr], part, plen); /* label text */ + wr += plen; + + l = RTMemAllocZ(sizeof(*l)); + if (l == NULL) + goto out; + + l->buf = buf; + l->off = poff; + l->children = root; + root = l; + } + + + /** + * Search for a tail that is already encoded in the message. 
+ */ + neck = &root; /* where needle head is connected */ + needle = root; + + tail = NULL; /* tail in the haystack */ + graft = &res->labels; + haystack = res->labels; + + while (needle != NULL && haystack != NULL) + { + size_t nlen, hlen; + + nlen = needle->buf[needle->off]; + Assert((nlen & DNS_LABEL_PTR) == 0); + + hlen = haystack->buf[haystack->off]; + Assert((hlen & DNS_LABEL_PTR) == 0); + + if ( nlen == hlen + && RTStrNICmp((char *)&needle->buf[needle->off+1], + (char *)&haystack->buf[haystack->off+1], + nlen) == 0) + { + neck = &needle->children; + needle = needle->children; + + tail = haystack; + graft = &haystack->children; + haystack = haystack->children; + } + else + { + haystack = haystack->sibling; + } + } + + + /** + * Head contains (in reverse) the prefix that needs to be encoded + * and added to the haystack. Tail points to existing suffix that + * can be compressed to a pointer into the haystack. + */ + head = *neck; + if (head != NULL) + { + struct label *l; + size_t nlen, pfxlen, pfxdst; + + nlen = needle->buf[head->off]; /* last component */ + pfxlen = head->off + 1 + nlen; /* all prefix */ + pfxdst = res->end; /* in response buffer */ + + /* copy new prefix into response buffer */ + nbytes = append_bytes(res, buf, pfxlen); + if (nbytes <= 0) + { + if (nbytes == 0) + size = 0; + goto out; + } + + /* adjust labels to point to the response */ + for (l = head; l != NULL; l = l->children) + { + l->buf = res->buf; + l->off += pfxdst; + } + + *neck = NULL; /* decapitate */ + + l = *graft; /* graft to the labels tree */ + *graft = head; + head->sibling = l; + } + + if (tail == NULL) + nbytes = append_u8(res, 0); + else + nbytes = append_u16(res, RT_H2N_U16((DNS_LABEL_PTR << 8) | tail->off)); + if (nbytes <= 0) + { + if (nbytes == 0) + size = 0; + goto out; + } + + size = res->end - oend; + out: + if (RT_UNLIKELY(size <= 0)) + res->end = oend; + free_labels(root); + RTMemFree(buf); + return size; +} + + +static ssize_t +append_u32(struct response *res, 
uint32_t value) +{ + return append_bytes(res, (uint8_t *)&value, sizeof(value)); +} + + +static ssize_t +append_u16(struct response *res, uint16_t value) +{ + return append_bytes(res, (uint8_t *)&value, sizeof(value)); +} + + +static ssize_t +append_u8(struct response *res, uint8_t value) +{ + return append_bytes(res, &value, sizeof(value)); +} + + +static ssize_t +append_bytes(struct response *res, uint8_t *p, size_t size) +{ + if (check_space(res, size) == 0) + return 0; + + memcpy(&res->buf[res->end], p, size); + res->end += size; + return size; +} + + +static ssize_t +check_space(struct response *res, size_t size) +{ + if ( size > sizeof(res->buf) + || res->end > sizeof(res->buf) - size) + return 0; + + return size; +} + + +static int +get_in_addr_arpa(struct in_addr *paddr, struct label *root) +{ + RTNETADDRIPV4 addr; + struct label *l; + int i; + RT_ZERO(addr); /* makes MSC happy*/ + + l = root; + if (l == NULL || labelstrcmp(l, "arpa") != 0) + return 0; + + l = l->children; + if (l == NULL || labelstrcmp(l, "in-addr") != 0) + return 0; + + for (i = 0; i < 4; ++i) + { + char buf[4]; + size_t llen; + int rc; + uint8_t octet; + + l = l->children; + if (l == NULL) + return 0; + + llen = l->buf[l->off]; + Assert((llen & DNS_LABEL_PTR) == 0); + + /* valid octet values are at most 3 digits */ + if (llen > 3) + return 0; + + /* copy to avoid dealing with trailing bytes */ + memcpy(buf, &l->buf[l->off + 1], llen); + buf[llen] = '\0'; + + rc = RTStrToUInt8Full(buf, 10, &octet); + if (rc != VINF_SUCCESS) + return 0; + + addr.au8[i] = octet; + } + + if (l->children != NULL) + return 0; /* too many components */ + + if (paddr != NULL) + paddr->s_addr = addr.u; + + return 1; +} + + +/* + * Compare label with string. 
+ */ +static int +labelstrcmp(struct label *l, const char *s) +{ + size_t llen; + + llen = l->buf[l->off]; + Assert((llen & DNS_LABEL_PTR) == 0); + + return RTStrNICmp((char *)&l->buf[l->off + 1], s, llen); +} + + +/* + * Convert a chain of labels to a C string. + * + * I'd rather use a custom formatter for e.g. %R[label] , but it needs + * two arguments and microsoft VC doesn't support compound literals. + */ +static void +strnlabels(char *namebuf, size_t nbuflen, const uint8_t *msg, size_t off) +{ + size_t cb; + size_t llen; + + namebuf[0] = '\0'; + cb = 0; + + llen = 0; + + while (cb < nbuflen - 1) { + llen = msg[off]; + if ((llen & DNS_LABEL_PTR) == DNS_LABEL_PTR) + { + off = ((llen & ~DNS_LABEL_PTR) << 8) | msg[off + 1]; + llen = msg[off]; + } + + /* pointers to pointers should not happen */ + if ((llen & DNS_LABEL_PTR) != 0) + { + cb += RTStrPrintf(namebuf + cb, nbuflen - cb, "[???]"); + return; + } + + if (llen == 0) + { + if (namebuf[0] == '\0') + cb += RTStrPrintf(namebuf + cb, nbuflen - cb, "."); + break; + } + + if (namebuf[0] != '\0') + cb += RTStrPrintf(namebuf + cb, nbuflen - cb, "."); + + cb += RTStrPrintf(namebuf + cb, nbuflen - cb, + "%.*s", llen, (char *)&msg[off+1]); + off = off + 1 + llen; + } +} + + +#if 0 /* unused */ +static void +LogLabelsTree(const char *before, struct label *l, const char *after) +{ + size_t llen; + + if (before != NULL) + LogDbg(("%s", before)); + + if (l == NULL) + { + LogDbg(("NULL%s", after ? 
after : "")); + return; + } + + if (l->children) + LogDbg(("(")); + + if (l->buf != NULL) + { + llen = l->buf[l->off]; + if ((llen & DNS_LABEL_PTR) == 0) + { + LogDbg(("\"%.*s\"@%zu", llen, &l->buf[l->off+1], l->off)); + } + else + { + LogDbg(("<invalid byte 0t%zu/0x%zf at offset %zd>", + llen, llen, l->off)); + } + } + else + { + LogDbg(("<*>")); + } + + if (l->children) + LogLabelsTree(" ", l->children, ")"); + + if (l->sibling) + LogLabelsTree(" ", l->sibling, NULL); + + if (after != NULL) + LogDbg(("%s", after)); +} +#endif /* unused */ + + +static void +free_labels(struct label *root) +{ + struct label TOP; /* traverse the tree with pointer reversal */ + struct label *b, *f; + + if (root == NULL) + return; + + RT_ZERO(TOP); + + b = &TOP; + f = root; + + while (f != &TOP) { + if (f->children) { /* recurse left */ + struct label *oldf = f; + struct label *newf = f->children; + oldf->children = b; /* reverse the pointer */ + b = oldf; + f = newf; + } + else if (f->sibling) { /* turn right */ + f->children = f->sibling; + f->sibling = NULL; + } + else { /* backtrack */ + struct label *oldf = f; /* garbage */ + struct label *oldb = b; + b = oldb->children; + oldb->children = NULL; /* oldf, but we are g/c'ing it */ + f = oldb; + + RTMemFree(oldf); + } + } +} + +#ifdef VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER +void +slirp_add_host_resolver_mapping(PNATState pData, + const char *pszHostName, bool fPattern, + uint32_t u32HostIP) +{ + LogRel(("ENTER: pszHostName:%s%s, u32HostIP:%RTnaipv4\n", + pszHostName ? pszHostName : "(null)", + fPattern ? 
" (pattern)" : "", + u32HostIP)); + + if ( pszHostName != NULL + && u32HostIP != INADDR_ANY + && u32HostIP != INADDR_BROADCAST) + { + PDNSMAPPINGENTRY pDnsMapping = RTMemAllocZ(sizeof(DNSMAPPINGENTRY)); + if (!pDnsMapping) + { + LogFunc(("Can't allocate DNSMAPPINGENTRY\n")); + LogFlowFuncLeave(); + return; + } + + pDnsMapping->u32IpAddress = u32HostIP; + pDnsMapping->fPattern = fPattern; + pDnsMapping->pszName = RTStrDup(pszHostName); + + if (pDnsMapping->pszName == NULL) + { + LogFunc(("Can't allocate enough room for host name\n")); + RTMemFree(pDnsMapping); + LogFlowFuncLeave(); + return; + } + + if (fPattern) /* there's no case-insensitive pattern-match function */ + RTStrToLower(pDnsMapping->pszName); + + STAILQ_INSERT_TAIL(fPattern ? &pData->DNSMapPatterns : &pData->DNSMapNames, + pDnsMapping, MapList); + + LogRel(("NAT: User-defined mapping %s%s = %RTnaipv4 is registered\n", + pDnsMapping->pszName, + pDnsMapping->fPattern ? " (pattern)" : "", + pDnsMapping->u32IpAddress)); + } + LogFlowFuncLeave(); +} + + +static PDNSMAPPINGENTRY +getDNSMapByName(PNATState pData, const char *pszName) +{ + PDNSMAPPINGENTRY pDNSMapingEntry; + char *pszNameLower; + + pszNameLower = RTStrDup(pszName); + if (RT_UNLIKELY(pszNameLower == NULL)) + return NULL; + RTStrToLower(pszNameLower); + + STAILQ_FOREACH(pDNSMapingEntry, &pData->DNSMapNames, MapList) + { + if (RTStrICmp(pDNSMapingEntry->pszName, pszNameLower) == 0) + goto done; + } + + STAILQ_FOREACH(pDNSMapingEntry, &pData->DNSMapPatterns, MapList) + { + if (RTStrSimplePatternMultiMatch(pDNSMapingEntry->pszName, RTSTR_MAX, + pszNameLower, RTSTR_MAX, NULL)) + goto done; + } + + done: + RTStrFree(pszNameLower); + return pDNSMapingEntry; +} + + +static PDNSMAPPINGENTRY +getDNSMapByAddr(PNATState pData, const uint32_t *pu32IpAddress) +{ + PDNSMAPPINGENTRY pDNSMapingEntry; + + if (pu32IpAddress == NULL) + return NULL; + + STAILQ_FOREACH(pDNSMapingEntry, &pData->DNSMapNames, MapList) + { + if (pDNSMapingEntry->u32IpAddress == 
*pu32IpAddress) + return pDNSMapingEntry; + } + + return NULL; +} + + +static void +alterHostentWithDataFromDNSMap(PNATState pData, struct hostent *h) +{ + PDNSMAPPINGENTRY pDNSMapingEntry = NULL; + char **ppszAlias; + + if (h->h_name != NULL) + { + pDNSMapingEntry = getDNSMapByName(pData, h->h_name); + if (pDNSMapingEntry != NULL) + goto done; + } + + for (ppszAlias = h->h_aliases; *ppszAlias != NULL; ++ppszAlias) + { + pDNSMapingEntry = getDNSMapByName(pData, *ppszAlias); + if (pDNSMapingEntry != NULL) + goto done; + } + + done: + if (pDNSMapingEntry != NULL) + { + *(uint32_t *)h->h_addr_list[0] = pDNSMapingEntry->u32IpAddress; + h->h_addr_list[1] = NULL; + } +} +#endif /* VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER */ diff --git a/src/VBox/Devices/Network/slirp/icmp_var.h b/src/VBox/Devices/Network/slirp/icmp_var.h new file mode 100644 index 00000000..e9a0cecb --- /dev/null +++ b/src/VBox/Devices/Network/slirp/icmp_var.h @@ -0,0 +1,93 @@ +/* $Id: icmp_var.h $ */ +/** @file + * NAT - ICMP handling (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)icmp_var.h 8.1 (Berkeley) 6/10/93 + * icmp_var.h,v 1.4 1995/02/16 00:27:40 wollman Exp + */ + +#ifndef _NETINET_ICMP_VAR_H_ +#define _NETINET_ICMP_VAR_H_ + +/* + * Variables related to this implementation + * of the internet control message protocol. 
+ */ +struct icmpstat_t +{ +/* statistics related to input messages processed */ + u_long icps_received; /* #ICMP packets received */ + u_long icps_tooshort; /* packet < ICMP_MINLEN */ + u_long icps_checksum; /* bad checksum */ + u_long icps_notsupp; /* #ICMP packets not supported */ + u_long icps_badtype; /* #with bad type field */ + u_long icps_reflect; /* number of responses */ +}; + +/* + * Names for ICMP sysctl objects + */ +#define ICMPCTL_MASKREPL 1 /* allow replies to netmask requests */ +#define ICMPCTL_STATS 2 /* statistics (read-only) */ +#define ICMPCTL_MAXID 3 + +#define ICMPCTL_NAMES { \ + { 0, 0 }, \ + { "maskrepl", CTLTYPE_INT }, \ + { "stats", CTLTYPE_STRUCT }, \ +} + +#endif diff --git a/src/VBox/Devices/Network/slirp/if.h b/src/VBox/Devices/Network/slirp/if.h new file mode 100644 index 00000000..2eec22a7 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/if.h @@ -0,0 +1,61 @@ +/* $Id: if.h $ */ +/** @file + * NAT - if_*. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#ifndef _IF_H_ +#define _IF_H_ + +#define IF_COMPRESS 0x01 /* We want compression */ +#define IF_NOCOMPRESS 0x02 /* Do not do compression */ +#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ +#define IF_NOCIDCOMP 0x08 /* CID compression */ + + +#ifdef ETH_P_ARP +# undef ETH_P_ARP +#endif /* ETH_P_ARP*/ +#define ETH_P_ARP 0x0806 /* Address Resolution packet */ + +#ifdef ETH_P_IP +# undef ETH_P_IP +#endif /* ETH_P_IP */ +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ + +#ifdef ETH_P_IPV6 +# undef ETH_P_IPV6 +#endif /* ETH_P_IPV6 */ +#define ETH_P_IPV6 0x86DD /* IPv6 */ + +#endif diff --git a/src/VBox/Devices/Network/slirp/ip.h b/src/VBox/Devices/Network/slirp/ip.h new file mode 100644 index 00000000..ef1ea281 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/ip.h @@ -0,0 +1,285 @@ +/* $Id: ip.h $ */ +/** @file + * NAT - IP handling (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ip.h 8.1 (Berkeley) 6/10/93 + * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp + */ + +#ifndef _IP_H_ +#define _IP_H_ + +#include "queue.h" + +#ifdef WORDS_BIGENDIAN +# ifndef NTOHL +# define NTOHL(d) +# endif +# ifndef NTOHS +# define NTOHS(d) +# endif +# ifndef HTONL +# define HTONL(d) +# endif +# ifndef HTONS +# define HTONS(d) +# endif +#else +# ifndef NTOHL +# define NTOHL(d) ((d) = RT_N2H_U32((d))) +# endif +# ifndef NTOHS +# define NTOHS(d) ((d) = RT_N2H_U16((u_int16_t)(d))) +# endif +# ifndef HTONL +# define HTONL(d) ((d) = RT_H2N_U32((d))) +# endif +# ifndef HTONS +# define HTONS(d) ((d) = RT_H2N_U16((u_int16_t)(d))) +# endif +#endif + +/* + * Definitions for internet protocol version 4. + * Per RFC 791, September 1981. + */ +#define IPVERSION 4 + +/* + * Structure of an internet header, naked of options. + */ +struct ip +{ +#ifdef WORDS_BIGENDIAN +# ifdef _MSC_VER + uint8_t ip_v:4; /* version */ + uint8_t ip_hl:4; /* header length */ +# else + unsigned ip_v:4; /* version */ + unsigned ip_hl:4; /* header length */ +# endif +#else +# ifdef _MSC_VER + uint8_t ip_hl:4; /* header length */ + uint8_t ip_v:4; /* version */ +# else + unsigned ip_hl:4; /* header length */ + unsigned ip_v:4; /* version */ +# endif +#endif + uint8_t ip_tos; /* type of service */ + uint16_t ip_len; /* total length */ + uint16_t ip_id; /* identification */ + uint16_t ip_off; /* fragment offset field */ +#define IP_DF 0x4000 /* don't fragment flag */ +#define IP_MF 0x2000 /* more fragments flag */ +#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ + uint8_t ip_ttl; /* time to live */ + uint8_t ip_p; /* protocol */ + uint16_t ip_sum; /* checksum */ + struct in_addr ip_src; /* source address */ + struct in_addr ip_dst; /* destination address */ +}; +AssertCompileSize(struct ip, 20); + +#define IP_MAXPACKET 65535 /* maximum packet size */ + +/* + * Definitions for IP type of service (ip_tos) + */ +#define IPTOS_LOWDELAY 0x10 +#define IPTOS_THROUGHPUT 0x08 +#define 
IPTOS_RELIABILITY 0x04 + + +/* + * Time stamp option structure. + */ +struct ip_timestamp +{ + uint8_t ipt_code; /* IPOPT_TS */ + uint8_t ipt_len; /* size of structure (variable) */ + uint8_t ipt_ptr; /* index of current entry */ +#ifdef WORDS_BIGENDIAN +# ifdef _MSC_VER + uint8_t ipt_oflw:4; /* overflow counter */ + uint8_t ipt_flg:4; /* flags, see below */ +# else + unsigned ipt_oflw:4; /* overflow counter */ + unsigned ipt_flg:4; /* flags, see below */ +# endif +#else +# ifdef _MSC_VER + uint8_t ipt_flg:4; /* flags, see below */ + uint8_t ipt_oflw:4; /* overflow counter */ +# else + unsigned ipt_flg:4; /* flags, see below */ + unsigned ipt_oflw:4; /* overflow counter */ +# endif +#endif + union ipt_timestamp + { + uint32_t ipt_time[1]; + struct ipt_ta + { + struct in_addr ipt_addr; + uint32_t ipt_time; + } ipt_ta[1]; + } ipt_timestamp; +}; +AssertCompileSize(struct ip_timestamp, 12); + +/* + * Internet implementation parameters. + */ +#define MAXTTL 255 /* maximum time to live (seconds) */ +#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ +#define IPFRAGTTL 60 /* time to live for frags, slowhz */ +#define IPTTLDEC 1 /* subtracted when forwarding */ + +#define IP_MSS 576 /* default maximum segment size */ + +#ifdef HAVE_SYS_TYPES32_H /* Overcome some Solaris 2.x junk */ +# include <sys/types32.h> +#else +typedef caddr_t caddr32_t; +#endif + +#if SIZEOF_CHAR_P == 4 +typedef struct ipq_t *ipqp_32; +typedef struct ipasfrag *ipasfragp_32; +#else +typedef caddr32_t ipqp_32; +typedef caddr32_t ipasfragp_32; +#endif + +/* + * Overlay for ip header used by other protocols (tcp, udp). + */ +struct ipovly +{ + u_int8_t ih_x1[9]; /* (unused) */ + u_int8_t ih_pr; /* protocol */ + u_int16_t ih_len; /* protocol length */ + struct in_addr ih_src; /* source internet address */ + struct in_addr ih_dst; /* destination internet address */ +}; +AssertCompileSize(struct ipovly, 20); + +/* + * Ip reassembly queue structure. 
Each fragment being reassembled is + * attached to one of these structures. They are timed out after ipq_ttl + * drops to 0, and may also be reclaimed if memory becomes tight. + * size 28 bytes + */ +struct ipq_t +{ + TAILQ_ENTRY(ipq_t) ipq_list; + u_int8_t ipq_ttl; /* time for reass q to live */ + u_int8_t ipq_p; /* protocol of this fragment */ + u_int16_t ipq_id; /* sequence id for reassembly */ + struct mbuf *ipq_frags; /* to ip headers of fragments */ + uint8_t ipq_nfrags; /* # of fragments in this packet */ + struct in_addr ipq_src; + struct in_addr ipq_dst; +}; + + +/* +* IP datagram reassembly. +*/ +#define IPREASS_NHASH_LOG2 6 +#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) +#define IPREASS_HMASK (IPREASS_NHASH - 1) +#define IPREASS_HASH(x,y) \ +(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) +TAILQ_HEAD(ipqhead, ipq_t); + +/* + * Structure attached to inpcb.ip_moptions and + * passed to ip_output when IP multicast options are in use. + */ + +struct ipstat_t +{ + u_long ips_total; /* total packets received */ + u_long ips_badsum; /* checksum bad */ + u_long ips_tooshort; /* packet too short */ + u_long ips_toosmall; /* not enough data */ + u_long ips_badhlen; /* ip header length < data size */ + u_long ips_badlen; /* ip length < ip header length */ + u_long ips_fragments; /* fragments received */ + u_long ips_fragdropped; /* frags dropped (dups, out of space) */ + u_long ips_fragtimeout; /* fragments timed out */ + u_long ips_forward; /* packets forwarded */ + u_long ips_cantforward; /* packets rcvd for unreachable dest */ + u_long ips_redirectsent; /* packets forwarded on same net */ + u_long ips_noproto; /* unknown or unsupported protocol */ + u_long ips_delivered; /* datagrams delivered to upper level*/ + u_long ips_localout; /* total ip packets generated here */ + u_long ips_odropped; /* lost packets due to nobufs, etc. 
*/ + u_long ips_reassembled; /* total packets reassembled ok */ + u_long ips_fragmented; /* datagrams successfully fragmented */ + u_long ips_ofragments; /* output fragments created */ + u_long ips_cantfrag; /* don't fragment flag was set, etc. */ + u_long ips_badoptions; /* error in option processing */ + u_long ips_noroute; /* packets discarded due to no route */ + u_long ips_badvers; /* ip version != 4 */ + u_long ips_rawout; /* total raw ip packets generated */ + u_long ips_unaligned; /* times the ip packet was not aligned */ +}; + +#endif diff --git a/src/VBox/Devices/Network/slirp/ip_icmp.c b/src/VBox/Devices/Network/slirp/ip_icmp.c new file mode 100644 index 00000000..f61eaaa8 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/ip_icmp.c @@ -0,0 +1,795 @@ +/* $Id: ip_icmp.c $ */ +/** @file + * NAT - IP/ICMP handling. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 + * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp + */ + +#include "slirp.h" +#include "ip_icmp.h" + +#ifdef VBOX_RAWSOCK_DEBUG_HELPER +int getrawsock(int type); +#endif + + +/* The message sent when emulating PING */ +/* Be nice and tell them it's just a psuedo-ping packet */ +#if 0 /* unused */ +static const char icmp_ping_msg[] = "This is a psuedo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST packets.\n"; +#endif + +/* list of actions for icmp_error() on RX of an icmp message */ +static const int icmp_flush[19] = +{ +/* ECHO REPLY (0) */ 0, + 1, + 1, +/* DEST UNREACH (3) */ 1, +/* SOURCE QUENCH (4)*/ 1, +/* REDIRECT (5) */ 1, + 1, + 1, +/* ECHO (8) */ 0, +/* ROUTERADVERT (9) */ 1, +/* ROUTERSOLICIT (10) */ 1, +/* TIME EXCEEDED (11) */ 1, +/* PARAMETER PROBLEM (12) */ 1, +/* TIMESTAMP (13) */ 0, +/* TIMESTAMP REPLY (14) */ 0, +/* INFO (15) */ 0, +/* INFO REPLY (16) */ 0, +/* ADDR MASK (17) */ 0, +/* ADDR MASK REPLY (18) */ 0 +}; + + +int +icmp_init(PNATState pData, int iIcmpCacheLimit) +{ + pData->icmp_socket.so_type = IPPROTO_ICMP; + pData->icmp_socket.so_state = SS_ISFCONNECTED; + +#ifndef RT_OS_WINDOWS + TAILQ_INIT(&pData->icmp_msg_head); + + if (iIcmpCacheLimit < 0) + { + LogRel(("NAT: iIcmpCacheLimit is invalid %d, will be alter to default value 100\n", iIcmpCacheLimit)); + iIcmpCacheLimit = 100; + } + pData->iIcmpCacheLimit = iIcmpCacheLimit; +# ifndef RT_OS_DARWIN + pData->icmp_socket.s = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP); +# else /* !RT_OS_DARWIN */ + pData->icmp_socket.s = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); +# endif /* RT_OS_DARWIN */ + if (pData->icmp_socket.s == -1) + { + int rc = RTErrConvertFromErrno(errno); +# if defined(RT_OS_DARWIN) || !defined(VBOX_RAWSOCK_DEBUG_HELPER) + LogRel(("NAT: ICMP/ping not available (could not open ICMP socket, error %Rrc)\n", rc)); + return 1; +# else + /* try to get it from privileged helper */ + LogRel(("NAT: ICMP/ping raw socket error %Rrc, asking 
helper...\n", rc)); + pData->icmp_socket.s = getrawsock(AF_INET); + if (pData->icmp_socket.s == -1) + { + LogRel(("NAT: ICMP/ping not available\n")); + return 1; + } +# endif /* !RT_OS_DARWIN && VBOX_RAWSOCK_DEBUG_HELPER */ + } + fd_nonblock(pData->icmp_socket.s); + NSOCK_INC(); + +#else /* RT_OS_WINDOWS */ + RT_NOREF(iIcmpCacheLimit); + + if (icmpwin_init(pData) != 0) + return 1; +#endif /* RT_OS_WINDOWS */ + + return 0; +} + +/** + * Cleans ICMP cache. + */ +void +icmp_finit(PNATState pData) +{ +#ifdef RT_OS_WINDOWS + icmpwin_finit(pData); +#else + while (!TAILQ_EMPTY(&pData->icmp_msg_head)) + { + struct icmp_msg *icm = TAILQ_FIRST(&pData->icmp_msg_head); + icmp_msg_delete(pData, icm); + } + closesocket(pData->icmp_socket.s); +#endif +} + + +#if !defined(RT_OS_WINDOWS) +static struct icmp_msg * +icmp_msg_alloc(PNATState pData) +{ + struct icmp_msg *icm; + +#ifdef DEBUG + { + int iTally = 0; + TAILQ_FOREACH(icm, &pData->icmp_msg_head, im_queue) + ++iTally; + Assert(pData->cIcmpCacheSize == iTally); + } +#endif + + if (pData->cIcmpCacheSize >= pData->iIcmpCacheLimit) + { + int cTargetCacheSize = pData->iIcmpCacheLimit/2; + + while (pData->cIcmpCacheSize > cTargetCacheSize) + { + icm = TAILQ_FIRST(&pData->icmp_msg_head); + icmp_msg_delete(pData, icm); + } + } + + icm = RTMemAlloc(sizeof(struct icmp_msg)); + if (RT_UNLIKELY(icm == NULL)) + return NULL; + + TAILQ_INSERT_TAIL(&pData->icmp_msg_head, icm, im_queue); + pData->cIcmpCacheSize++; + + return icm; +} + + +static void +icmp_attach(PNATState pData, struct mbuf *m) +{ + struct icmp_msg *icm; + +#ifdef DEBUG + { + /* only used for ping */ + struct ip *ip = mtod(m, struct ip *); + Assert(ip->ip_p == IPPROTO_ICMP); + } +#endif + + icm = icmp_msg_alloc(pData); + if (RT_UNLIKELY(icm == NULL)) + return; + + icm->im_so = &pData->icmp_socket; + icm->im_m = m; +} + + +void +icmp_msg_delete(PNATState pData, struct icmp_msg *icm) +{ + if (RT_UNLIKELY(icm == NULL)) + return; + +#ifdef DEBUG + { + struct icmp_msg *existing; + 
int iTally = 0; + + TAILQ_FOREACH(existing, &pData->icmp_msg_head, im_queue) + ++iTally; + Assert(pData->cIcmpCacheSize == iTally); + + Assert(pData->cIcmpCacheSize > 0); + TAILQ_FOREACH(existing, &pData->icmp_msg_head, im_queue) + { + if (existing == icm) + break; + } + Assert(existing != NULL); + } +#endif + + TAILQ_REMOVE(&pData->icmp_msg_head, icm, im_queue); + pData->cIcmpCacheSize--; + + icm->im_so->so_m = NULL; + if (icm->im_m != NULL) + m_freem(pData, icm->im_m); + + RTMemFree(icm); +} + + +/* + * ip here is ip header + 64bytes readed from ICMP packet + */ +struct icmp_msg * +icmp_find_original_mbuf(PNATState pData, struct ip *ip) +{ + struct mbuf *m0; + struct ip *ip0; + struct icmp *icp, *icp0; + struct icmp_msg *icm = NULL; + int found = 0; + struct udphdr *udp; + struct tcphdr *tcp; + struct socket *head_socket = NULL; + struct socket *last_socket = NULL; + struct socket *so = NULL; + struct in_addr faddr; + u_short lport, fport; + + faddr.s_addr = ~0; + + lport = ~0; + fport = ~0; + + + LogFlowFunc(("ENTER: ip->ip_p:%d\n", ip->ip_p)); + switch (ip->ip_p) + { + case IPPROTO_ICMP: + icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2)); + TAILQ_FOREACH(icm, &pData->icmp_msg_head, im_queue) + { + m0 = icm->im_m; + ip0 = mtod(m0, struct ip *); + if (ip0->ip_p != IPPROTO_ICMP) + { + /* try next item */ + continue; + } + icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2)); + /* + * IP could pointer to ICMP_REPLY datagram (1) + * or pointer IP header in ICMP payload in case of + * ICMP_TIMXCEED or ICMP_UNREACH (2) + * + * if (1) and then ICMP (type should be ICMP_ECHOREPLY) and we need check that + * IP.IP_SRC == IP0.IP_DST received datagramm comes from destination. + * + * if (2) then check that payload ICMP has got type ICMP_ECHO and + * IP.IP_DST == IP0.IP_DST destination of returned datagram is the same as + * one was sent. 
+ */ + if ( ( (icp->icmp_type != ICMP_ECHO && ip->ip_src.s_addr == ip0->ip_dst.s_addr) + || (icp->icmp_type == ICMP_ECHO && ip->ip_dst.s_addr == ip0->ip_dst.s_addr)) + && icp->icmp_id == icp0->icmp_id + && icp->icmp_seq == icp0->icmp_seq) + { + found = 1; + Log(("Have found %R[natsock]\n", icm->im_so)); + break; + } + Log(("Have found nothing\n")); + } + break; + + /* + * for TCP and UDP logic little bit reverted, we try to find the HOST socket + * from which the IP package has been sent. + */ + case IPPROTO_UDP: + head_socket = &udb; + udp = (struct udphdr *)((char *)ip + (ip->ip_hl << 2)); + faddr.s_addr = ip->ip_dst.s_addr; + fport = udp->uh_dport; + lport = udp->uh_sport; + last_socket = udp_last_so; + RT_FALL_THRU(); + + case IPPROTO_TCP: + if (head_socket == NULL) + { + tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); + head_socket = &tcb; /* head_socket could be initialized with udb*/ + faddr.s_addr = ip->ip_dst.s_addr; + fport = tcp->th_dport; + lport = tcp->th_sport; + last_socket = tcp_last_so; + } + /* check last socket first */ + if ( last_socket->so_faddr.s_addr == faddr.s_addr + && last_socket->so_fport == fport + && last_socket->so_hlport == lport) + { + found = 1; + so = last_socket; + break; + } + for (so = head_socket->so_prev; so != head_socket; so = so->so_prev) + { + /* Should be replaced by hash here */ + Log(("trying:%R[natsock] against %RTnaipv4:%d lport=%d hlport=%d\n", + so, faddr.s_addr, ntohs(fport), ntohs(lport), ntohs(so->so_hlport))); + if ( so->so_faddr.s_addr == faddr.s_addr + && so->so_fport == fport + && so->so_hlport == lport) + { + found = 1; + break; + } + } + break; + + default: + Log(("NAT:ICMP: unsupported protocol(%d)\n", ip->ip_p)); + } + +#ifdef DEBUG + if (found) + Assert((icm != NULL) ^ (so != NULL)); +#endif + + if (found && icm == NULL) + { + /* + * XXX: Implies this is not a pong, found socket. 
This is, of + * course, wasteful since the caller will delete icmp_msg + * immediately after processing, so there's not much reason to + * clutter up the queue with it. + */ + AssertReturn(so != NULL, NULL); + + /* + * XXX: FIXME: If the very first send(2) fails, the socket is + * still in SS_NOFDREF and so we will not report this too. + */ + if (so->so_state == SS_NOFDREF) + { + /* socket is shutting down we've already sent ICMP on it. */ + Log(("NAT:ICMP: disconnected %R[natsock]\n", so)); + LogFlowFunc(("LEAVE: icm:NULL\n")); + return NULL; + } + + if (so->so_m == NULL) + { + Log(("NAT:ICMP: no saved mbuf for %R[natsock]\n", so)); + LogFlowFunc(("LEAVE: icm:NULL\n")); + return NULL; + } + + icm = icmp_msg_alloc(pData); + if (RT_UNLIKELY(icm == NULL)) + { + LogFlowFunc(("LEAVE: icm:NULL\n")); + return NULL; + } + + Log(("NAT:ICMP: for %R[natsock]\n", so)); + icm->im_so = so; + icm->im_m = so->so_m; + } + LogFlowFunc(("LEAVE: icm:%p\n", icm)); + return icm; +} +#endif /* !RT_OS_WINDOWS */ + + +/* + * Process a received ICMP message. + */ +void +icmp_input(PNATState pData, struct mbuf *m, int hlen) +{ + register struct ip *ip = mtod(m, struct ip *); + int icmplen = ip->ip_len; + uint8_t icmp_type; + void *icp_buf = NULL; + uint32_t dst; + + /* int code; */ + + LogFlowFunc(("ENTER: m = %p, m_len = %d\n", m, m ? m->m_len : 0)); + + icmpstat.icps_received++; + + /* + * Locate icmp structure in mbuf, and check + * that its not corrupted and of at least minimum length. + */ + if (icmplen < ICMP_MINLEN) + { + /* min 8 bytes payload */ + icmpstat.icps_tooshort++; + goto end_error_free_m; + } + + m->m_len -= hlen; + m->m_data += hlen; + + if (cksum(m, icmplen)) + { + icmpstat.icps_checksum++; + goto end_error_free_m; + } + + /* are we guaranteed to have ICMP header in first mbuf? be safe. 
*/ + m_copydata(m, 0, sizeof(icmp_type), (caddr_t)&icmp_type); + + m->m_len += hlen; + m->m_data -= hlen; + + /* icmpstat.icps_inhist[icp->icmp_type]++; */ + /* code = icp->icmp_code; */ + + LogFlow(("icmp_type = %d\n", icmp_type)); + switch (icmp_type) + { + case ICMP_ECHO: + ip->ip_len += hlen; /* since ip_input subtracts this */ + dst = ip->ip_dst.s_addr; + if ( CTL_CHECK(dst, CTL_ALIAS) + || CTL_CHECK(dst, CTL_DNS) + || CTL_CHECK(dst, CTL_TFTP)) + { + /* Don't reply to ping requests for the hosts loopback interface if it is disabled. */ + if ( CTL_CHECK(dst, CTL_ALIAS) + && !pData->fLocalhostReachable) + goto done; + + uint8_t echo_reply = ICMP_ECHOREPLY; + m_copyback(pData, m, hlen + RT_OFFSETOF(struct icmp, icmp_type), + sizeof(echo_reply), (caddr_t)&echo_reply); + ip->ip_dst.s_addr = ip->ip_src.s_addr; + ip->ip_src.s_addr = dst; + icmp_reflect(pData, m); + m = NULL; /* m was consumed and freed */ + goto done; + } + +#ifdef RT_OS_WINDOWS + { + icmpwin_ping(pData, m, hlen); + break; /* free mbuf */ + } +#else + { + struct icmp *icp; + struct sockaddr_in addr; + + /* XXX: FIXME: this is bogus, see CTL_CHECKs above */ + addr.sin_family = AF_INET; + if ((ip->ip_dst.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr) + { + /* It's an alias */ + switch (RT_N2H_U32(ip->ip_dst.s_addr) & ~pData->netmask) + { + case CTL_DNS: + case CTL_ALIAS: + default: + addr.sin_addr = loopback_addr; + break; + } + } + else + addr.sin_addr.s_addr = ip->ip_dst.s_addr; + + if (m->m_next) + { + icp_buf = RTMemAlloc(icmplen); + if (!icp_buf) + { + Log(("NAT: not enought memory to allocate the buffer\n")); + goto end_error_free_m; + } + m_copydata(m, hlen, icmplen, icp_buf); + icp = (struct icmp *)icp_buf; + } + else + icp = (struct icmp *)(mtod(m, char *) + hlen); + + if (pData->icmp_socket.s != -1) + { + static bool fIcmpSocketErrorReported; + int ttl; + int status; + ssize_t rc; + + ttl = ip->ip_ttl; + Log(("NAT/ICMP: try to set TTL(%d)\n", ttl)); + status = 
setsockopt(pData->icmp_socket.s, IPPROTO_IP, IP_TTL, + (void *)&ttl, sizeof(ttl)); + if (status < 0) + Log(("NAT: Error (%s) occurred while setting TTL attribute of IP packet\n", + strerror(errno))); + rc = sendto(pData->icmp_socket.s, icp, icmplen, 0, + (struct sockaddr *)&addr, sizeof(addr)); + if (rc >= 0) + { + icmp_attach(pData, m); + m = NULL; /* m was stashed away for safekeeping */ + goto done; + } + + + if (!fIcmpSocketErrorReported) + { + LogRel(("NAT: icmp_input udp sendto tx errno = %d (%s)\n", + errno, strerror(errno))); + fIcmpSocketErrorReported = true; + } + icmp_error(pData, m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); + m = NULL; /* m was consumed and freed */ + goto done; + } + } +#endif /* !RT_OS_WINDOWS */ + break; + case ICMP_UNREACH: + case ICMP_TIMXCEED: + /* @todo(vvl): both up cases comes from guest, + * indeed right solution would be find the socket + * corresponding to ICMP data and close it. + */ + case ICMP_PARAMPROB: + case ICMP_SOURCEQUENCH: + case ICMP_TSTAMP: + case ICMP_MASKREQ: + case ICMP_REDIRECT: + icmpstat.icps_notsupp++; + break; + + default: + icmpstat.icps_badtype++; + } /* switch */ + +end_error_free_m: + if (m != NULL) + m_freem(pData, m); + +done: + if (icp_buf) + RTMemFree(icp_buf); +} + + +/** + * Send an ICMP message in response to a situation + * + * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. MAY send more (we do). + * MUST NOT change this header information. + * MUST NOT reply to a multicast/broadcast IP address. + * MUST NOT reply to a multicast/broadcast MAC address. + * MUST reply to only the first fragment. + * + * Send ICMP_UNREACH back to the source regarding msrc. + * It is reported as the bad ip packet. The header should + * be fully correct and in host byte order. + * ICMP fragmentation is illegal. + * + * @note: implementation note: MSIZE is 256 bytes (minimal buffer). 
+ * We always truncate original payload to 8 bytes required by the RFC, + * so the largest possible datagram is 14 (ethernet) + 20 (ip) + + * 8 (icmp) + 60 (max original ip with options) + 8 (original payload) + * = 110 bytes which fits into sinlge mbuf. + * + * @note This function will free msrc! + */ + +void icmp_error(PNATState pData, struct mbuf *msrc, u_char type, u_char code, int minsize, const char *message) +{ + unsigned ohlen, olen; + struct mbuf *m; + struct ip *oip, *ip; + struct icmp *icp; + void *payload; + RT_NOREF(minsize); + + LogFlow(("icmp_error: msrc = %p, msrc_len = %d\n", + (void *)msrc, msrc ? msrc->m_len : 0)); + + if (RT_UNLIKELY(msrc == NULL)) + goto end_error; + + M_ASSERTPKTHDR(msrc); + + if ( type != ICMP_UNREACH + && type != ICMP_TIMXCEED + && type != ICMP_SOURCEQUENCH) + goto end_error; + + oip = mtod(msrc, struct ip *); + LogFunc(("msrc: %RTnaipv4 -> %RTnaipv4\n", oip->ip_src, oip->ip_dst)); + + if (oip->ip_src.s_addr == INADDR_ANY) + goto end_error; + + if (oip->ip_off & IP_OFFMASK) + goto end_error; /* Only reply to fragment 0 */ + + ohlen = oip->ip_hl * 4; + AssertStmt(ohlen >= sizeof(struct ip), goto end_error); + + olen = oip->ip_len; + AssertStmt(olen >= ohlen, goto end_error); + + if (oip->ip_p == IPPROTO_ICMP) + { + struct icmp *oicp = (struct icmp *)((char *)oip + ohlen); + /* + * Assume any unknown ICMP type is an error. This isn't + * specified by the RFC, but think about it.. 
+ */ + if (oicp->icmp_type > ICMP_MAXTYPE || icmp_flush[oicp->icmp_type]) + goto end_error; + } + + /* undo byte order conversions done in ip_input() */ + HTONS(oip->ip_len); + HTONS(oip->ip_id); + HTONS(oip->ip_off); + + m = m_gethdr(pData, M_NOWAIT, MT_HEADER); + if (RT_UNLIKELY(m == NULL)) + goto end_error; + + m->m_flags |= M_SKIP_FIREWALL; + m->m_data += if_maxlinkhdr; + + ip = mtod(m, struct ip *); + m->m_pkthdr.header = (void *)ip; + + /* fill in ip (ip_output0() does the boilerplate for us) */ + ip->ip_tos = ((oip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ + /* ip->ip_len will be set later */ + ip->ip_off = 0; + ip->ip_ttl = MAXTTL; + ip->ip_p = IPPROTO_ICMP; + ip->ip_src = alias_addr; + ip->ip_dst = oip->ip_src; + + /* fill in icmp */ + icp = (struct icmp *)((char *)ip + sizeof(*ip)); + icp->icmp_type = type; + icp->icmp_code = code; + icp->icmp_id = 0; + icp->icmp_seq = 0; + + /* fill in icmp payload: original ip header plus 8 bytes of its payload */ + if (olen > ohlen + 8) + olen = ohlen + 8; + payload = (void *)((char *)icp + ICMP_MINLEN); + memcpy(payload, oip, olen); + + /* + * Original code appended this message after the payload. This + * might have been a good idea for real slirp, as it provided a + * communication channel with the remote host. But 90s are over. + */ + NOREF(message); + + /* hide ip header for icmp checksum calculation */ + m->m_data += sizeof(struct ip); + m->m_len = ICMP_MINLEN + /* truncated */ olen; + + icp->icmp_cksum = 0; + icp->icmp_cksum = cksum(m, m->m_len); + + /* reveal ip header */ + m->m_data -= sizeof(struct ip); + m->m_len += sizeof(struct ip); + ip->ip_len = m->m_len; + + (void) ip_output0(pData, (struct socket *)NULL, m, 1); + + icmpstat.icps_reflect++; + + /* clear source datagramm in positive branch */ + m_freem(pData, msrc); + LogFlowFuncLeave(); + return; + +end_error: + + /* + * clear source datagramm in case if some of requirement haven't been met. 
+ */ + if (msrc) + m_freem(pData, msrc); + + { + static bool fIcmpErrorReported; + if (!fIcmpErrorReported) + { + LogRel(("NAT: Error occurred while sending ICMP error message\n")); + fIcmpErrorReported = true; + } + } + LogFlowFuncLeave(); +} + +/* + * Reflect the ip packet back to the source + * Note: m isn't duplicated by this method and more delivered to ip_output then. + */ +void +icmp_reflect(PNATState pData, struct mbuf *m) +{ + register struct ip *ip = mtod(m, struct ip *); + int hlen = ip->ip_hl << 2; + register struct icmp *icp; + LogFlowFunc(("ENTER: m:%p\n", m)); + + /* + * Send an icmp packet back to the ip level, + * after supplying a checksum. + */ + m->m_data += hlen; + m->m_len -= hlen; + icp = mtod(m, struct icmp *); + + icp->icmp_cksum = 0; + icp->icmp_cksum = cksum(m, ip->ip_len - hlen); + + m->m_data -= hlen; + m->m_len += hlen; + + (void) ip_output(pData, (struct socket *)NULL, m); + + icmpstat.icps_reflect++; + LogFlowFuncLeave(); +} diff --git a/src/VBox/Devices/Network/slirp/ip_icmp.h b/src/VBox/Devices/Network/slirp/ip_icmp.h new file mode 100644 index 00000000..2dba66c1 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/ip_icmp.h @@ -0,0 +1,220 @@ +/* $Id: ip_icmp.h $ */ +/** @file + * NAT - IP/ICMP handling (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 + * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp + */ + +#ifndef _NETINET_IP_ICMP_H_ +#define _NETINET_IP_ICMP_H_ +#include <queue.h> + +/* + * Interface Control Message Protocol Definitions. + * Per RFC 792, September 1981. + */ + +typedef u_int32_t n_time; + +/* + * Structure of an icmp header. + */ +struct icmp +{ + uint8_t icmp_type; /* type of message, see below */ + uint8_t icmp_code; /* type sub code */ + uint16_t icmp_cksum; /* ones complement cksum of struct */ + union + { + uint8_t ih_pptr; /* ICMP_PARAMPROB */ + struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ + struct ih_idseq + { + uint16_t icd_id; + uint16_t icd_seq; + } ih_idseq; + int ih_void; + + /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ + struct ih_pmtu + { + uint16_t ipm_void; + uint16_t ipm_nextmtu; + } ih_pmtu; + } icmp_hun; +#define icmp_pptr icmp_hun.ih_pptr +#define icmp_gwaddr icmp_hun.ih_gwaddr +#define icmp_id icmp_hun.ih_idseq.icd_id +#define icmp_seq icmp_hun.ih_idseq.icd_seq +#define icmp_void icmp_hun.ih_void +#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void +#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu + union + { + struct id_ts + { + n_time its_otime; + n_time its_rtime; + n_time its_ttime; + } id_ts; + struct id_ip + { + struct ip idi_ip; + /* options and then 64 bits of data */ + } id_ip; + uint32_t id_mask; + char id_data[1]; + } icmp_dun; +#define icmp_otime icmp_dun.id_ts.its_otime +#define icmp_rtime icmp_dun.id_ts.its_rtime +#define icmp_ttime icmp_dun.id_ts.its_ttime +#define icmp_ip icmp_dun.id_ip.idi_ip +#define icmp_mask icmp_dun.id_mask +#define icmp_data icmp_dun.id_data +}; +AssertCompileSize(struct icmp, 28); + +/* + * Lower bounds on packet lengths for various types. + * For the error advice packets must first insure that the + * packet is large enought to contain the returned ip header. 
+ * Only then can we do the check to see if 64 bits of packet + * data have been returned, since we need to check the returned + * ip header length. + */ +#define ICMP_MINLEN 8 /* abs minimum */ +#define ICMP_TSLEN (8 + 3 * sizeof (n_time)) /* timestamp */ +#define ICMP_MASKLEN 12 /* address mask */ +#define ICMP_ADVLENMIN (8 + sizeof (struct ip) + 8) /* min */ +#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) + /* N.B.: must separately check that ip_hl >= 5 */ + +/* + * Definition of type and code field values. + */ +#define ICMP_ECHOREPLY 0 /* echo reply */ +#define ICMP_UNREACH 3 /* dest unreachable, codes: */ +#define ICMP_UNREACH_NET 0 /* bad net */ +#define ICMP_UNREACH_HOST 1 /* bad host */ +#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ +#define ICMP_UNREACH_PORT 3 /* bad port */ +#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ +#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ +#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ +#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ +#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ +#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ +#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ +#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ +#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ +#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ +#define ICMP_REDIRECT 5 /* shorter route, codes: */ +#define ICMP_REDIRECT_NET 0 /* for network */ +#define ICMP_REDIRECT_HOST 1 /* for host */ +#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ +#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ +#define ICMP_ECHO 8 /* echo service */ +#define ICMP_ROUTERADVERT 9 /* router advertisement */ +#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ +#define ICMP_TIMXCEED 11 /* time exceeded, code: */ +#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ +#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ +#define ICMP_PARAMPROB 12 /* ip header bad */ +#define 
ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */ +#define ICMP_TSTAMP 13 /* timestamp request */ +#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ +#define ICMP_IREQ 15 /* information request */ +#define ICMP_IREQREPLY 16 /* information reply */ +#define ICMP_MASKREQ 17 /* address mask request */ +#define ICMP_MASKREPLY 18 /* address mask reply */ + +#define ICMP_MAXTYPE 18 + +#define ICMP_INFOTYPE(type) \ + ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ + (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ + (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ + (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ + (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) + +void icmp_input (PNATState, struct mbuf *, int); +void icmp_error (PNATState, struct mbuf *, u_char, u_char, int, const char *); +void icmp_reflect (PNATState, struct mbuf *); + +struct icmp_msg +{ + TAILQ_ENTRY(icmp_msg) im_queue; + struct mbuf *im_m; + struct socket *im_so; +}; + +TAILQ_HEAD(icmp_storage, icmp_msg); + +int icmp_init (PNATState , int); +void icmp_finit (PNATState ); +struct icmp_msg * icmp_find_original_mbuf (PNATState , struct ip *); +void icmp_msg_delete(PNATState, struct icmp_msg *); + +#ifdef RT_OS_WINDOWS +/* Windows ICMP API code in ip_icmpwin.c */ +int icmpwin_init (PNATState); +void icmpwin_finit (PNATState); +void icmpwin_ping(PNATState, struct mbuf *, int); +void icmpwin_process(PNATState); +#endif + +#endif diff --git a/src/VBox/Devices/Network/slirp/ip_icmpwin.c b/src/VBox/Devices/Network/slirp/ip_icmpwin.c new file mode 100644 index 00000000..25467c8b --- /dev/null +++ b/src/VBox/Devices/Network/slirp/ip_icmpwin.c @@ -0,0 +1,558 @@ +/* $Id: ip_icmpwin.c $ */ +/** @file + * NAT - Windows ICMP API based ping proxy. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "slirp.h" +#include "ip_icmp.h" + +#include <winternl.h> /* for PIO_APC_ROUTINE &c */ +#ifndef PIO_APC_ROUTINE_DEFINED +# define PIO_APC_ROUTINE_DEFINED 1 +#endif +#include <iprt/win/iphlpapi.h> +#include <icmpapi.h> + +/* + * A header of ICMP ECHO. Intended for storage, unlike struct icmp + * which is intended to be overlayed onto a buffer. + */ +struct icmp_echo { + uint8_t icmp_type; + uint8_t icmp_code; + uint16_t icmp_cksum; + uint16_t icmp_echo_id; + uint16_t icmp_echo_seq; +}; + +AssertCompileSize(struct icmp_echo, 8); + + +struct pong { + PNATState pData; + + TAILQ_ENTRY(pong) queue_entry; + + union { + struct ip ip; + uint8_t au[60]; + } reqiph; + struct icmp_echo reqicmph; + + size_t bufsize; + uint8_t buf[1]; +}; + + +static VOID WINAPI icmpwin_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved); +static VOID WINAPI icmpwin_callback_old(void *ctx); + +static void icmpwin_callback(struct pong *pong); +static void icmpwin_pong(struct pong *pong); + +static struct mbuf *icmpwin_get_error(struct pong *pong, int type, int code); +static struct mbuf *icmpwin_get_mbuf(PNATState pData, size_t reqsize); + + +/* + * On Windows XP and Windows Server 2003 IcmpSendEcho2() callback + * is FARPROC, but starting from Vista it's PIO_APC_ROUTINE with + * two extra arguments. 
Callbacks use WINAPI (stdcall) calling + * convention with callee responsible for popping the arguments, + * so to avoid stack corruption we check windows version at run + * time and provide correct callback. + * + * XXX: this is system-wide, but what about multiple NAT threads? + */ +static PIO_APC_ROUTINE g_pfnIcmpCallback; + + +int +icmpwin_init(PNATState pData) +{ + if (g_pfnIcmpCallback == NULL) + { + OSVERSIONINFO osvi; + int status; + + ZeroMemory(&osvi, sizeof(OSVERSIONINFO)); + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + status = GetVersionEx(&osvi); + if (status == 0) + return 1; + + if (osvi.dwMajorVersion >= 6) + g_pfnIcmpCallback = icmpwin_callback_apc; + else + g_pfnIcmpCallback = (PIO_APC_ROUTINE)icmpwin_callback_old; + } + + TAILQ_INIT(&pData->pongs_expected); + TAILQ_INIT(&pData->pongs_received); + + pData->icmp_socket.sh = IcmpCreateFile(); + pData->phEvents[VBOX_ICMP_EVENT_INDEX] = CreateEvent(NULL, FALSE, FALSE, NULL); + + return 0; +} + + +void +icmpwin_finit(PNATState pData) +{ + IcmpCloseHandle(pData->icmp_socket.sh); + + while (!TAILQ_EMPTY(&pData->pongs_received)) { + struct pong *pong = TAILQ_FIRST(&pData->pongs_received); + TAILQ_REMOVE(&pData->pongs_received, pong, queue_entry); + RTMemFree(pong); + } + + /* this should be empty */ + while (!TAILQ_EMPTY(&pData->pongs_expected)) { + struct pong *pong = TAILQ_FIRST(&pData->pongs_expected); + TAILQ_REMOVE(&pData->pongs_expected, pong, queue_entry); + pong->pData = NULL; + } +} + + +/* + * Outgoing ping from guest. 
+ */ +void +icmpwin_ping(PNATState pData, struct mbuf *m, int hlen) +{ + struct ip *ip = mtod(m, struct ip *); + size_t reqsize, pongsize; + uint8_t ttl; + size_t bufsize; + struct pong *pong; + IPAddr dst; + IP_OPTION_INFORMATION opts; + void *reqdata; + int status; + + ttl = ip->ip_ttl; + AssertReturnVoid(ttl > 0); + + size_t hdrsize = hlen + sizeof(struct icmp_echo); + reqsize = ip->ip_len - hdrsize; + + bufsize = sizeof(ICMP_ECHO_REPLY); + if (reqsize < sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo)) + bufsize += sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo); + else + bufsize += reqsize; + bufsize += 16; /* whatever that is; empirically at least XP needs it */ + + pongsize = RT_UOFFSETOF(struct pong, buf) + bufsize; + if (pData->cbIcmpPending + pongsize > 1024 * 1024) + return; + + pong = RTMemAlloc(pongsize); + if (RT_UNLIKELY(pong == NULL)) + return; + + pong->pData = pData; + pong->bufsize = bufsize; + m_copydata(m, 0, hlen, (caddr_t)&pong->reqiph); + m_copydata(m, hlen, sizeof(struct icmp_echo), (caddr_t)&pong->reqicmph); + AssertReturnVoid(pong->reqicmph.icmp_type == ICMP_ECHO); + + if (m->m_next == NULL) + { + /* already in single contiguous buffer */ + reqdata = mtod(m, char *) + hdrsize; + } + else + { + /* use reply buffer as temporary storage */ + reqdata = pong->buf; + m_copydata(m, (int)hdrsize, (int)reqsize, reqdata); + } + + dst = ip->ip_dst.s_addr; + + opts.Ttl = ttl; + opts.Tos = ip->ip_tos; /* affected by DisableUserTOSSetting key */ + opts.Flags = (ip->ip_off & IP_DF) != 0 ? 
IP_FLAG_DF : 0; + opts.OptionsSize = 0; + opts.OptionsData = 0; + + + status = IcmpSendEcho2(pData->icmp_socket.sh, NULL, + g_pfnIcmpCallback, pong, + dst, reqdata, (WORD)reqsize, &opts, + pong->buf, (DWORD)pong->bufsize, + 5 * 1000 /* ms */); + + if (RT_UNLIKELY(status != 0)) + { + Log2(("NAT: IcmpSendEcho2: unexpected status %d\n", status)); + } + else if ((status = GetLastError()) != ERROR_IO_PENDING) + { + int code; + + Log2(("NAT: IcmpSendEcho2: error %d\n", status)); + switch (status) { + case ERROR_NETWORK_UNREACHABLE: + code = ICMP_UNREACH_NET; + break; + case ERROR_HOST_UNREACHABLE: + code = ICMP_UNREACH_HOST; + break; + default: + code = -1; + break; + } + + if (code != -1) /* send icmp error */ + { + struct mbuf *em = icmpwin_get_error(pong, ICMP_UNREACH, code); + if (em != NULL) + { + struct ip *eip = mtod(em, struct ip *); + eip->ip_src = alias_addr; + ip_output(pData, NULL, em); + } + } + } + else /* success */ + { + Log2(("NAT: pong %p for ping %RTnaipv4 id 0x%04x seq %d len %zu (%zu)\n", + pong, dst, + RT_N2H_U16(pong->reqicmph.icmp_echo_id), + RT_N2H_U16(pong->reqicmph.icmp_echo_seq), + pongsize, reqsize)); + + pData->cbIcmpPending += pongsize; + TAILQ_INSERT_TAIL(&pData->pongs_expected, pong, queue_entry); + pong = NULL; /* callback owns it now */ + } + + if (pong != NULL) + RTMemFree(pong); +} + + +static VOID WINAPI +icmpwin_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved) +{ + struct pong *pong = (struct pong *)ctx; + if (pong != NULL) + icmpwin_callback(pong); + RT_NOREF2(iob, reserved); +} + + +static VOID WINAPI +icmpwin_callback_old(void *ctx) +{ + struct pong *pong = (struct pong *)ctx; + if (pong != NULL) + icmpwin_callback(pong); +} + + +/* + * Actual callback code for IcmpSendEcho2(). OS version specific + * trampoline will free "pong" argument for us. + * + * Since async callback can be called anytime the thread is alertable, + * it's not safe to do any processing here. Instead queue it and + * notify the main loop. 
+ */ +static void +icmpwin_callback(struct pong *pong) +{ + PNATState pData = pong->pData; + + if (pData == NULL) + { + RTMemFree(pong); + return; + } + +#ifdef DEBUG + { + struct pong *expected, *already; + + TAILQ_FOREACH(expected, &pData->pongs_expected, queue_entry) + { + if (expected == pong) + break; + } + Assert(expected); + + TAILQ_FOREACH(already, &pData->pongs_received, queue_entry) + { + if (already == pong) + break; + } + Assert(!already); + } +#endif + + TAILQ_REMOVE(&pData->pongs_expected, pong, queue_entry); + TAILQ_INSERT_TAIL(&pData->pongs_received, pong, queue_entry); + + WSASetEvent(pData->phEvents[VBOX_ICMP_EVENT_INDEX]); +} + + +void +icmpwin_process(PNATState pData) +{ + struct pong_tailq pongs; + + if (TAILQ_EMPTY(&pData->pongs_received)) + return; + + TAILQ_INIT(&pongs); + TAILQ_CONCAT(&pongs, &pData->pongs_received, queue_entry); + + while (!TAILQ_EMPTY(&pongs)) { + struct pong *pong = TAILQ_FIRST(&pongs); + size_t sz; + + sz = RT_UOFFSETOF(struct pong, buf) + pong->bufsize; + Assert(pData->cbIcmpPending >= sz); + pData->cbIcmpPending -= sz; + + icmpwin_pong(pong); + + TAILQ_REMOVE(&pongs, pong, queue_entry); + RTMemFree(pong); + } +} + + +void +icmpwin_pong(struct pong *pong) +{ + PNATState pData; + DWORD nreplies; + ICMP_ECHO_REPLY *reply; + struct mbuf *m; + struct ip *ip; + struct icmp_echo *icmp; + size_t reqsize; + + pData = pong->pData; /* to make slirp_state.h macro hackery work */ + + nreplies = IcmpParseReplies(pong->buf, (DWORD)pong->bufsize); + if (nreplies == 0) + { + DWORD error = GetLastError(); + if (error == IP_REQ_TIMED_OUT) + Log2(("NAT: ping %p timed out\n", (void *)pong)); + else + Log2(("NAT: ping %p: IcmpParseReplies: error %d\n", + (void *)pong, error)); + return; + } + + reply = (ICMP_ECHO_REPLY *)pong->buf; + + if (reply->Status == IP_SUCCESS) + { + if (reply->Options.OptionsSize != 0) /* don't do options */ + return; + + /* need to remap &reply->Address ? 
*/ + if (/* not a mapped loopback */ 1) + { + if (reply->Options.Ttl <= 1) + return; + --reply->Options.Ttl; + } + + reqsize = reply->DataSize; + if ( (reply->Options.Flags & IP_FLAG_DF) != 0 + && sizeof(struct ip) + sizeof(struct icmp_echo) + reqsize > (size_t)if_mtu) + return; + + m = icmpwin_get_mbuf(pData, reqsize); + if (m == NULL) + return; + + ip = mtod(m, struct ip *); + icmp = (struct icmp_echo *)(mtod(m, char *) + sizeof(*ip)); + + /* fill in ip (ip_output0() does the boilerplate for us) */ + ip->ip_tos = reply->Options.Tos; + ip->ip_len = sizeof(*ip) + sizeof(*icmp) + (int)reqsize; + ip->ip_off = 0; + ip->ip_ttl = reply->Options.Ttl; + ip->ip_p = IPPROTO_ICMP; + ip->ip_src.s_addr = reply->Address; + ip->ip_dst = pong->reqiph.ip.ip_src; + + icmp->icmp_type = ICMP_ECHOREPLY; + icmp->icmp_code = 0; + icmp->icmp_cksum = 0; + icmp->icmp_echo_id = pong->reqicmph.icmp_echo_id; + icmp->icmp_echo_seq = pong->reqicmph.icmp_echo_seq; + + m_append(pData, m, (int)reqsize, reply->Data); + + icmp->icmp_cksum = in_cksum_skip(m, ip->ip_len, sizeof(*ip)); + } + else { + uint8_t type, code; + + switch (reply->Status) { + case IP_DEST_NET_UNREACHABLE: + type = ICMP_UNREACH; code = ICMP_UNREACH_NET; + break; + case IP_DEST_HOST_UNREACHABLE: + type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; + break; + case IP_DEST_PROT_UNREACHABLE: + type = ICMP_UNREACH; code = ICMP_UNREACH_PROTOCOL; + break; + case IP_PACKET_TOO_BIG: + type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; + break; + case IP_SOURCE_QUENCH: + type = ICMP_SOURCEQUENCH; code = 0; + break; + case IP_TTL_EXPIRED_TRANSIT: + type = ICMP_TIMXCEED; code = ICMP_TIMXCEED_INTRANS; + break; + case IP_TTL_EXPIRED_REASSEM: + type = ICMP_TIMXCEED; code = ICMP_TIMXCEED_REASS; + break; + default: + Log2(("NAT: ping reply status %d, dropped\n", reply->Status)); + return; + } + + Log2(("NAT: ping status %d -> type %d/code %d\n", + reply->Status, type, code)); + + /* + * XXX: we don't know the TTL of the request at the time this + * 
ICMP error was generated (we can guess it was 1 for ttl + * exceeded, but don't bother faking it). + */ + m = icmpwin_get_error(pong, type, code); + if (m == NULL) + return; + + ip = mtod(m, struct ip *); + + ip->ip_tos = reply->Options.Tos; + ip->ip_ttl = reply->Options.Ttl; /* XXX: decrement */ + ip->ip_src.s_addr = reply->Address; + } + + Assert(ip->ip_len == m_length(m, NULL)); + ip_output(pData, NULL, m); +} + + +/* + * Prepare mbuf with ICMP error type/code. + * IP source must be filled by the caller. + */ +static struct mbuf * +icmpwin_get_error(struct pong *pong, int type, int code) +{ + PNATState pData = pong->pData; + struct mbuf *m; + struct ip *ip; + struct icmp_echo *icmp; + size_t reqsize; + + Log2(("NAT: ping error type %d/code %d\n", type, code)); + + size_t reqhlen = pong->reqiph.ip.ip_hl << 2; + reqsize = reqhlen + sizeof(pong->reqicmph); + + m = icmpwin_get_mbuf(pData, reqsize); + if (m == NULL) + return NULL; + + ip = mtod(m, struct ip *); + icmp = (struct icmp_echo *)(mtod(m, char *) + sizeof(*ip)); + + ip->ip_tos = 0; + ip->ip_len = sizeof(*ip) + sizeof(*icmp) + (int)reqsize; + ip->ip_off = 0; + ip->ip_ttl = IPDEFTTL; + ip->ip_p = IPPROTO_ICMP; + ip->ip_src.s_addr = 0; /* NB */ + ip->ip_dst = pong->reqiph.ip.ip_src; + + icmp->icmp_type = type; + icmp->icmp_code = code; + icmp->icmp_cksum = 0; + icmp->icmp_echo_id = 0; + icmp->icmp_echo_seq = 0; + + /* payload: the IP and ICMP headers of the original request */ + m_append(pData, m, (int)reqhlen, (caddr_t)&pong->reqiph); + m_append(pData, m, sizeof(pong->reqicmph), (caddr_t)&pong->reqicmph); + + icmp->icmp_cksum = in_cksum_skip(m, ip->ip_len, sizeof(*ip)); + + return m; +} + + +/* + * Replacing original simple slirp mbufs with real mbufs from freebsd + * was a bit messy since assumption are different. This leads to + * rather ugly code at times. Hide the gore here. 
+ */ +static struct mbuf * +icmpwin_get_mbuf(PNATState pData, size_t reqsize) +{ + struct mbuf *m; + + reqsize += if_maxlinkhdr; + reqsize += sizeof(struct ip) + sizeof(struct icmp_echo); + + if (reqsize <= MHLEN) + /* good pings come in small packets */ + m = m_gethdr(pData, M_NOWAIT, MT_HEADER); + else + m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, (int)slirp_size(pData)); + + if (m == NULL) + return NULL; + + m->m_flags |= M_SKIP_FIREWALL; + m->m_data += if_maxlinkhdr; /* reserve leading space for ethernet header */ + + m->m_pkthdr.header = mtod(m, void *); + m->m_len = sizeof(struct ip) + sizeof(struct icmp_echo); + + return m; +} + diff --git a/src/VBox/Devices/Network/slirp/ip_input.c b/src/VBox/Devices/Network/slirp/ip_input.c new file mode 100644 index 00000000..b13f0cb0 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/ip_input.c @@ -0,0 +1,693 @@ +/* $Id: ip_input.c $ */ +/** @file + * NAT - IP input. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 + * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp + */ + +/* + * Changes and additions relating to SLiRP are + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> +#include "ip_icmp.h" +#include "alias.h" + + +/* + * IP initialization: fill in IP protocol switch table. + * All protocols not implemented in kernel go to raw IP protocol handler. 
+ */ +void +ip_init(PNATState pData) +{ + int i = 0; + for (i = 0; i < IPREASS_NHASH; ++i) + TAILQ_INIT(&ipq[i]); + maxnipq = 100; /* ??? */ + maxfragsperpacket = 16; + nipq = 0; + ip_currid = tt.tv_sec & 0xffff; + udp_init(pData); + tcp_init(pData); +} + +/* + * Ip input routine. Checksum and byte swap header. If fragmented + * try to reassemble. Process options. Pass to next level. + */ +void +ip_input(PNATState pData, struct mbuf *m) +{ + register struct ip *ip; + int hlen = 0; + int mlen = 0; + int iplen = 0; + + STAM_PROFILE_START(&pData->StatIP_input, a); + + LogFlowFunc(("ENTER: m = %p\n", m)); + ip = mtod(m, struct ip *); + Log2(("ip_dst=%RTnaipv4(len:%d) m_len = %d\n", ip->ip_dst, RT_N2H_U16(ip->ip_len), m->m_len)); + + ipstat.ips_total++; + + mlen = m->m_len; + + if (mlen < sizeof(struct ip)) + { + ipstat.ips_toosmall++; + goto bad_free_m; + } + + ip = mtod(m, struct ip *); + if (ip->ip_v != IPVERSION) + { + ipstat.ips_badvers++; + goto bad_free_m; + } + + hlen = ip->ip_hl << 2; + if ( hlen < sizeof(struct ip) + || hlen > mlen) + { + /* min header length */ + ipstat.ips_badhlen++; /* or packet too short */ + goto bad_free_m; + } + + /* keep ip header intact for ICMP reply + * ip->ip_sum = cksum(m, hlen); + * if (ip->ip_sum) { + */ + if (cksum(m, hlen)) + { + ipstat.ips_badsum++; + goto bad_free_m; + } + + iplen = RT_N2H_U16(ip->ip_len); + if (iplen < hlen) + { + ipstat.ips_badlen++; + goto bad_free_m; + } + + /* + * Check that the amount of data in the buffers + * is as at least much as the IP header would have us expect. + * Trim mbufs if longer than we expect. + * Drop packet if shorter than we expect. + */ + if (mlen < iplen) + { + ipstat.ips_tooshort++; + goto bad_free_m; + } + + /* Should drop packet if mbuf too long? hmmm... 
*/ + if (mlen > iplen) + { + m_adj(m, iplen - mlen); + mlen = m->m_len; + } + + /* source must be unicast */ + if ((ip->ip_src.s_addr & RT_N2H_U32_C(0xe0000000)) == RT_N2H_U32_C(0xe0000000)) + goto free_m; + + /* + * Drop multicast (class d) and reserved (class e) here. The rest + * of the code is not yet prepared to deal with it. IGMP is not + * implemented either. + */ + if ( (ip->ip_dst.s_addr & RT_N2H_U32_C(0xe0000000)) == RT_N2H_U32_C(0xe0000000) + && ip->ip_dst.s_addr != 0xffffffff) + { + goto free_m; + } + + + /* do we need to "forward" this packet? */ + if (!CTL_CHECK_MINE(ip->ip_dst.s_addr)) + { + if (ip->ip_ttl <= 1) + { + /* icmp_error expects these in host order */ + NTOHS(ip->ip_len); + NTOHS(ip->ip_id); + NTOHS(ip->ip_off); + + icmp_error(pData, m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); + goto no_free_m; + } + + /* ignore packets to other nodes from our private network */ + if ( CTL_CHECK_NETWORK(ip->ip_dst.s_addr) + && !CTL_CHECK_BROADCAST(ip->ip_dst.s_addr)) + { + /* XXX: send ICMP_REDIRECT_HOST to be pedantic? */ + goto free_m; + } + + ip->ip_ttl--; + if (ip->ip_sum > RT_H2N_U16_C(0xffffU - (1 << 8))) + ip->ip_sum += RT_H2N_U16_C(1 << 8) + 1; + else + ip->ip_sum += RT_H2N_U16_C(1 << 8); + } + + /* run it through libalias */ + { + int rc; + if (!(m->m_flags & M_SKIP_FIREWALL)) + { + STAM_PROFILE_START(&pData->StatALIAS_input, b); + rc = LibAliasIn(pData->proxy_alias, mtod(m, char *), mlen); + STAM_PROFILE_STOP(&pData->StatALIAS_input, b); + Log2(("NAT: LibAlias return %d\n", rc)); + } + else + m->m_flags &= ~M_SKIP_FIREWALL; + +#if 0 /* disabled: no module we use does it in this direction */ + /* + * XXX: spooky action at a distance - libalias may modify the + * packet and will update ip_len to reflect the new length. + */ + if (iplen != RT_N2H_U16(ip->ip_len)) + { + iplen = RT_N2H_U16(ip->ip_len); + m->m_len = iplen; + mlen = m->m_len; + } +#endif + } + + /* + * Convert fields to host representation. 
+ */ + NTOHS(ip->ip_len); + NTOHS(ip->ip_id); + NTOHS(ip->ip_off); + + /* + * If offset or IP_MF are set, must reassemble. + * Otherwise, nothing need be done. + * (We could look in the reassembly queue to see + * if the packet was previously fragmented, + * but it's not worth the time; just let them time out.) + * + */ + if (ip->ip_off & (IP_MF | IP_OFFMASK)) + { + m = ip_reass(pData, m); + if (m == NULL) + goto no_free_m; + ip = mtod(m, struct ip *); + hlen = ip->ip_hl << 2; + } + else + ip->ip_len -= hlen; + + /* + * Switch out to protocol's input routine. + */ + ipstat.ips_delivered++; + switch (ip->ip_p) + { + case IPPROTO_TCP: + tcp_input(pData, m, hlen, (struct socket *)NULL); + break; + case IPPROTO_UDP: + udp_input(pData, m, hlen); + break; + case IPPROTO_ICMP: + icmp_input(pData, m, hlen); + break; + default: + ipstat.ips_noproto++; + m_freem(pData, m); + } + goto no_free_m; + +bad_free_m: + Log2(("NAT: IP datagram to %RTnaipv4 with size(%d) claimed as bad\n", + ip->ip_dst, ip->ip_len)); +free_m: + m_freem(pData, m); +no_free_m: + STAM_PROFILE_STOP(&pData->StatIP_input, a); + LogFlowFuncLeave(); + return; +} + +struct mbuf * +ip_reass(PNATState pData, struct mbuf* m) +{ + struct ip *ip; + struct mbuf *p, *q, *nq; + struct ipq_t *fp = NULL; + struct ipqhead *head; + int i, hlen, next; + u_short hash; + + /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */ + LogFlowFunc(("ENTER: m:%p\n", m)); + if ( maxnipq == 0 + || maxfragsperpacket == 0) + { + ipstat.ips_fragments++; + ipstat.ips_fragdropped++; + m_freem(pData, m); + LogFlowFunc(("LEAVE: NULL\n")); + return (NULL); + } + + ip = mtod(m, struct ip *); + hlen = ip->ip_hl << 2; + + hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); + head = &ipq[hash]; + + /* + * Look for queue of fragments + * of this datagram. 
+ */ + TAILQ_FOREACH(fp, head, ipq_list) + if (ip->ip_id == fp->ipq_id && + ip->ip_src.s_addr == fp->ipq_src.s_addr && + ip->ip_dst.s_addr == fp->ipq_dst.s_addr && + ip->ip_p == fp->ipq_p) + goto found; + + fp = NULL; + + /* + * Attempt to trim the number of allocated fragment queues if it + * exceeds the administrative limit. + */ + if ((nipq > maxnipq) && (maxnipq > 0)) + { + /* + * drop something from the tail of the current queue + * before proceeding further + */ + struct ipq_t *pHead = TAILQ_LAST(head, ipqhead); + if (pHead == NULL) + { + /* gak */ + for (i = 0; i < IPREASS_NHASH; i++) + { + struct ipq_t *pTail = TAILQ_LAST(&ipq[i], ipqhead); + if (pTail) + { + ipstat.ips_fragtimeout += pTail->ipq_nfrags; + ip_freef(pData, &ipq[i], pTail); + break; + } + } + } + else + { + ipstat.ips_fragtimeout += pHead->ipq_nfrags; + ip_freef(pData, head, pHead); + } + } + +found: + /* + * Adjust ip_len to not reflect header, + * convert offset of this to bytes. + */ + ip->ip_len -= hlen; + if (ip->ip_off & IP_MF) + { + /* + * Make sure that fragments have a data length + * that's a non-zero multiple of 8 bytes. + */ + if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) + { + ipstat.ips_toosmall++; /* XXX */ + goto dropfrag; + } + m->m_flags |= M_FRAG; + } + else + m->m_flags &= ~M_FRAG; + ip->ip_off <<= 3; + + + /* + * Attempt reassembly; if it succeeds, proceed. + * ip_reass() will return a different mbuf. + */ + ipstat.ips_fragments++; + + /* Previous ip_reass() started here. */ + /* + * Presence of header sizes in mbufs + * would confuse code below. + */ + m->m_data += hlen; + m->m_len -= hlen; + + /* + * If first fragment to arrive, create a reassembly queue. 
+ */ + if (fp == NULL) + { + fp = RTMemAlloc(sizeof(struct ipq_t)); + if (fp == NULL) + goto dropfrag; + TAILQ_INSERT_HEAD(head, fp, ipq_list); + nipq++; + fp->ipq_nfrags = 1; + fp->ipq_ttl = IPFRAGTTL; + fp->ipq_p = ip->ip_p; + fp->ipq_id = ip->ip_id; + fp->ipq_src = ip->ip_src; + fp->ipq_dst = ip->ip_dst; + fp->ipq_frags = m; + m->m_nextpkt = NULL; + goto done; + } + else + { + fp->ipq_nfrags++; + } + +#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) + + /* + * Find a segment which begins after this one does. + */ + for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) + if (GETIP(q)->ip_off > ip->ip_off) + break; + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us, otherwise + * stick new segment in the proper place. + * + * If some of the data is dropped from the preceding + * segment, then it's checksum is invalidated. + */ + if (p) + { + i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off; + if (i > 0) + { + if (i >= ip->ip_len) + goto dropfrag; + m_adj(m, i); + ip->ip_off += i; + ip->ip_len -= i; + } + m->m_nextpkt = p->m_nextpkt; + p->m_nextpkt = m; + } + else + { + m->m_nextpkt = fp->ipq_frags; + fp->ipq_frags = m; + } + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off; + q = nq) + { + i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off; + if (i < GETIP(q)->ip_len) + { + GETIP(q)->ip_len -= i; + GETIP(q)->ip_off += i; + m_adj(q, i); + break; + } + nq = q->m_nextpkt; + m->m_nextpkt = nq; + ipstat.ips_fragdropped++; + fp->ipq_nfrags--; + m_freem(pData, q); + } + + /* + * Check for complete reassembly and perform frag per packet + * limiting. + * + * Frag limiting is performed here so that the nth frag has + * a chance to complete the packet before we drop the packet. 
+ * As a result, n+1 frags are actually allowed per packet, but + * only n will ever be stored. (n = maxfragsperpacket.) + * + */ + next = 0; + for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) + { + if (GETIP(q)->ip_off != next) + { + if (fp->ipq_nfrags > maxfragsperpacket) + { + ipstat.ips_fragdropped += fp->ipq_nfrags; + ip_freef(pData, head, fp); + } + goto done; + } + next += GETIP(q)->ip_len; + } + /* Make sure the last packet didn't have the IP_MF flag */ + if (p->m_flags & M_FRAG) + { + if (fp->ipq_nfrags > maxfragsperpacket) + { + ipstat.ips_fragdropped += fp->ipq_nfrags; + ip_freef(pData, head, fp); + } + goto done; + } + + /* + * Reassembly is complete. Make sure the packet is a sane size. + */ + q = fp->ipq_frags; + ip = GETIP(q); + hlen = ip->ip_hl << 2; + if (next + hlen > IP_MAXPACKET) + { + ipstat.ips_fragdropped += fp->ipq_nfrags; + ip_freef(pData, head, fp); + goto done; + } + + /* + * Concatenate fragments. + */ + m = q; + nq = q->m_nextpkt; + q->m_nextpkt = NULL; + for (q = nq; q != NULL; q = nq) + { + nq = q->m_nextpkt; + q->m_nextpkt = NULL; + m_cat(pData, m, q); + + m->m_len += hlen; + m->m_data -= hlen; + ip = mtod(m, struct ip *); /*update ip pointer */ + hlen = ip->ip_hl << 2; + m->m_len -= hlen; + m->m_data += hlen; + } + m->m_len += hlen; + m->m_data -= hlen; + + /* + * Create header for new ip packet by modifying header of first + * packet; dequeue and discard fragment reassembly header. + * Make header visible. 
+ */ + + ip->ip_len = next; + ip->ip_src = fp->ipq_src; + ip->ip_dst = fp->ipq_dst; + TAILQ_REMOVE(head, fp, ipq_list); + nipq--; + RTMemFree(fp); + + Assert((ip->ip_len == next)); + /* some debugging cruft by sklower, below, will go away soon */ +#if 0 + if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */ + m_fixhdr(m); +#endif + ipstat.ips_reassembled++; + LogFlowFunc(("LEAVE: %p\n", m)); + return (m); + +dropfrag: + ipstat.ips_fragdropped++; + if (fp != NULL) + fp->ipq_nfrags--; + m_freem(pData, m); + +done: + LogFlowFunc(("LEAVE: NULL\n")); + return NULL; + +#undef GETIP +} + +void +ip_freef(PNATState pData, struct ipqhead *fhp, struct ipq_t *fp) +{ + struct mbuf *q; + + while (fp->ipq_frags) + { + q = fp->ipq_frags; + fp->ipq_frags = q->m_nextpkt; + m_freem(pData, q); + } + TAILQ_REMOVE(fhp, fp, ipq_list); + RTMemFree(fp); + nipq--; +} + +/* + * IP timer processing; + * if a timer expires on a reassembly + * queue, discard it. + */ +void +ip_slowtimo(PNATState pData) +{ + register struct ipq_t *fp; + + /* XXX: the fragment expiration is the same but requier + * additional loop see (see ip_input.c in FreeBSD tree) + */ + int i; + LogFlow(("ip_slowtimo:\n")); + for (i = 0; i < IPREASS_NHASH; i++) + { + for(fp = TAILQ_FIRST(&ipq[i]); fp;) + { + struct ipq_t *fpp; + + fpp = fp; + fp = TAILQ_NEXT(fp, ipq_list); + if(--fpp->ipq_ttl == 0) + { + ipstat.ips_fragtimeout += fpp->ipq_nfrags; + ip_freef(pData, &ipq[i], fpp); + } + } + } + /* + * If we are over the maximum number of fragments + * (due to the limit being lowered), drain off + * enough to get down to the new limit. + */ + if (maxnipq >= 0 && nipq > maxnipq) + { + for (i = 0; i < IPREASS_NHASH; i++) + { + while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) + { + ipstat.ips_fragdropped += TAILQ_FIRST(&ipq[i])->ipq_nfrags; + ip_freef(pData, &ipq[i], TAILQ_FIRST(&ipq[i])); + } + } + } +} + + +/* + * Strip out IP options, at higher + * level protocol in the kernel. 
+ * Second argument is buffer to which options + * will be moved, and return value is their length. + * (XXX) should be deleted; last arg currently ignored. + */ +void +ip_stripoptions(struct mbuf *m, struct mbuf *mopt) +{ + register int i; + struct ip *ip = mtod(m, struct ip *); + register caddr_t opts; + int olen; + NOREF(mopt); /** @todo do we really will need this options buffer? */ + + olen = (ip->ip_hl<<2) - sizeof(struct ip); + opts = (caddr_t)(ip + 1); + i = m->m_len - (sizeof(struct ip) + olen); + memcpy(opts, opts + olen, (unsigned)i); + m->m_len -= olen; + + ip->ip_hl = sizeof(struct ip) >> 2; +} diff --git a/src/VBox/Devices/Network/slirp/ip_output.c b/src/VBox/Devices/Network/slirp/ip_output.c new file mode 100644 index 00000000..3b37a47b --- /dev/null +++ b/src/VBox/Devices/Network/slirp/ip_output.c @@ -0,0 +1,354 @@ +/* $Id: ip_output.c $ */ +/** @file + * NAT - IP output. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 + * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp + */ + +/* + * Changes and additions relating to SLiRP are + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#include <slirp.h> +#include <iprt/errcore.h> +#include "alias.h" + +static const uint8_t broadcast_ethaddr[6] = +{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +static int rt_lookup_in_cache(PNATState pData, uint32_t dst, uint8_t *ether) +{ + int rc; + LogFlowFunc(("ENTER: dst:%RTnaipv4, ether:%RTmac\n", dst, ether)); + if (dst == INADDR_BROADCAST) + { + memcpy(ether, broadcast_ethaddr, ETH_ALEN); + LogFlowFunc(("LEAVE: VINF_SUCCESS\n")); + return VINF_SUCCESS; + } + + rc = slirp_arp_lookup_ether_by_ip(pData, dst, ether); + if (RT_SUCCESS(rc)) + { + LogFlowFunc(("LEAVE: %Rrc\n", rc)); + return rc; + } + + rc = bootp_cache_lookup_ether_by_ip(pData, dst, ether); + if (RT_SUCCESS(rc)) + { + LogFlowFunc(("LEAVE: %Rrc\n", rc)); + return rc; + } + /* + * no chance to send this packet, sorry, we will request ether address via ARP + */ + slirp_arp_who_has(pData, dst); + LogFlowFunc(("LEAVE: VERR_NOT_FOUND\n")); + return VERR_NOT_FOUND; +} + +/* + * IP output. The packet in mbuf chain m contains a skeletal IP + * header (with len, off, ttl, proto, tos, src, dst). + * The mbuf chain containing the packet will be freed. + * The mbuf opt, if present, will not be freed. + */ +int +ip_output(PNATState pData, struct socket *so, struct mbuf *m0) +{ + return ip_output0(pData, so, m0, 0); +} + +/* This function will free m0! 
*/ +int +ip_output0(PNATState pData, struct socket *so, struct mbuf *m0, int urg) +{ + register struct ip *ip; + register struct mbuf *m = m0; + register int hlen = sizeof(struct ip); + int len, off, error = 0; + struct ethhdr *eh = NULL; + uint8_t eth_dst[ETH_ALEN]; + int rc = 1; + + STAM_PROFILE_START(&pData->StatIP_output, a); + +#ifdef LOG_ENABLED + LogFlowFunc(("ip_output: so = %R[natsock], m0 = %p\n", so, m0)); +#else + NOREF(so); +#endif + + M_ASSERTPKTHDR(m); + Assert(m->m_pkthdr.header); + +#if 0 /* We do no options */ + if (opt) + { + m = ip_insertoptions(m, opt, &len); + hlen = len; + } +#endif + ip = mtod(m, struct ip *); + LogFunc(("ip(src:%RTnaipv4, dst:%RTnaipv4)\n", ip->ip_src, ip->ip_dst)); + /* + * Fill in IP header. + */ + ip->ip_v = IPVERSION; + ip->ip_off &= IP_DF; + ip->ip_id = RT_H2N_U16(ip_currid); + ip->ip_hl = hlen >> 2; + ip_currid++; + ipstat.ips_localout++; + + /* Current TCP/IP stack hasn't routing information at + * all so we need to calculate destination ethernet address + */ + rc = rt_lookup_in_cache(pData, ip->ip_dst.s_addr, eth_dst); + if (RT_FAILURE(rc)) + goto exit_drop_package; + + eh = (struct ethhdr *)(m->m_data - ETH_HLEN); + /* + * If small enough for interface, can just send directly. + */ + if ((u_int16_t)ip->ip_len <= if_mtu) + { + ip->ip_len = RT_H2N_U16((u_int16_t)ip->ip_len); + ip->ip_off = RT_H2N_U16((u_int16_t)ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = cksum(m, hlen); + + if (!(m->m_flags & M_SKIP_FIREWALL)){ + STAM_PROFILE_START(&pData->StatALIAS_output, b); + rc = LibAliasOut(pData->proxy_alias, mtod(m, char *), m_length(m, NULL)); + if (rc == PKT_ALIAS_IGNORED) + { + Log(("NAT: packet was droppped\n")); + goto exit_drop_package; + } + STAM_PROFILE_STOP(&pData->StatALIAS_output, b); + } + else + m->m_flags &= ~M_SKIP_FIREWALL; + + memcpy(eh->h_source, eth_dst, ETH_ALEN); + + LogFlowFunc(("ip(ip_src:%RTnaipv4, ip_dst:%RTnaipv4)\n", + ip->ip_src, ip->ip_dst)); + if_encap(pData, ETH_P_IP, m, urg? 
ETH_ENCAP_URG : 0); + goto done; + } + + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. + */ + if (ip->ip_off & IP_DF) + { + error = -1; + ipstat.ips_cantfrag++; + goto exit_drop_package; + } + + len = (if_mtu - hlen) &~ 7; /* ip databytes per packet */ + if (len < 8) + { + error = -1; + goto exit_drop_package; + } + + { + int mhlen, firstlen = len; + struct mbuf **mnext = &m->m_nextpkt; + char *buf; /* intermediate buffer we'll use for a copy of the original packet */ + /* + * Loop through length of segment after first fragment, + * make new header and copy data of each part and link onto chain. + */ + m0 = m; + mhlen = ip->ip_hl << 2; + Log(("NAT:ip:frag: mhlen = %d\n", mhlen)); + for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) + { + register struct ip *mhip; + m = m_getjcl(pData, M_NOWAIT, MT_HEADER , M_PKTHDR, slirp_size(pData)); + if (m == 0) + { + error = -1; + ipstat.ips_odropped++; + goto exit_drop_package; + } + m->m_data += if_maxlinkhdr; + mhip = mtod(m, struct ip *); + *mhip = *ip; + m->m_pkthdr.header = mtod(m, void *); + /* we've calculated eth_dst for first packet */ +#if 0 /* No options */ + if (hlen > sizeof (struct ip)) + { + mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); + mhip->ip_hl = mhlen >> 2; + } +#endif + m->m_len = mhlen; + mhip->ip_off = ((off - mhlen) >> 3) + (ip->ip_off & ~IP_MF); + if (ip->ip_off & IP_MF) + mhip->ip_off |= IP_MF; + if (off + len >= (u_int16_t)ip->ip_len) + len = (u_int16_t)ip->ip_len - off; + else + mhip->ip_off |= IP_MF; + mhip->ip_len = RT_H2N_U16((u_int16_t)(len + mhlen)); + + buf = RTMemAlloc(len); + Log(("NAT:ip:frag: alloc = %d\n", len)); + m_copydata(m0, off, len, buf); /* copy to buffer */ + Log(("NAT:ip:frag: m_copydata(m0 = %p,off = %d, len = %d,)\n", m0, off, len)); + + m->m_data += mhlen; + m->m_len -= mhlen; + m_copyback(pData, m, 0, len, buf); /* copy from buffer */ + Log(("NAT:ip:frag: m_copyback(m = %p,, len = %d,)\n", m, 
len)); + m->m_data -= mhlen; + m->m_len += mhlen; + RTMemFree(buf); + Assert((m->m_len == (mhlen + len))); + + mhip->ip_off = RT_H2N_U16((u_int16_t)(mhip->ip_off)); + mhip->ip_sum = 0; + mhip->ip_sum = cksum(m, mhlen); + *mnext = m; + mnext = &m->m_nextpkt; + ipstat.ips_ofragments++; + } + /* + * Update first fragment by trimming what's been copied out + * and updating header, then send each fragment (in order). + * + * note: m_adj do all required releases for chained mbufs. + */ + m = m0; + m_adj(m, mhlen + firstlen - (u_int16_t)ip->ip_len); + Log(("NAT:ip:frag: m_adj(m(m_len:%d) = %p, len = %d)\n", m->m_len, m, mhlen + firstlen - (u_int16_t)ip->ip_len)); + ip->ip_len = RT_H2N_U16((u_int16_t)mhlen + firstlen); + ip->ip_off = RT_H2N_U16((u_int16_t)(ip->ip_off | IP_MF)); + ip->ip_sum = 0; + ip->ip_sum = cksum(m, mhlen); + + if (!(m->m_flags & M_SKIP_FIREWALL)){ + /** @todo We can't alias all fragments because the way libalias processing + * the fragments brake the sequence. libalias put alias_address to the source + * address of IP header of fragment, while IP header of the first packet is + * is unmodified. That confuses guest's TCP/IP stack and guest drop the sequence. + * Here we're letting libalias to process the first packet and send the rest as is, + * it's exactly the way in of packet are processing in proxyonly way. + * Here we need investigate what should be done to avoid such behavior and find right + * solution. 
+ */ + int rcLa; + + rcLa = LibAliasOut(pData->proxy_alias, mtod(m, char *), m->m_len); + if (rcLa == PKT_ALIAS_IGNORED) + { + Log(("NAT: packet was droppped\n")); + goto exit_drop_package; + } + Log2(("NAT: LibAlias return %d\n", rcLa)); + } + else + m->m_flags &= ~M_SKIP_FIREWALL; + for (m = m0; m; m = m0) + { + m0 = m->m_nextpkt; + m->m_nextpkt = 0; + if (error == 0) + { + m->m_data -= ETH_HLEN; + eh = mtod(m, struct ethhdr *); + m->m_data += ETH_HLEN; + memcpy(eh->h_source, eth_dst, ETH_ALEN); + + Log(("NAT:ip:frag: if_encap(,,m(m_len = %d) = %p,0)\n", m->m_len, m)); + if_encap(pData, ETH_P_IP, m, 0); + } + else + m_freem(pData, m); + } + + if (error == 0) + ipstat.ips_fragmented++; + } + +done: + STAM_PROFILE_STOP(&pData->StatIP_output, a); + LogFlowFunc(("LEAVE: %d\n", error)); + return error; + +exit_drop_package: + m_freem(pData, m0); + STAM_PROFILE_STOP(&pData->StatIP_output, a); + LogFlowFunc(("LEAVE: %d\n", error)); + return error; +} diff --git a/src/VBox/Devices/Network/slirp/libalias/HISTORY b/src/VBox/Devices/Network/slirp/libalias/HISTORY new file mode 100644 index 00000000..01744351 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/HISTORY @@ -0,0 +1,145 @@ +$FreeBSD: src/sys/netinet/libalias/HISTORY,v 1.9.32.1 2009/04/15 03:14:26 kensmith Exp $ + +Version 1.0: August 11, 1996 (cjm) + +Version 1.1: August 20, 1996 (cjm) + - Host accepts incoming connections for ports 0 to 1023. + +Version 1.2: September 7, 1996 (cjm) + - Fragment handling error in alias_db.c corrected. + +Version 1.3: September 15, 1996 (cjm) + - Generalized mechanism for handling incoming + connections (no more 0 to 1023 restriction). + + - Increased ICMP support (will handle traceroute now). + + - Improved TCP close connection logic. + +Version 1.4: September 16, 1996 (cjm) + +Version 1.5: September 17, 1996 (cjm) + - Corrected error in handling incoming UDP packets + with zero checksum. + +Version 1.6: September 18, 1996 + - Simplified ICMP data storage. 
Will now handle + tracert from Win95 and NT as well as FreeBSD + traceroute, which uses UDP packets to non-existent + ports. + +Version 1.7: January 9, 1997 (cjm) + - Reduced malloc() activity for ICMP echo and + timestamp requests. + + - Added handling for out-of-order IP fragments. + + - Switched to differential checksum computation + for IP headers (TCP, UDP and ICMP checksums + were already differential). + + - Accepts FTP data connections from other than + port 20. This allows one ftp connections + from two hosts which are both running packet + aliasing. + + - Checksum error on FTP transfers. Problem + in code located by Martin Renters and + Brian Somers. + +Version 1.8: January 14, 1997 (cjm) + - Fixed data type error in function StartPoint() + in alias_db.c (this bug did not exist before v1.7) + Problem in code located by Ari Suutari. + +Version 1.9: February 1, 1997 (Eivind Eklund <perhaps@yes.no>) + - Added support for IRC DCC (ee) + + - Changed the aliasing routines to use ANSI style + throughout (ee) + + - Minor API changes for integration with other + programs than PPP (ee) + + - Fixed minor security hole in alias_ftp.c for + other applications of the aliasing software. + Hole could _not_ manifest in ppp+pktAlias, but + could potentially manifest in other applications + of the aliasing. (ee) + + - Connections initiated from packet aliasing + host machine will not have their port number + aliased unless it conflicts with an aliasing + port already being used. (There is an option + to disable this for debugging) (cjm) + + - Sockets will be allocated in cases where + there might be port interference with the + host machine. This can be disabled in cases + where the ppp host will be acting purely as a + masquerading router and not generate any + traffic of its own. + (cjm) + +Version 2.0: March, 1997 (cjm) + - Aliasing links are cleared only when a host interface address + changes. + + - PacketAliasPermanentLink() API added. 
+ + - Option for only aliasing private, unregistered + IP addresses added. + + - Substantial rework to the aliasing lookup engine. + +Version 2.1: May, 1997 (cjm) + - Continuing rework to the aliasing lookup engine + to support multiple incoming addresses and static + NAT. PacketAliasRedirectPort() and + PacketAliasRedirectAddr() added to API. + + - Now supports outgoing as well as incoming ICMP + error messages. + +Version 2.2: July, 1997 (cjm) + - Rationalized API function names to all begin with + "PacketAlias..." Old function names are retained + for backwards compatibility. + + - Packet aliasing engine will now free memory of + fragments which are never resolved after a timeout + period. Once a fragment is resolved, it becomes + the user's responsibility to free the memory. + +Version 2.3: August 11, 1997 (cjm) + - Problem associated with socket file descriptor + accumulation in alias_db.c corrected. The sockets + had to be closed when a binding failed. Problem + in code located by Gordon Burditt. + +Version 2.4: September 1, 1997 (cjm) + - PKT_ALIAS_UNREGISTERED_ONLY option repaired. + This part of the code was incorrectly re-implemented + in version 2.1. + +Version 2.5: December, 1997 (ee) + - Added PKT_ALIAS_PUNCH_FW mode for firewall + bypass of FTP/IRC DCC data connections. Also added + improved TCP connection monitoring. + +Version 2.6: May, 1998 (amurai) + - Added supporting routine for NetBios over TCP/IP. + +Version 3.0: January 1, 1999 + - Transparent proxying support added. + - PPTP redirecting support added based on patches + contributed by Dru Nelson <dnelson@redwoodsoft.com>. + +Version 3.1: May, 2000 (Erik Salander, erik@whistle.com) + - Added support to alias 227 replies, allows aliasing for + FTP servers in passive mode. + - Added support for PPTP aliasing. + +Version 3.2: July, 2000 (Erik Salander, erik@whistle.com and + Junichi Satoh, junichi@junichi.org) + - Added support for streaming media (RTSP and PNA) aliasing. 
diff --git a/src/VBox/Devices/Network/slirp/libalias/Makefile.kup b/src/VBox/Devices/Network/slirp/libalias/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/Makefile.kup diff --git a/src/VBox/Devices/Network/slirp/libalias/alias.c b/src/VBox/Devices/Network/slirp/libalias/alias.c new file mode 100644 index 00000000..4694503f --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias.c @@ -0,0 +1,1758 @@ +/*- + * Copyright (c) 2001 Charles Mott <cm@linktel.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias.c,v 1.58.2.1.4.1 2009/04/15 03:14:26 kensmith Exp $"); +#endif +/* + Alias.c provides supervisory control for the functions of the + packet aliasing software. It consists of routines to monitor + TCP connection state, protocol-specific aliasing routines, + fragment handling and the following outside world functional + interfaces: SaveFragmentPtr, GetFragmentPtr, FragmentAliasIn, + PacketAliasIn and PacketAliasOut. + + The other C program files are briefly described. The data + structure framework which holds information needed to translate + packets is encapsulated in alias_db.c. Data is accessed by + function calls, so other segments of the program need not know + about the underlying data structures. Alias_ftp.c contains + special code for modifying the ftp PORT command used to establish + data connections, while alias_irc.c does the same for IRC + DCC. Alias_util.c contains a few utility routines. + + Version 1.0 August, 1996 (cjm) + + Version 1.1 August 20, 1996 (cjm) + PPP host accepts incoming connections for ports 0 to 1023. + (Gary Roberts pointed out the need to handle incoming + connections.) + + Version 1.2 September 7, 1996 (cjm) + Fragment handling error in alias_db.c corrected. + (Tom Torrance helped fix this problem.) + + Version 1.4 September 16, 1996 (cjm) + - A more generalized method for handling incoming + connections, without the 0-1023 restriction, is + implemented in alias_db.c + - Improved ICMP support in alias.c. Traceroute + packet streams can now be correctly aliased. + - TCP connection closing logic simplified in + alias.c and now allows for additional 1 minute + "grace period" after FIN or RST is observed. + + Version 1.5 September 17, 1996 (cjm) + Corrected error in handling incoming UDP packets with 0 checksum. + (Tom Torrance helped fix this problem.) + + Version 1.6 September 18, 1996 (cjm) + Simplified ICMP aliasing scheme. 
Should now support + traceroute from Win95 as well as FreeBSD. + + Version 1.7 January 9, 1997 (cjm) + - Out-of-order fragment handling. + - IP checksum error fixed for ftp transfers + from aliasing host. + - Integer return codes added to all + aliasing/de-aliasing functions. + - Some obsolete comments cleaned up. + - Differential checksum computations for + IP header (TCP, UDP and ICMP were already + differential). + + Version 2.1 May 1997 (cjm) + - Added support for outgoing ICMP error + messages. + - Added two functions PacketAliasIn2() + and PacketAliasOut2() for dynamic address + control (e.g. round-robin allocation of + incoming packets). + + Version 2.2 July 1997 (cjm) + - Rationalized API function names to begin + with "PacketAlias..." + - Eliminated PacketAliasIn2() and + PacketAliasOut2() as poorly conceived. + + Version 2.3 Dec 1998 (dillon) + - Major bounds checking additions, see FreeBSD/CVS + + Version 3.1 May, 2000 (salander) + - Added hooks to handle PPTP. + + Version 3.2 July, 2000 (salander and satoh) + - Added PacketUnaliasOut routine. + - Added hooks to handle RTSP/RTP. + + See HISTORY file for additional revisions. 
+*/ + +#ifndef VBOX +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#else +#include <sys/types.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <dlfcn.h> +#include <errno.h> +#include <string.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include <err.h> +#include "alias.h" +#include "alias_local.h" +#include "alias_mod.h" +#endif +#else /* VBOX */ +# include <slirp.h> +# include "alias.h" +# include "alias_local.h" +# include "alias_mod.h" + +# if 0 /* Clang 11 does not approve of this */ +# define return(x) \ +do { \ + Log2(("NAT:ALIAS: %s:%d return(%s:%d)\n", \ + RT_GCC_EXTENSION __FUNCTION__, __LINE__, #x,(x))); \ + return x; \ +} while(0) +# endif +#endif /* VBOX */ +static __inline int +twowords(void *p) +{ + uint8_t *c = p; + +#ifdef RT_LITTLE_ENDIAN /*BYTE_ORDER == LITTLE_ENDIAN*/ + uint16_t s1 = ((uint16_t)c[1] << 8) + (uint16_t)c[0]; + uint16_t s2 = ((uint16_t)c[3] << 8) + (uint16_t)c[2]; +#else + uint16_t s1 = ((uint16_t)c[0] << 8) + (uint16_t)c[1]; + uint16_t s2 = ((uint16_t)c[2] << 8) + (uint16_t)c[3]; +#endif + return (s1 + s2); +} + +/* TCP Handling Routines + + TcpMonitorIn() -- These routines monitor TCP connections, and + TcpMonitorOut() delete a link when a connection is closed. + +These routines look for SYN, FIN and RST flags to determine when TCP +connections open and close. When a TCP connection closes, the data +structure containing packet aliasing information is deleted after +a timeout period. 
+*/ + +/* Local prototypes */ +static void TcpMonitorIn(struct ip *, struct alias_link *); + +static void TcpMonitorOut(struct ip *, struct alias_link *); + + +static void +TcpMonitorIn(struct ip *pip, struct alias_link *lnk) +{ + struct tcphdr *tc; + + tc = (struct tcphdr *)ip_next(pip); + + switch (GetStateIn(lnk)) { + case ALIAS_TCP_STATE_NOT_CONNECTED: + if (tc->th_flags & TH_RST) + SetStateIn(lnk, ALIAS_TCP_STATE_DISCONNECTED); + else if (tc->th_flags & TH_SYN) + SetStateIn(lnk, ALIAS_TCP_STATE_CONNECTED); + break; + case ALIAS_TCP_STATE_CONNECTED: + if (tc->th_flags & (TH_FIN | TH_RST)) + SetStateIn(lnk, ALIAS_TCP_STATE_DISCONNECTED); + break; + } +} + +static void +TcpMonitorOut(struct ip *pip, struct alias_link *lnk) +{ + struct tcphdr *tc; + + tc = (struct tcphdr *)ip_next(pip); + + switch (GetStateOut(lnk)) { + case ALIAS_TCP_STATE_NOT_CONNECTED: + if (tc->th_flags & TH_RST) + SetStateOut(lnk, ALIAS_TCP_STATE_DISCONNECTED); + else if (tc->th_flags & TH_SYN) + SetStateOut(lnk, ALIAS_TCP_STATE_CONNECTED); + break; + case ALIAS_TCP_STATE_CONNECTED: + if (tc->th_flags & (TH_FIN | TH_RST)) + SetStateOut(lnk, ALIAS_TCP_STATE_DISCONNECTED); + break; + } +} + + + + + +/* Protocol Specific Packet Aliasing Routines + + IcmpAliasIn(), IcmpAliasIn1(), IcmpAliasIn2() + IcmpAliasOut(), IcmpAliasOut1(), IcmpAliasOut2() + ProtoAliasIn(), ProtoAliasOut() + UdpAliasIn(), UdpAliasOut() + TcpAliasIn(), TcpAliasOut() + +These routines handle protocol specific details of packet aliasing. +One may observe a certain amount of repetitive arithmetic in these +functions, the purpose of which is to compute a revised checksum +without actually summing over the entire data packet, which could be +unnecessarily time consuming. + +The purpose of the packet aliasing routines is to replace the source +address of the outgoing packet and then correctly put it back for +any incoming packets. For TCP and UDP, ports are also re-mapped. 
+ +For ICMP echo/timestamp requests and replies, the following scheme +is used: the ID number is replaced by an alias for the outgoing +packet. + +ICMP error messages are handled by looking at the IP fragment +in the data section of the message. + +For TCP and UDP protocols, a port number is chosen for an outgoing +packet, and then incoming packets are identified by IP address and +port numbers. For TCP packets, there is additional logic in the event +that sequence and ACK numbers have been altered (as in the case for +FTP data port commands). + +The port numbers used by the packet aliasing module are not true +ports in the Unix sense. No sockets are actually bound to ports. +They are more correctly thought of as placeholders. + +All packets go through the aliasing mechanism, whether they come from +the gateway machine or other machines on a local area network. +*/ + + +/* Local prototypes */ +static int IcmpAliasIn1(struct libalias *, struct ip *); +static int IcmpAliasIn2(struct libalias *, struct ip *); +static int IcmpAliasIn(struct libalias *, struct ip *); + +static int IcmpAliasOut1(struct libalias *, struct ip *, int create); +static int IcmpAliasOut2(struct libalias *, struct ip *); +static int IcmpAliasOut(struct libalias *, struct ip *, int create); + +static int ProtoAliasIn(struct libalias *, struct ip *); +static int ProtoAliasOut(struct libalias *, struct ip *, int create); + +static int UdpAliasIn(struct libalias *, struct ip *); +static int UdpAliasOut(struct libalias *, struct ip *, int create); + +static int TcpAliasIn(struct libalias *, struct ip *); +static int TcpAliasOut(struct libalias *, struct ip *, int, int create); + + +static int +IcmpAliasIn1(struct libalias *la, struct ip *pip) +{ + +/* + De-alias incoming echo and timestamp replies. + Alias incoming echo and timestamp requests. 
+*/ + struct alias_link *lnk; + struct icmp *ic; + + LIBALIAS_LOCK_ASSERT(la); + + ic = (struct icmp *)ip_next(pip); + +/* Get source address from ICMP data field and restore original data */ + lnk = FindIcmpIn(la, pip->ip_src, pip->ip_dst, ic->icmp_id, 1); + if (lnk != NULL) { + u_short original_id; + int accumulate; + + original_id = GetOriginalPort(lnk); + +/* Adjust ICMP checksum */ + accumulate = ic->icmp_id; + accumulate -= original_id; + ADJUST_CHECKSUM(accumulate, ic->icmp_cksum); + +/* Put original sequence number back in */ + ic->icmp_id = original_id; + +/* Put original address back into IP header */ + { + struct in_addr original_address; + + original_address = GetOriginalAddress(lnk); + DifferentialChecksum(&pip->ip_sum, + &original_address, &pip->ip_dst, 2); + pip->ip_dst = original_address; + } + + return (PKT_ALIAS_OK); + } + return (PKT_ALIAS_IGNORED); +} + +static int +IcmpAliasIn2(struct libalias *la, struct ip *pip) +{ + +/* + Alias incoming ICMP error messages containing + IP header and first 64 bits of datagram. 
+*/ + struct ip *ip; + struct icmp *ic, *ic2; + struct udphdr *ud; + struct tcphdr *tc; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + + ic = (struct icmp *)ip_next(pip); + ip = &ic->icmp_ip; + + ud = (struct udphdr *)ip_next(ip); + tc = (struct tcphdr *)ip_next(ip); + ic2 = (struct icmp *)ip_next(ip); + + if (ip->ip_p == IPPROTO_UDP) + lnk = FindUdpTcpIn(la, ip->ip_dst, ip->ip_src, + ud->uh_dport, ud->uh_sport, + IPPROTO_UDP, 0); + else if (ip->ip_p == IPPROTO_TCP) + lnk = FindUdpTcpIn(la, ip->ip_dst, ip->ip_src, + tc->th_dport, tc->th_sport, + IPPROTO_TCP, 0); + else if (ip->ip_p == IPPROTO_ICMP) { + if (ic2->icmp_type == ICMP_ECHO || ic2->icmp_type == ICMP_TSTAMP) + lnk = FindIcmpIn(la, ip->ip_dst, ip->ip_src, ic2->icmp_id, 0); + else + lnk = NULL; + } else + lnk = NULL; + + if (lnk != NULL) { + if (ip->ip_p == IPPROTO_UDP || ip->ip_p == IPPROTO_TCP) { + int accumulate, accumulate2; + struct in_addr original_address; + u_short original_port; + + original_address = GetOriginalAddress(lnk); + original_port = GetOriginalPort(lnk); + +/* Adjust ICMP checksum */ + accumulate = twowords(&ip->ip_src); + accumulate -= twowords(&original_address); + accumulate += ud->uh_sport; + accumulate -= original_port; + accumulate2 = accumulate; + accumulate2 += ip->ip_sum; + ADJUST_CHECKSUM(accumulate, ip->ip_sum); + accumulate2 -= ip->ip_sum; + ADJUST_CHECKSUM(accumulate2, ic->icmp_cksum); + +/* Un-alias address in IP header */ + DifferentialChecksum(&pip->ip_sum, + &original_address, &pip->ip_dst, 2); + pip->ip_dst = original_address; + +/* Un-alias address and port number of original IP packet +fragment contained in ICMP data section */ + ip->ip_src = original_address; + ud->uh_sport = original_port; + } else if (ip->ip_p == IPPROTO_ICMP) { + int accumulate, accumulate2; + struct in_addr original_address; + u_short original_id; + + original_address = GetOriginalAddress(lnk); + original_id = GetOriginalPort(lnk); + +/* Adjust ICMP checksum */ + accumulate = 
twowords(&ip->ip_src); + accumulate -= twowords(&original_address); + accumulate += ic2->icmp_id; + accumulate -= original_id; + accumulate2 = accumulate; + accumulate2 += ip->ip_sum; + ADJUST_CHECKSUM(accumulate, ip->ip_sum); + accumulate2 -= ip->ip_sum; + ADJUST_CHECKSUM(accumulate2, ic->icmp_cksum); + +/* Un-alias address in IP header */ + DifferentialChecksum(&pip->ip_sum, + &original_address, &pip->ip_dst, 2); + pip->ip_dst = original_address; + +/* Un-alias address of original IP packet and sequence number of + embedded ICMP datagram */ + ip->ip_src = original_address; + ic2->icmp_id = original_id; + } + return (PKT_ALIAS_OK); + } + return (PKT_ALIAS_IGNORED); +} + + +static int +IcmpAliasIn(struct libalias *la, struct ip *pip) +{ + int iresult; + struct icmp *ic; + + LIBALIAS_LOCK_ASSERT(la); +/* Return if proxy-only mode is enabled */ + if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY) + return (PKT_ALIAS_OK); + + ic = (struct icmp *)ip_next(pip); + + iresult = PKT_ALIAS_IGNORED; + switch (ic->icmp_type) { + case ICMP_ECHOREPLY: + case ICMP_TSTAMPREPLY: + if (ic->icmp_code == 0) { + iresult = IcmpAliasIn1(la, pip); + } + break; + case ICMP_UNREACH: + case ICMP_SOURCEQUENCH: + case ICMP_TIMXCEED: + case ICMP_PARAMPROB: + iresult = IcmpAliasIn2(la, pip); + break; + case ICMP_ECHO: + case ICMP_TSTAMP: + iresult = IcmpAliasIn1(la, pip); + break; + } + return (iresult); +} + + +static int +IcmpAliasOut1(struct libalias *la, struct ip *pip, int create) +{ +/* + Alias outgoing echo and timestamp requests. + De-alias outgoing echo and timestamp replies. 
static int
IcmpAliasOut2(struct libalias *la, struct ip *pip)
{
/*
    Alias outgoing ICMP error messages containing
    IP header and first 64 bits of datagram.

    pip is the outer (ICMP) packet; the IP header embedded in the ICMP
    payload and the protocol header that follows it are rewritten in
    place.  Returns PKT_ALIAS_OK when a matching link was found and
    PKT_ALIAS_IGNORED otherwise.  No link is ever created here: every
    lookup below passes 0 as its last argument.
*/
	struct ip *ip;		/* IP header embedded in the ICMP payload */
	struct icmp *ic, *ic2;	/* outer ICMP header / embedded ICMP header */
	struct udphdr *ud;
	struct tcphdr *tc;
	struct alias_link *lnk;

	LIBALIAS_LOCK_ASSERT(la);
	ic = (struct icmp *)ip_next(pip);
	ip = &ic->icmp_ip;

	/*
	 * ud, tc and ic2 are three typed views of the same location: the
	 * first byte after the embedded IP header.
	 */
	ud = (struct udphdr *)ip_next(ip);
	tc = (struct tcphdr *)ip_next(ip);
	ic2 = (struct icmp *)ip_next(ip);

	/*
	 * Look the link up with the embedded packet's destination and
	 * source (and ports) swapped relative to the embedded header.
	 */
	if (ip->ip_p == IPPROTO_UDP)
		lnk = FindUdpTcpOut(la, ip->ip_dst, ip->ip_src,
		    ud->uh_dport, ud->uh_sport,
		    IPPROTO_UDP, 0);
	else if (ip->ip_p == IPPROTO_TCP)
		lnk = FindUdpTcpOut(la, ip->ip_dst, ip->ip_src,
		    tc->th_dport, tc->th_sport,
		    IPPROTO_TCP, 0);
	else if (ip->ip_p == IPPROTO_ICMP) {
		/* Only embedded echo / timestamp requests are looked up. */
		if (ic2->icmp_type == ICMP_ECHO || ic2->icmp_type == ICMP_TSTAMP)
			lnk = FindIcmpOut(la, ip->ip_dst, ip->ip_src, ic2->icmp_id, 0);
		else
			lnk = NULL;
	} else
		lnk = NULL;

	if (lnk != NULL) {
		if (ip->ip_p == IPPROTO_UDP || ip->ip_p == IPPROTO_TCP) {
			int accumulate;
			struct in_addr alias_address;
			u_short alias_port;

			alias_address = GetAliasAddress(lnk);
			alias_port = GetAliasPort(lnk);

/*
 * Adjust ICMP checksum.  The delta is taken from the old values, so
 * this must run before ip->ip_dst and the port are overwritten below.
 * NOTE(review): ud->uh_dport is used for TCP as well; presumably the
 * destination port sits at the same offset in both headers -- confirm.
 */
			accumulate = twowords(&ip->ip_dst);
			accumulate -= twowords(&alias_address);
			accumulate += ud->uh_dport;
			accumulate -= alias_port;
			ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);

/*
 * Alias address in IP header if it comes from the host
 * the original TCP/UDP packet was destined for.
 */
			if (pip->ip_src.s_addr == ip->ip_dst.s_addr) {
				DifferentialChecksum(&pip->ip_sum,
				    &alias_address, &pip->ip_src, 2);
				pip->ip_src = alias_address;
			}
/* Alias address and port number of original IP packet
   fragment contained in ICMP data section */
			ip->ip_dst = alias_address;
			ud->uh_dport = alias_port;
		} else if (ip->ip_p == IPPROTO_ICMP) {
			int accumulate;
			struct in_addr alias_address;
			u_short alias_id;

			alias_address = GetAliasAddress(lnk);
			/* For ICMP links the value from GetAliasPort() is used as the id. */
			alias_id = GetAliasPort(lnk);

/* Adjust ICMP checksum (again before the fields are overwritten) */
			accumulate = twowords(&ip->ip_dst);
			accumulate -= twowords(&alias_address);
			accumulate += ic2->icmp_id;
			accumulate -= alias_id;
			ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);

/*
 * Alias address in IP header if it comes from the host
 * the original ICMP message was destined for.
 */
			if (pip->ip_src.s_addr == ip->ip_dst.s_addr) {
				DifferentialChecksum(&pip->ip_sum,
				    &alias_address, &pip->ip_src, 2);
				pip->ip_src = alias_address;
			}
/* Alias address of original IP packet and identifier (icmp_id) of
   embedded ICMP datagram */
			ip->ip_dst = alias_address;
			ic2->icmp_id = alias_id;
		}
		return (PKT_ALIAS_OK);
	}
	return (PKT_ALIAS_IGNORED);
}
+ProtoAliasIn(struct libalias *la, struct ip *pip) +{ +/* + Handle incoming IP packets. The + only thing which is done in this case is to alias + the dest IP address of the packet to our inside + machine. +*/ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); +/* Return if proxy-only mode is enabled */ + if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY) + return (PKT_ALIAS_OK); + + lnk = FindProtoIn(la, pip->ip_src, pip->ip_dst, pip->ip_p); + if (lnk != NULL) { + struct in_addr original_address; + + original_address = GetOriginalAddress(lnk); + +/* Restore original IP address */ + DifferentialChecksum(&pip->ip_sum, + &original_address, &pip->ip_dst, 2); + pip->ip_dst = original_address; + + return (PKT_ALIAS_OK); + } + return (PKT_ALIAS_IGNORED); +} + + +static int +ProtoAliasOut(struct libalias *la, struct ip *pip, int create) +{ +/* + Handle outgoing IP packets. The + only thing which is done in this case is to alias + the source IP address of the packet. +*/ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + (void)create; + +/* Return if proxy-only mode is enabled */ + if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY) + return (PKT_ALIAS_OK); + + lnk = FindProtoOut(la, pip->ip_src, pip->ip_dst, pip->ip_p); + if (lnk != NULL) { + struct in_addr alias_address; + + alias_address = GetAliasAddress(lnk); + +/* Change source address */ + DifferentialChecksum(&pip->ip_sum, + &alias_address, &pip->ip_src, 2); + pip->ip_src = alias_address; + + return (PKT_ALIAS_OK); + } + return (PKT_ALIAS_IGNORED); +} + + +static int +UdpAliasIn(struct libalias *la, struct ip *pip) +{ + struct udphdr *ud; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); +/* Return if proxy-only mode is enabled */ + if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY) + return (PKT_ALIAS_OK); + + ud = (struct udphdr *)ip_next(pip); + + lnk = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst, + ud->uh_sport, ud->uh_dport, + IPPROTO_UDP, 1); + if (lnk != NULL) { + struct in_addr alias_address; + struct 
in_addr original_address; + u_short alias_port; + int accumulate; + int error; + struct alias_data ad; + ad.lnk = lnk; + ad.oaddr = &original_address; + ad.aaddr = &alias_address; + ad.aport = &alias_port; + ad.sport = &ud->uh_sport; + ad.dport = &ud->uh_dport; + ad.maxpktsize = 0; + + + alias_address = GetAliasAddress(lnk); + original_address = GetOriginalAddress(lnk); + alias_port = ud->uh_dport; + ud->uh_dport = GetOriginalPort(lnk); + + /* Walk out chain. */ + error = find_handler(IN, UDP, la, pip, &ad); + /* If we cannot figure out the packet, ignore it. */ + if (error < 0) + return (PKT_ALIAS_IGNORED); + +/* If UDP checksum is not zero, then adjust since destination port */ +/* is being unaliased and destination address is being altered. */ + if (ud->uh_sum != 0) { + accumulate = alias_port; + accumulate -= ud->uh_dport; + accumulate += twowords(&alias_address); + accumulate -= twowords(&original_address); + ADJUST_CHECKSUM(accumulate, ud->uh_sum); + } +/* Restore original IP address */ + DifferentialChecksum(&pip->ip_sum, + &original_address, &pip->ip_dst, 2); + pip->ip_dst = original_address; + + return (PKT_ALIAS_OK); + } + return (PKT_ALIAS_IGNORED); +} + +static int +UdpAliasOut(struct libalias *la, struct ip *pip, int create) +{ + struct udphdr *ud; + struct alias_link *lnk; + int error; + + LIBALIAS_LOCK_ASSERT(la); +/* Return if proxy-only mode is enabled */ + if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY) + return (PKT_ALIAS_OK); + + ud = (struct udphdr *)ip_next(pip); + + lnk = FindUdpTcpOut(la, pip->ip_src, pip->ip_dst, + ud->uh_sport, ud->uh_dport, + IPPROTO_UDP, create); + if (lnk != NULL) { + u_short alias_port; + struct in_addr alias_address; + struct alias_data ad; + ad.lnk = lnk; + ad.oaddr = NULL; + ad.aaddr = &alias_address; + ad.aport = &alias_port; + ad.sport = &ud->uh_sport; + ad.dport = &ud->uh_dport; + ad.maxpktsize = 0; + + alias_address = GetAliasAddress(lnk); + alias_port = GetAliasPort(lnk); + + /* Walk out chain. 
*/ + error = find_handler(OUT, UDP, la, pip, &ad); + +/* If UDP checksum is not zero, adjust since source port is */ +/* being aliased and source address is being altered */ + if (ud->uh_sum != 0) { + int accumulate; + + accumulate = ud->uh_sport; + accumulate -= alias_port; + accumulate += twowords(&pip->ip_src); + accumulate -= twowords(&alias_address); + ADJUST_CHECKSUM(accumulate, ud->uh_sum); + } +/* Put alias port in UDP header */ + ud->uh_sport = alias_port; + +/* Change source address */ + DifferentialChecksum(&pip->ip_sum, + &alias_address, &pip->ip_src, 2); + pip->ip_src = alias_address; + + return (PKT_ALIAS_OK); + } + return (PKT_ALIAS_IGNORED); +} + + + +static int +TcpAliasIn(struct libalias *la, struct ip *pip) +{ + struct tcphdr *tc; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + tc = (struct tcphdr *)ip_next(pip); + + lnk = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst, + tc->th_sport, tc->th_dport, + IPPROTO_TCP, + !(la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)); + if (lnk != NULL) { + struct in_addr alias_address; + struct in_addr original_address; + struct in_addr proxy_address; + u_short alias_port; + u_short proxy_port; + int accumulate, error; + + /* + * The init of MANY vars is a bit below, but aliashandlepptpin + * seems to need the destination port that came within the + * packet and not the original one looks below [*]. + */ + + struct alias_data ad; + ad.lnk = lnk; + ad.oaddr = NULL; + ad.aaddr = NULL; + ad.aport = NULL; + ad.sport = &tc->th_sport; + ad.dport = &tc->th_dport; + ad.maxpktsize = 0; + + /* Walk out chain. */ + error = find_handler(IN, TCP, la, pip, &ad); + + alias_address = GetAliasAddress(lnk); + original_address = GetOriginalAddress(lnk); + proxy_address = GetProxyAddress(lnk); + alias_port = tc->th_dport; + tc->th_dport = GetOriginalPort(lnk); + proxy_port = GetProxyPort(lnk); + + /* + * Look above, if anyone is going to add find_handler AFTER + * this aliashandlepptpin/point, please redo alias_data too. 
+ * Uncommenting the piece here below should be enough. + */ +#if 0 + struct alias_data ad = { + .lnk = lnk, + .oaddr = &original_address, + .aaddr = &alias_address, + .aport = &alias_port, + .sport = &ud->uh_sport, + .dport = &ud->uh_dport, + .maxpktsize = 0 + }; + + /* Walk out chain. */ + error = find_handler(la, pip, &ad); + if (error == EHDNOF) + printf("Protocol handler not found\n"); +#endif + +/* Adjust TCP checksum since destination port is being unaliased */ +/* and destination port is being altered. */ + accumulate = alias_port; + accumulate -= tc->th_dport; + accumulate += twowords(&alias_address); + accumulate -= twowords(&original_address); + +/* If this is a proxy, then modify the TCP source port and + checksum accumulation */ + if (proxy_port != 0) { + accumulate += tc->th_sport; + tc->th_sport = proxy_port; + accumulate -= tc->th_sport; + accumulate += twowords(&pip->ip_src); + accumulate -= twowords(&proxy_address); + } +/* See if ACK number needs to be modified */ + if (GetAckModified(lnk) == 1) { + int delta; + + delta = GetDeltaAckIn(pip, lnk); + if (delta != 0) { + accumulate += twowords(&tc->th_ack); + tc->th_ack = htonl(ntohl(tc->th_ack) - delta); + accumulate -= twowords(&tc->th_ack); + } + } + ADJUST_CHECKSUM(accumulate, tc->th_sum); + +/* Restore original IP address */ + accumulate = twowords(&pip->ip_dst); + pip->ip_dst = original_address; + accumulate -= twowords(&pip->ip_dst); + +/* If this is a transparent proxy packet, then modify the source + address */ + if (proxy_address.s_addr != 0) { + accumulate += twowords(&pip->ip_src); + pip->ip_src = proxy_address; + accumulate -= twowords(&pip->ip_src); + } + ADJUST_CHECKSUM(accumulate, pip->ip_sum); + +/* Monitor TCP connection state */ + TcpMonitorIn(pip, lnk); + + return (PKT_ALIAS_OK); + } + return (PKT_ALIAS_IGNORED); +} + +static int +TcpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create) +{ + int proxy_type, error; + u_short dest_port; + u_short 
proxy_server_port = 0; /* Shut up MSC. */ + struct in_addr dest_address; + struct in_addr proxy_server_address; + struct tcphdr *tc; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + tc = (struct tcphdr *)ip_next(pip); + + if (create) + proxy_type = + ProxyCheck(la, pip, &proxy_server_address, &proxy_server_port); + else + proxy_type = 0; + + if (proxy_type == 0 && (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)) + return (PKT_ALIAS_OK); + +/* If this is a transparent proxy, save original destination, + then alter the destination and adjust checksums */ + dest_port = tc->th_dport; + dest_address = pip->ip_dst; + if (proxy_type != 0) { + int accumulate; + + accumulate = tc->th_dport; + tc->th_dport = proxy_server_port; + accumulate -= tc->th_dport; + accumulate += twowords(&pip->ip_dst); + accumulate -= twowords(&proxy_server_address); + ADJUST_CHECKSUM(accumulate, tc->th_sum); + + accumulate = twowords(&pip->ip_dst); + pip->ip_dst = proxy_server_address; + accumulate -= twowords(&pip->ip_dst); + ADJUST_CHECKSUM(accumulate, pip->ip_sum); + } + lnk = FindUdpTcpOut(la, pip->ip_src, pip->ip_dst, + tc->th_sport, tc->th_dport, + IPPROTO_TCP, create); + if (lnk == NULL) + return (PKT_ALIAS_IGNORED); + if (lnk != NULL) { + u_short alias_port; + struct in_addr alias_address; + int accumulate; + struct alias_data ad; + ad.lnk = lnk; + ad.oaddr = NULL; + ad.aaddr = &alias_address; + ad.aport = &alias_port; + ad.sport = &tc->th_sport; + ad.dport = &tc->th_dport; + ad.maxpktsize = maxpacketsize; + +/* Save original destination address, if this is a proxy packet. + Also modify packet to include destination encoding. This may + change the size of IP header. 
*/ + if (proxy_type != 0) { + SetProxyPort(lnk, dest_port); + SetProxyAddress(lnk, dest_address); + ProxyModify(la, lnk, pip, maxpacketsize, proxy_type); + tc = (struct tcphdr *)ip_next(pip); + } +/* Get alias address and port */ + alias_port = GetAliasPort(lnk); + alias_address = GetAliasAddress(lnk); + +/* Monitor TCP connection state */ + TcpMonitorOut(pip, lnk); + + /* Walk out chain. */ + error = find_handler(OUT, TCP, la, pip, &ad); + +/* Adjust TCP checksum since source port is being aliased */ +/* and source address is being altered */ + accumulate = tc->th_sport; + tc->th_sport = alias_port; + accumulate -= tc->th_sport; + accumulate += twowords(&pip->ip_src); + accumulate -= twowords(&alias_address); + +/* Modify sequence number if necessary */ + if (GetAckModified(lnk) == 1) { + int delta; + + delta = GetDeltaSeqOut(pip, lnk); + if (delta != 0) { + accumulate += twowords(&tc->th_seq); + tc->th_seq = htonl(ntohl(tc->th_seq) + delta); + accumulate -= twowords(&tc->th_seq); + } + } + ADJUST_CHECKSUM(accumulate, tc->th_sum); + +/* Change source address */ + accumulate = twowords(&pip->ip_src); + pip->ip_src = alias_address; + accumulate -= twowords(&pip->ip_src); + ADJUST_CHECKSUM(accumulate, pip->ip_sum); + + return (PKT_ALIAS_OK); + } + return (PKT_ALIAS_IGNORED); +} + + + + +/* Fragment Handling + + FragmentIn() + FragmentOut() + +The packet aliasing module has a limited ability for handling IP +fragments. If the ICMP, TCP or UDP header is in the first fragment +received, then the ID number of the IP packet is saved, and other +fragments are identified according to their ID number and IP address +they were sent from. Pointers to unresolved fragments can also be +saved and recalled when a header fragment is seen. 
+*/ + +/* Local prototypes */ +static int FragmentIn(struct libalias *, struct ip *); +static int FragmentOut(struct libalias *, struct ip *); + + +static int +FragmentIn(struct libalias *la, struct ip *pip) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = FindFragmentIn2(la, pip->ip_src, pip->ip_dst, pip->ip_id); + if (lnk != NULL) { + struct in_addr original_address; + + GetFragmentAddr(lnk, &original_address); + DifferentialChecksum(&pip->ip_sum, + &original_address, &pip->ip_dst, 2); + pip->ip_dst = original_address; + + return (PKT_ALIAS_OK); + } + return (PKT_ALIAS_UNRESOLVED_FRAGMENT); +} + + +static int +FragmentOut(struct libalias *la, struct ip *pip) +{ + struct in_addr alias_address; + + LIBALIAS_LOCK_ASSERT(la); + alias_address = FindAliasAddress(la, pip->ip_src); + DifferentialChecksum(&pip->ip_sum, + &alias_address, &pip->ip_src, 2); + pip->ip_src = alias_address; + + return (PKT_ALIAS_OK); +} + + + + + + +/* Outside World Access + + PacketAliasSaveFragment() + PacketAliasGetFragment() + PacketAliasFragmentIn() + PacketAliasIn() + PacketAliasOut() + PacketUnaliasOut() + +(prototypes in alias.h) +*/ + + +int +LibAliasSaveFragment(struct libalias *la, char *ptr) +{ + int iresult; + struct alias_link *lnk; + struct ip *pip; + + LIBALIAS_LOCK(la); + pip = (struct ip *)ptr; + lnk = AddFragmentPtrLink(la, pip->ip_src, pip->ip_id); + iresult = PKT_ALIAS_ERROR; + if (lnk != NULL) { + SetFragmentPtr(lnk, ptr); + iresult = PKT_ALIAS_OK; + } + LIBALIAS_UNLOCK(la); + return (iresult); +} + + +char * +LibAliasGetFragment(struct libalias *la, char *ptr) +{ + struct alias_link *lnk; + char *fptr; + struct ip *pip; + + LIBALIAS_LOCK(la); + pip = (struct ip *)ptr; + lnk = FindFragmentPtr(la, pip->ip_src, pip->ip_id); + if (lnk != NULL) { + GetFragmentPtr(lnk, &fptr); + SetFragmentPtr(lnk, NULL); + SetExpire(lnk, 0); /* Deletes link */ + } else + fptr = NULL; + + LIBALIAS_UNLOCK(la); + return (fptr); +} + + +void +LibAliasFragmentIn(struct libalias 
*la, char *ptr, /* Points to correctly + * de-aliased header + * fragment */ + char *ptr_fragment /* Points to fragment which must be + * de-aliased */ +) +{ + struct ip *pip; + struct ip *fpip; + + LIBALIAS_LOCK(la); + (void)la; + pip = (struct ip *)ptr; + fpip = (struct ip *)ptr_fragment; + + DifferentialChecksum(&fpip->ip_sum, + &pip->ip_dst, &fpip->ip_dst, 2); + fpip->ip_dst = pip->ip_dst; + LIBALIAS_UNLOCK(la); +} + +/* Local prototypes */ +static int +LibAliasOutLocked(struct libalias *la, char *ptr, + int maxpacketsize, int create); +static int +LibAliasInLocked(struct libalias *la, char *ptr, + int maxpacketsize); + +int +LibAliasIn(struct libalias *la, char *ptr, int maxpacketsize) +{ + int res; + + LIBALIAS_LOCK(la); + res = LibAliasInLocked(la, ptr, maxpacketsize); + LIBALIAS_UNLOCK(la); + return (res); +} + +static int +LibAliasInLocked(struct libalias *la, char *ptr, int maxpacketsize) +{ + struct in_addr alias_addr; + struct ip *pip; +#ifndef VBOX + int iresult; +#else + int iresult = PKT_ALIAS_IGNORED; +#endif + + if (la->packetAliasMode & PKT_ALIAS_REVERSE) { + la->packetAliasMode &= ~PKT_ALIAS_REVERSE; + iresult = LibAliasOutLocked(la, ptr, maxpacketsize, 1); + la->packetAliasMode |= PKT_ALIAS_REVERSE; + goto getout; + } + HouseKeeping(la); + ClearCheckNewLink(la); + pip = (struct ip *)ptr; + alias_addr = pip->ip_dst; + + /* Defense against mangled packets */ + if (ntohs(pip->ip_len) > maxpacketsize + || (pip->ip_hl << 2) > maxpacketsize) { + iresult = PKT_ALIAS_IGNORED; + goto getout; + } + +#ifndef VBOX + iresult = PKT_ALIAS_IGNORED; +#endif + if ((ntohs(pip->ip_off) & IP_OFFMASK) == 0) { + switch (pip->ip_p) { + case IPPROTO_ICMP: + iresult = IcmpAliasIn(la, pip); + break; + case IPPROTO_UDP: + iresult = UdpAliasIn(la, pip); + break; + case IPPROTO_TCP: + iresult = TcpAliasIn(la, pip); + break; +#ifndef VBOX + case IPPROTO_GRE: { + int error; + struct alias_data ad; + ad.lnk = NULL, + ad.oaddr = NULL, + ad.aaddr = NULL, + ad.aport = NULL, + 
ad.sport = NULL, + ad.dport = NULL, + ad.maxpktsize = 0 + + /* Walk out chain. */ + error = find_handler(IN, IP, la, pip, &ad); + if (error == 0) + iresult = PKT_ALIAS_OK; + else + iresult = ProtoAliasIn(la, pip); + } + break; +#endif + default: + iresult = ProtoAliasIn(la, pip); + break; + } + + if (ntohs(pip->ip_off) & IP_MF) { + struct alias_link *lnk; + + lnk = FindFragmentIn1(la, pip->ip_src, alias_addr, pip->ip_id); + if (lnk != NULL) { + iresult = PKT_ALIAS_FOUND_HEADER_FRAGMENT; + SetFragmentAddr(lnk, pip->ip_dst); + } else { + iresult = PKT_ALIAS_ERROR; + } + } + } else { + iresult = FragmentIn(la, pip); + } + +getout: + return (iresult); +} + + + +/* Unregistered address ranges */ + +/* 10.0.0.0 -> 10.255.255.255 */ +#define UNREG_ADDR_A_LOWER 0x0a000000 +#define UNREG_ADDR_A_UPPER 0x0affffff + +/* 172.16.0.0 -> 172.31.255.255 */ +#define UNREG_ADDR_B_LOWER 0xac100000 +#define UNREG_ADDR_B_UPPER 0xac1fffff + +/* 192.168.0.0 -> 192.168.255.255 */ +#define UNREG_ADDR_C_LOWER 0xc0a80000 +#define UNREG_ADDR_C_UPPER 0xc0a8ffff + +int +LibAliasOut(struct libalias *la, char *ptr, int maxpacketsize) +{ + int res; + + LIBALIAS_LOCK(la); + res = LibAliasOutLocked(la, ptr, maxpacketsize, 1); + LIBALIAS_UNLOCK(la); + return (res); +} + +int +LibAliasOutTry(struct libalias *la, char *ptr, int maxpacketsize, int create) +{ + int res; + + LIBALIAS_LOCK(la); + res = LibAliasOutLocked(la, ptr, maxpacketsize, create); + LIBALIAS_UNLOCK(la); + return (res); +} + +static int +LibAliasOutLocked(struct libalias *la, char *ptr, /* valid IP packet */ + int maxpacketsize, /* How much the packet data may grow (FTP + * and IRC inline changes) */ + int create /* Create new entries ? 
*/ +) +{ +#ifndef VBOX + int iresult; +#else + int iresult = PKT_ALIAS_IGNORED; +#endif + struct in_addr addr_save; + struct ip *pip; + + if (la->packetAliasMode & PKT_ALIAS_REVERSE) { + la->packetAliasMode &= ~PKT_ALIAS_REVERSE; + iresult = LibAliasInLocked(la, ptr, maxpacketsize); + la->packetAliasMode |= PKT_ALIAS_REVERSE; + goto getout; + } + HouseKeeping(la); + ClearCheckNewLink(la); + pip = (struct ip *)ptr; + + /* Defense against mangled packets */ + if (ntohs(pip->ip_len) > maxpacketsize + || (pip->ip_hl << 2) > maxpacketsize) { + iresult = PKT_ALIAS_IGNORED; + goto getout; + } + + addr_save = GetDefaultAliasAddress(la); + if (la->packetAliasMode & PKT_ALIAS_UNREGISTERED_ONLY) { + u_long addr; + int iclass; + + iclass = 0; + addr = ntohl(pip->ip_src.s_addr); + if (addr >= UNREG_ADDR_C_LOWER && addr <= UNREG_ADDR_C_UPPER) + iclass = 3; + else if (addr >= UNREG_ADDR_B_LOWER && addr <= UNREG_ADDR_B_UPPER) + iclass = 2; + else if (addr >= UNREG_ADDR_A_LOWER && addr <= UNREG_ADDR_A_UPPER) + iclass = 1; + + if (iclass == 0) { + SetDefaultAliasAddress(la, pip->ip_src); + } + } else if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY) { + SetDefaultAliasAddress(la, pip->ip_src); + } +#ifndef VBOX + iresult = PKT_ALIAS_IGNORED; +#endif + if ((ntohs(pip->ip_off) & IP_OFFMASK) == 0) { + switch (pip->ip_p) { + case IPPROTO_ICMP: + iresult = IcmpAliasOut(la, pip, create); + break; + case IPPROTO_UDP: + iresult = UdpAliasOut(la, pip, create); + break; + case IPPROTO_TCP: + iresult = TcpAliasOut(la, pip, maxpacketsize, create); + break; +#ifndef VBOX + case IPPROTO_GRE: { + int error; + struct alias_data ad = { + .lnk = NULL, + .oaddr = NULL, + .aaddr = NULL, + .aport = NULL, + .sport = NULL, + .dport = NULL, + .maxpktsize = 0 + }; + /* Walk out chain. 
*/ + error = find_handler(OUT, IP, la, pip, &ad); + if (error == 0) + iresult = PKT_ALIAS_OK; + else + iresult = ProtoAliasOut(la, pip, create); + } + break; +#endif + default: + iresult = ProtoAliasOut(la, pip, create); + break; + } + } else { + iresult = FragmentOut(la, pip); + } + + SetDefaultAliasAddress(la, addr_save); +getout: + return (iresult); +} + +int +LibAliasUnaliasOut(struct libalias *la, char *ptr, /* valid IP packet */ + int maxpacketsize /* for error checking */ +) +{ + struct ip *pip; + struct icmp *ic; + struct udphdr *ud; + struct tcphdr *tc; + struct alias_link *lnk; + int iresult = PKT_ALIAS_IGNORED; + + LIBALIAS_LOCK(la); + pip = (struct ip *)ptr; + + /* Defense against mangled packets */ + if (ntohs(pip->ip_len) > maxpacketsize + || (pip->ip_hl << 2) > maxpacketsize) + goto getout; + + ud = (struct udphdr *)ip_next(pip); + tc = (struct tcphdr *)ip_next(pip); + ic = (struct icmp *)ip_next(pip); + + /* Find a link */ + if (pip->ip_p == IPPROTO_UDP) + lnk = FindUdpTcpIn(la, pip->ip_dst, pip->ip_src, + ud->uh_dport, ud->uh_sport, + IPPROTO_UDP, 0); + else if (pip->ip_p == IPPROTO_TCP) + lnk = FindUdpTcpIn(la, pip->ip_dst, pip->ip_src, + tc->th_dport, tc->th_sport, + IPPROTO_TCP, 0); + else if (pip->ip_p == IPPROTO_ICMP) + lnk = FindIcmpIn(la, pip->ip_dst, pip->ip_src, ic->icmp_id, 0); + else + lnk = NULL; + + /* Change it from an aliased packet to an unaliased packet */ + if (lnk != NULL) { + if (pip->ip_p == IPPROTO_UDP || pip->ip_p == IPPROTO_TCP) { + int accumulate; + struct in_addr original_address; + u_short original_port; + + original_address = GetOriginalAddress(lnk); + original_port = GetOriginalPort(lnk); + + /* Adjust TCP/UDP checksum */ + accumulate = twowords(&pip->ip_src); + accumulate -= twowords(&original_address); + + if (pip->ip_p == IPPROTO_UDP) { + accumulate += ud->uh_sport; + accumulate -= original_port; + ADJUST_CHECKSUM(accumulate, ud->uh_sum); + } else { + accumulate += tc->th_sport; + accumulate -= original_port; + 
ADJUST_CHECKSUM(accumulate, tc->th_sum); + } + + /* Adjust IP checksum */ + DifferentialChecksum(&pip->ip_sum, + &original_address, &pip->ip_src, 2); + + /* Un-alias source address and port number */ + pip->ip_src = original_address; + if (pip->ip_p == IPPROTO_UDP) + ud->uh_sport = original_port; + else + tc->th_sport = original_port; + + iresult = PKT_ALIAS_OK; + + } else if (pip->ip_p == IPPROTO_ICMP) { + + int accumulate; + struct in_addr original_address; + u_short original_id; + + original_address = GetOriginalAddress(lnk); + original_id = GetOriginalPort(lnk); + + /* Adjust ICMP checksum */ + accumulate = twowords(&pip->ip_src); + accumulate -= twowords(&original_address); + accumulate += ic->icmp_id; + accumulate -= original_id; + ADJUST_CHECKSUM(accumulate, ic->icmp_cksum); + + /* Adjust IP checksum */ + DifferentialChecksum(&pip->ip_sum, + &original_address, &pip->ip_src, 2); + + /* Un-alias source address and port number */ + pip->ip_src = original_address; + ic->icmp_id = original_id; + + iresult = PKT_ALIAS_OK; + } + } +getout: + LIBALIAS_UNLOCK(la); + return (iresult); + +} + +#ifndef _KERNEL + +int +LibAliasRefreshModules(void) +{ + /** @todo (r - vasily) here should be module loading */ +#ifndef VBOX + char buf[256], conf[] = "/etc/libalias.conf"; + FILE *fd; + int i, len; + + fd = fopen(conf, "r"); + if (fd == NULL) + err(1, "fopen(%s)", conf); + + LibAliasUnLoadAllModule(); + + for (;;) { + fgets(buf, 256, fd); + if feof(fd) + break; + len = strlen(buf); + if (len > 1) { + for (i = 0; i < len; i++) + if (!isspace(buf[i])) + break; + if (buf[i] == '#') + continue; + buf[len - 1] = '\0'; + printf("Loading %s\n", buf); + LibAliasLoadModule(buf); + } + } +#endif /* !VBOX */ + return (0); +} + +int +LibAliasLoadModule(char *path) +{ +#ifndef VBOX + struct dll *t; + void *handle; + struct proto_handler *m; + const char *error; + moduledata_t *p; + + handle = dlopen (path, RTLD_LAZY); + if (!handle) { + fprintf(stderr, "%s\n", dlerror()); + return 
#if defined(_KERNEL) || defined(VBOX)
/*
 * m_megapullup() - make the first len bytes of an mbuf chain available
 * as one contiguous buffer, because libalias only works on a plain
 * (char *) view of the packet and cannot walk mbuf chains.
 *
 * If the packet is already a single, writable mbuf with at least
 * RESERVE bytes of trailing space, it is returned as is.  Otherwise a
 * new mbuf with a cluster large enough for len (plus RESERVE bytes of
 * headroom for growth where the cluster size allows) is allocated, the
 * packet header is moved over, len bytes are copied, and the input
 * chain is freed.
 *
 * Returns the contiguous mbuf on success.  Returns NULL, with the input
 * chain freed, when len exceeds the packet length, when len is
 * MJUM16BYTES or more, or when allocation fails.
 *
 * XXX: the RESERVE value of 100 bytes is completely empirical.
 */
struct mbuf *
#ifndef VBOX
m_megapullup(struct mbuf *m, int len)
#else
m_megapullup(PNATState pData, struct mbuf *m, int len)
#endif
{
	struct mbuf *flat = NULL;

	/* Asked for more than the packet holds: drop it. */
	if (len > m->m_pkthdr.len)
		goto release;

#define RESERVE 100
	/* Already sequential, writable and with room to grow: keep it. */
	if (m->m_next == NULL && M_WRITABLE(m) && M_TRAILINGSPACE(m) >= RESERVE)
		return (m);

	if (len <= MCLBYTES - RESERVE) {
		/* Fits a standard cluster. */
#ifndef VBOX
		flat = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
#else
		flat = m_getcl(pData, M_DONTWAIT, MT_DATA, M_PKTHDR);
#endif
	} else if (len < MJUM16BYTES) {
		/* Smallest jumbo cluster that still leaves RESERVE bytes. */
		int jumbo = (len <= MJUMPAGESIZE - RESERVE) ? MJUMPAGESIZE :
		    (len <= MJUM9BYTES - RESERVE) ? MJUM9BYTES : MJUM16BYTES;

#ifndef VBOX
		flat = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, jumbo);
#else
		flat = m_getjcl(pData, M_DONTWAIT, MT_DATA, M_PKTHDR, jumbo);
#endif
	}

	/* flat stays NULL when len is too large or the allocation failed. */
	if (flat != NULL) {
		m_move_pkthdr(flat, m);
		m_copydata(m, 0, len, mtod(flat, caddr_t));
		flat->m_len = flat->m_pkthdr.len = len;
	}

release:
	/* The input chain is consumed on the copy path and on every failure. */
#ifndef VBOX
	m_freem(m);
#else
	m_freem(pData, m);
#endif
	return (flat);
}
#endif
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/netinet/libalias/alias.h,v 1.34.8.1 2009/04/15 03:14:26 kensmith Exp $ + */ + +/* + * Alias.h defines the outside world interfaces for the packet aliasing + * software. + * + * This software is placed into the public domain with no restrictions on its + * distribution. + */ + +#ifndef _ALIAS_H_ +#define _ALIAS_H_ + +#ifndef VBOX +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#else +# include <slirp.h> +#endif + +#define LIBALIAS_BUF_SIZE 128 +#ifdef _KERNEL +/* + * The kernel version of libalias does not support these features. + */ +#define NO_FW_PUNCH +#define NO_USE_SOCKETS +#endif + +/* + * The external interface to libalias, the packet aliasing engine. + * + * There are two sets of functions: + * + * PacketAlias*() the old API which doesn't take an instance pointer + * and therefore can only have one packet engine at a time. + * + * LibAlias*() the new API which takes as first argument a pointer to + * the instance of the packet aliasing engine. + * + * The functions otherwise correspond to each other one for one, except + * for the LibAliasUnaliasOut()/PacketUnaliasOut() function which were + * were misnamed in the old API. 
+ */ + +/* + * The instance structure + */ +struct libalias; +#if defined(VBOX) && !defined(VBOX_SLIRP_ALIAS) +/* XXX: used only for browsing */ +struct libalias { + LIST_ENTRY(libalias) instancelist; +}; +#endif + +/* + * An anonymous structure, a pointer to which is returned from + * PacketAliasRedirectAddr(), PacketAliasRedirectPort() or + * PacketAliasRedirectProto(), passed to PacketAliasAddServer(), + * and freed by PacketAliasRedirectDelete(). + */ +struct alias_link; + + +/* OLD API */ + +/* Initialization and control functions. */ +void PacketAliasInit(void); +void PacketAliasSetAddress(struct in_addr _addr); +void PacketAliasSetFWBase(unsigned int _base, unsigned int _num); +void PacketAliasSetSkinnyPort(unsigned int _port); +unsigned int + PacketAliasSetMode(unsigned int _flags, unsigned int _mask); +void PacketAliasUninit(void); + +/* Packet Handling functions. */ +int PacketAliasIn(char *_ptr, int _maxpacketsize); +int PacketAliasOut(char *_ptr, int _maxpacketsize); +int PacketUnaliasOut(char *_ptr, int _maxpacketsize); + +/* Port and address redirection functions. */ + + +int +PacketAliasAddServer(struct alias_link *_lnk, + struct in_addr _addr, unsigned short _port); +struct alias_link * +PacketAliasRedirectAddr(struct in_addr _src_addr, + struct in_addr _alias_addr); +int PacketAliasRedirectDynamic(struct alias_link *_lnk); +void PacketAliasRedirectDelete(struct alias_link *_lnk); +struct alias_link * +PacketAliasRedirectPort(struct in_addr _src_addr, + unsigned short _src_port, struct in_addr _dst_addr, + unsigned short _dst_port, struct in_addr _alias_addr, + unsigned short _alias_port, unsigned char _proto); +struct alias_link * +PacketAliasRedirectProto(struct in_addr _src_addr, + struct in_addr _dst_addr, struct in_addr _alias_addr, + unsigned char _proto); + +/* Fragment Handling functions. 
*/ +void PacketAliasFragmentIn(char *_ptr, char *_ptr_fragment); +char *PacketAliasGetFragment(char *_ptr); +int PacketAliasSaveFragment(char *_ptr); + +/* Miscellaneous functions. */ +int PacketAliasCheckNewLink(void); +unsigned short + PacketAliasInternetChecksum(unsigned short *_ptr, int _nbytes); +void PacketAliasSetTarget(struct in_addr _target_addr); + +/* Transparent proxying routines. */ +int PacketAliasProxyRule(const char *_cmd); + +/* NEW API */ + +/* Initialization and control functions. */ +#ifndef VBOX +struct libalias *LibAliasInit(struct libalias *); +#else +struct libalias *LibAliasInit(PNATState, struct libalias *); +#endif +void LibAliasSetAddress(struct libalias *, struct in_addr _addr); +void LibAliasSetFWBase(struct libalias *, unsigned int _base, unsigned int _num); +void LibAliasSetSkinnyPort(struct libalias *, unsigned int _port); +unsigned int + LibAliasSetMode(struct libalias *, unsigned int _flags, unsigned int _mask); +void LibAliasUninit(struct libalias *); + +/* Packet Handling functions. */ +int LibAliasIn (struct libalias *, char *_ptr, int _maxpacketsize); +int LibAliasOut(struct libalias *, char *_ptr, int _maxpacketsize); +int LibAliasOutTry(struct libalias *, char *_ptr, int _maxpacketsize, int _create); +int LibAliasUnaliasOut(struct libalias *, char *_ptr, int _maxpacketsize); + +/* Port and address redirection functions. 
*/ + +int +LibAliasAddServer(struct libalias *, struct alias_link *_lnk, + struct in_addr _addr, unsigned short _port); +struct alias_link * +LibAliasRedirectAddr(struct libalias *, struct in_addr _src_addr, + struct in_addr _alias_addr); +int LibAliasRedirectDynamic(struct libalias *, struct alias_link *_lnk); +void LibAliasRedirectDelete(struct libalias *, struct alias_link *_lnk); +struct alias_link * +LibAliasRedirectPort(struct libalias *, struct in_addr _src_addr, + unsigned short _src_port, struct in_addr _dst_addr, + unsigned short _dst_port, struct in_addr _alias_addr, + unsigned short _alias_port, unsigned char _proto); +struct alias_link * +LibAliasRedirectProto(struct libalias *, struct in_addr _src_addr, + struct in_addr _dst_addr, struct in_addr _alias_addr, + unsigned char _proto); + +/* Fragment Handling functions. */ +void LibAliasFragmentIn(struct libalias *, char *_ptr, char *_ptr_fragment); +char *LibAliasGetFragment(struct libalias *, char *_ptr); +int LibAliasSaveFragment(struct libalias *, char *_ptr); + +/* Miscellaneous functions. */ +int LibAliasCheckNewLink(struct libalias *); +unsigned short + LibAliasInternetChecksum(struct libalias *, unsigned short *_ptr, int _nbytes); +void LibAliasSetTarget(struct libalias *, struct in_addr _target_addr); + +/* Transparent proxying routines. */ +int LibAliasProxyRule(struct libalias *, const char *_cmd); + +/* Module handling API */ +int LibAliasLoadModule(char *); +int LibAliasUnLoadAllModule(void); +int LibAliasRefreshModules(void); + +/* Mbuf helper function. */ +#ifndef VBOX +struct mbuf *m_megapullup(struct mbuf *, int); +#else +struct mbuf *m_megapullup(PNATState, struct mbuf *, int); +#endif + +/* + * Mode flags and other constants. + */ + + +/* Mode flags, set using PacketAliasSetMode() */ + +/* + * If PKT_ALIAS_LOG is set, a message will be printed to /var/log/alias.log + * every time a link is created or deleted. This is useful for debugging. 
+ */ +#define PKT_ALIAS_LOG 0x01 + +/* + * If PKT_ALIAS_DENY_INCOMING is set, then incoming connections (e.g. to ftp, + * telnet or web servers will be prevented by the aliasing mechanism. + */ +#define PKT_ALIAS_DENY_INCOMING 0x02 + +/* + * If PKT_ALIAS_SAME_PORTS is set, packets will be attempted sent from the + * same port as they originated on. This allows e.g. rsh to work *99% of the + * time*, but _not_ 100% (it will be slightly flakey instead of not working + * at all). This mode bit is set by PacketAliasInit(), so it is a default + * mode of operation. + */ +#define PKT_ALIAS_SAME_PORTS 0x04 + +/* + * If PKT_ALIAS_USE_SOCKETS is set, then when partially specified links (e.g. + * destination port and/or address is zero), the packet aliasing engine will + * attempt to allocate a socket for the aliasing port it chooses. This will + * avoid interference with the host machine. Fully specified links do not + * require this. This bit is set after a call to PacketAliasInit(), so it is + * a default mode of operation. + */ +#ifndef NO_USE_SOCKETS +#define PKT_ALIAS_USE_SOCKETS 0x08 +#endif +/*- + * If PKT_ALIAS_UNREGISTERED_ONLY is set, then only packets with + * unregistered source addresses will be aliased. Private + * addresses are those in the following ranges: + * + * 10.0.0.0 -> 10.255.255.255 + * 172.16.0.0 -> 172.31.255.255 + * 192.168.0.0 -> 192.168.255.255 + */ +#define PKT_ALIAS_UNREGISTERED_ONLY 0x10 + +/* + * If PKT_ALIAS_RESET_ON_ADDR_CHANGE is set, then the table of dynamic + * aliasing links will be reset whenever PacketAliasSetAddress() changes the + * default aliasing address. If the default aliasing address is left + * unchanged by this function call, then the table of dynamic aliasing links + * will be left intact. This bit is set after a call to PacketAliasInit(). 
+ */ +#define PKT_ALIAS_RESET_ON_ADDR_CHANGE 0x20 + +#ifndef NO_FW_PUNCH +/* + * If PKT_ALIAS_PUNCH_FW is set, active FTP and IRC DCC connections will + * create a 'hole' in the firewall to allow the transfers to work. The + * ipfw rule number that the hole is created with is controlled by + * PacketAliasSetFWBase(). The hole will be attached to that + * particular alias_link, so when the link goes away the hole is deleted. + */ +#define PKT_ALIAS_PUNCH_FW 0x100 +#endif + +/* + * If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only + * transparent proxying is performed. + */ +#define PKT_ALIAS_PROXY_ONLY 0x40 + +/* + * If PKT_ALIAS_REVERSE is set, the actions of PacketAliasIn() and + * PacketAliasOut() are reversed. + */ +#define PKT_ALIAS_REVERSE 0x80 + +/* Function return codes. */ +#define PKT_ALIAS_ERROR -1 +#define PKT_ALIAS_OK 1 +#define PKT_ALIAS_IGNORED 2 +#define PKT_ALIAS_UNRESOLVED_FRAGMENT 3 +#define PKT_ALIAS_FOUND_HEADER_FRAGMENT 4 + +#endif /* !_ALIAS_H_ */ + +/* lint -restore */ diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_cuseeme.c b/src/VBox/Devices/Network/slirp/libalias/alias_cuseeme.c new file mode 100644 index 00000000..9b5c6a57 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_cuseeme.c @@ -0,0 +1,228 @@ +/*- + * Copyright (c) 1998 Brian Somers <brian@Awfulhak.org> + * with the aid of code written by + * Junichi SATOH <junichi@astec.co.jp> 1996, 1997. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_cuseeme.c,v 1.13.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/module.h> +#else +#include <errno.h> +#include <sys/types.h> +#include <stdio.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include "alias_local.h" +#include "alias_mod.h" +#endif + +#define CUSEEME_PORT_NUMBER 7648 + +static void +AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip, + struct alias_link *lnk); + +static void +AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip, + struct in_addr original_addr); + +static int +fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + if (ah->dport == NULL || ah->oaddr == NULL) + return (-1); + if (ntohs(*ah->dport) == CUSEEME_PORT_NUMBER) + return (0); + return (-1); +} + +static int +protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + 
AliasHandleCUSeeMeIn(la, pip, *ah->oaddr); + return (0); +} + +static int +protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandleCUSeeMeOut(la, pip, ah->lnk); + return (0); +} + +/* Kernel module definition. */ +struct proto_handler handlers[] = { + { + .pri = 120, + .dir = OUT, + .proto = UDP, + .fingerprint = &fingerprint, + .protohandler = &protohandlerout + }, + { + .pri = 120, + .dir = IN, + .proto = UDP, + .fingerprint = &fingerprint, + .protohandler = &protohandlerin + }, + { EOH } +}; + +static int +mod_handler(module_t mod, int type, void *data) +{ + int error; + + switch (type) { + case MOD_LOAD: + error = 0; + LibAliasAttachHandlers(handlers); + break; + case MOD_UNLOAD: + error = 0; + LibAliasDetachHandlers(handlers); + break; + default: + error = EINVAL; + } + return (error); +} + +#ifdef _KERNEL +static +#endif +moduledata_t +alias_mod = { + "alias_cuseeme", mod_handler, NULL +}; + +#ifdef _KERNEL +DECLARE_MODULE(alias_cuseeme, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); +MODULE_VERSION(alias_cuseeme, 1); +MODULE_DEPEND(alias_cuseeme, libalias, 1, 1, 1); +#endif + +/* CU-SeeMe Data Header */ +struct cu_header { + u_int16_t dest_family; + u_int16_t dest_port; + u_int32_t dest_addr; + int16_t family; + u_int16_t port; + u_int32_t addr; + u_int32_t seq; + u_int16_t msg; + u_int16_t data_type; + u_int16_t packet_len; +}; + +/* Open Continue Header */ +struct oc_header { + u_int16_t client_count; /* Number of client info structs */ + u_int32_t seq_no; + char user_name [20]; + char reserved [4]; /* flags, version stuff, etc */ +}; + +/* client info structures */ +struct client_info { + u_int32_t address;/* Client address */ + char reserved [8]; /* Flags, pruning bitfield, packet + * counts etc */ +}; + +static void +AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip, struct alias_link *lnk) +{ + struct udphdr *ud = ip_next(pip); + + if (ntohs(ud->uh_ulen) - sizeof(struct udphdr) >= sizeof(struct cu_header)) { 
+ struct cu_header *cu; + struct alias_link *cu_lnk; + + cu = udp_next(ud); + if (cu->addr) + cu->addr = (u_int32_t) GetAliasAddress(lnk).s_addr; + + cu_lnk = FindUdpTcpOut(la, pip->ip_src, GetDestAddress(lnk), + ud->uh_dport, 0, IPPROTO_UDP, 1); + +#ifndef NO_FW_PUNCH + if (cu_lnk) + PunchFWHole(cu_lnk); +#endif + } +} + +static void +AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip, struct in_addr original_addr) +{ + struct in_addr alias_addr; + struct udphdr *ud; + struct cu_header *cu; + struct oc_header *oc; + struct client_info *ci; + char *end; + int i; + + (void)la; + alias_addr.s_addr = pip->ip_dst.s_addr; + ud = ip_next(pip); + cu = udp_next(ud); + oc = (struct oc_header *)(cu + 1); + ci = (struct client_info *)(oc + 1); + end = (char *)ud + ntohs(ud->uh_ulen); + + if ((char *)oc <= end) { + if (cu->dest_addr) + cu->dest_addr = (u_int32_t) original_addr.s_addr; + if (ntohs(cu->data_type) == 101) + /* Find and change our address */ + for (i = 0; (char *)(ci + 1) <= end && i < oc->client_count; i++, ci++) + if (ci->address == (u_int32_t) alias_addr.s_addr) { + ci->address = (u_int32_t) original_addr.s_addr; + break; + } + } +} diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_db.c b/src/VBox/Devices/Network/slirp/libalias/alias_db.c new file mode 100644 index 00000000..a4f3c160 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_db.c @@ -0,0 +1,2966 @@ +/*- + * Copyright (c) 2001 Charles Mott <cm@linktel.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_db.c,v 1.71.2.2.4.1 2009/04/15 03:14:26 kensmith Exp $"); +#endif + +/* + Alias_db.c encapsulates all data structures used for storing + packet aliasing data. Other parts of the aliasing software + access data through functions provided in this file. + + Data storage is based on the notion of a "link", which is + established for ICMP echo/reply packets, UDP datagrams and + TCP stream connections. A link stores the original source + and destination addresses. For UDP and TCP, it also stores + source and destination port numbers, as well as an alias + port number. Links are also used to store information about + fragments. + + There is a facility for sweeping through and deleting old + links as new packets are sent through. A simple timeout is + used for ICMP and UDP links. 
TCP links are left alone unless + there is an incomplete connection, in which case the link + can be deleted after a certain amount of time. + + + Initial version: August, 1996 (cjm) + + Version 1.4: September 16, 1996 (cjm) + Facility for handling incoming links added. + + Version 1.6: September 18, 1996 (cjm) + ICMP data handling simplified. + + Version 1.7: January 9, 1997 (cjm) + Fragment handling simplified. + Saves pointers for unresolved fragments. + Permits links for unspecified remote ports + or unspecified remote addresses. + Fixed bug which did not properly zero port + table entries after a link was deleted. + Cleaned up some obsolete comments. + + Version 1.8: January 14, 1997 (cjm) + Fixed data type error in StartPoint(). + (This error did not exist prior to v1.7 + and was discovered and fixed by Ari Suutari) + + Version 1.9: February 1, 1997 + Optionally, connections initiated from packet aliasing host + machine will will not have their port number aliased unless it + conflicts with an aliasing port already being used. (cjm) + + All options earlier being #ifdef'ed are now available through + a new interface, SetPacketAliasMode(). This allows run time + control (which is now available in PPP+pktAlias through the + 'alias' keyword). (ee) + + Added ability to create an alias port without + either destination address or port specified. + port type = ALIAS_PORT_UNKNOWN_DEST_ALL (ee) + + Removed K&R style function headers + and general cleanup. (ee) + + Added packetAliasMode to replace compiler #defines's (ee) + + Allocates sockets for partially specified + ports if ALIAS_USE_SOCKETS defined. (cjm) + + Version 2.0: March, 1997 + SetAliasAddress() will now clean up alias links + if the aliasing address is changed. (cjm) + + PacketAliasPermanentLink() function added to support permanent + links. (J. Fortes suggested the need for this.) 
+ Examples: + + (192.168.0.1, port 23) <-> alias port 6002, unknown dest addr/port + + (192.168.0.2, port 21) <-> alias port 3604, known dest addr + unknown dest port + + These permanent links allow for incoming connections to + machines on the local network. They can be given with a + user-chosen amount of specificity, with increasing specificity + meaning more security. (cjm) + + Quite a bit of rework to the basic engine. The portTable[] + array, which kept track of which ports were in use was replaced + by a table/linked list structure. (cjm) + + SetExpire() function added. (cjm) + + DeleteLink() no longer frees memory association with a pointer + to a fragment (this bug was first recognized by E. Eklund in + v1.9). + + Version 2.1: May, 1997 (cjm) + Packet aliasing engine reworked so that it can handle + multiple external addresses rather than just a single + host address. + + PacketAliasRedirectPort() and PacketAliasRedirectAddr() + added to the API. The first function is a more generalized + version of PacketAliasPermanentLink(). The second function + implements static network address translation. + + Version 3.2: July, 2000 (salander and satoh) + Added FindNewPortGroup to get contiguous range of port values. + + Added QueryUdpTcpIn and QueryUdpTcpOut to look for an aliasing + link but not actually add one. + + Added FindRtspOut, which is closely derived from FindUdpTcpOut, + except that the alias port (from FindNewPortGroup) is provided + as input. + + See HISTORY file for additional revisions. 
+*/ + +#ifndef VBOX +#ifdef _KERNEL +#include <machine/stdarg.h> +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/syslog.h> +#else +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/errno.h> +#include <sys/time.h> +#include <unistd.h> +#endif + +#include <sys/socket.h> +#include <netinet/tcp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#include <net/if.h> +#else +#include "alias.h" +#include "alias_local.h" +#include "alias_mod.h" +#endif +#else /* VBOX */ +# include <iprt/assert.h> +# include "alias.h" +# include "alias_local.h" +# include "alias_mod.h" +# include <slirp.h> +#endif /* VBOX */ + +#ifndef VBOX +static LIST_HEAD(, libalias) instancehead = LIST_HEAD_INITIALIZER(instancehead); +#endif + + +/* + Constants (note: constants are also defined + near relevant functions or structs) +*/ + +/* Parameters used for cleanup of expired links */ +/* NOTE: ALIAS_CLEANUP_INTERVAL_SECS must be less then LINK_TABLE_OUT_SIZE */ +#define ALIAS_CLEANUP_INTERVAL_SECS 64 +#define ALIAS_CLEANUP_MAX_SPOKES (LINK_TABLE_OUT_SIZE/5) + +/* Timeouts (in seconds) for different link types */ +#define ICMP_EXPIRE_TIME 60 +#define UDP_EXPIRE_TIME 60 +#define PROTO_EXPIRE_TIME 60 +#define FRAGMENT_ID_EXPIRE_TIME 10 +#define FRAGMENT_PTR_EXPIRE_TIME 30 + +/* TCP link expire time for different cases */ +/* When the link has been used and closed - minimal grace time to + allow ACKs and potential re-connect in FTP (XXX - is this allowed?) 
*/ +#ifndef TCP_EXPIRE_DEAD +#define TCP_EXPIRE_DEAD 10 +#endif + +/* When the link has been used and closed on one side - the other side + is allowed to still send data */ +#ifndef TCP_EXPIRE_SINGLEDEAD +#define TCP_EXPIRE_SINGLEDEAD 90 +#endif + +/* When the link isn't yet up */ +#ifndef TCP_EXPIRE_INITIAL +#define TCP_EXPIRE_INITIAL 300 +#endif + +/* When the link is up */ +#ifndef TCP_EXPIRE_CONNECTED +#define TCP_EXPIRE_CONNECTED 86400 +#endif + + +/* Dummy port number codes used for FindLinkIn/Out() and AddLink(). + These constants can be anything except zero, which indicates an + unknown port number. */ + +#define NO_DEST_PORT 1 +#define NO_SRC_PORT 1 + + + +/* Data Structures + + The fundamental data structure used in this program is + "struct alias_link". Whenever a TCP connection is made, + a UDP datagram is sent out, or an ICMP echo request is made, + a link record is made (if it has not already been created). + The link record is identified by the source address/port + and the destination address/port. In the case of an ICMP + echo request, the source port is treated as being equivalent + with the 16-bit ID number of the ICMP packet. + + The link record also can store some auxiliary data. For + TCP connections that have had sequence and acknowledgment + modifications, data space is available to track these changes. + A state field is used to keep track in changes to the TCP + connection state. ID numbers of fragments can also be + stored in the auxiliary space. Pointers to unresolved + fragments can also be stored. + + The link records support two independent chainings. Lookup + tables for input and out tables hold the initial pointers + the link chains. On input, the lookup table indexes on alias + port and link type. On output, the lookup table indexes on + source address, destination address, source port, destination + port and link type. 
+*/ + +struct ack_data_record { /* used to save changes to ACK/sequence + * numbers */ + u_long ack_old; + u_long ack_new; + int delta; + int active; +}; + +struct tcp_state { /* Information about TCP connection */ + int in; /* State for outside -> inside */ + int out; /* State for inside -> outside */ + int index; /* Index to ACK data array */ + int ack_modified; /* Indicates whether ACK and + * sequence numbers */ + /* been modified */ +}; + +#define N_LINK_TCP_DATA 3 /* Number of distinct ACK number changes + * saved for a modified TCP stream */ +struct tcp_dat { + struct tcp_state state; + struct ack_data_record ack[N_LINK_TCP_DATA]; + int fwhole; /* Which firewall record is used for this + * hole? */ +}; + +struct server { /* LSNAT server pool (circular list) */ + struct in_addr addr; + u_short port; + struct server *next; +}; + +struct alias_link { /* Main data structure */ + struct libalias *la; + struct in_addr src_addr; /* Address and port information */ + struct in_addr dst_addr; + struct in_addr alias_addr; + struct in_addr proxy_addr; + u_short src_port; + u_short dst_port; + u_short alias_port; + u_short proxy_port; + struct server *server; + + int link_type; /* Type of link: TCP, UDP, ICMP, + * proto, frag */ + +/* values for link_type */ +#define LINK_ICMP IPPROTO_ICMP +#define LINK_UDP IPPROTO_UDP +#define LINK_TCP IPPROTO_TCP +#define LINK_FRAGMENT_ID (IPPROTO_MAX + 1) +#define LINK_FRAGMENT_PTR (IPPROTO_MAX + 2) +#define LINK_ADDR (IPPROTO_MAX + 3) +#define LINK_PPTP (IPPROTO_MAX + 4) + + int flags; /* indicates special characteristics */ + int pflags; /* protocol-specific flags */ + +/* flag bits */ +#define LINK_UNKNOWN_DEST_PORT 0x01 +#define LINK_UNKNOWN_DEST_ADDR 0x02 +#define LINK_PERMANENT 0x04 +#define LINK_PARTIALLY_SPECIFIED 0x03 /* logical-or of first two bits */ +#ifndef VBOX +# define LINK_UNFIREWALLED 0x08 /* This macro definition isn't used in this revision of libalias */ + + int timestamp; /* Time link was last accessed */ + int 
expire_time; /* Expire time for link */ +#else /* VBOX */ + unsigned int timestamp; /* Time link was last accessed */ + unsigned int expire_time; /* Expire time for link */ +#endif + +#ifndef NO_USE_SOCKETS + int sockfd; /* socket descriptor */ +#endif + LIST_ENTRY (alias_link) list_out; /* Linked list of + * pointers for */ + LIST_ENTRY (alias_link) list_in; /* input and output + * lookup tables */ + + union { /* Auxiliary data */ + char *frag_ptr; + struct in_addr frag_addr; + struct tcp_dat *tcp; + } data; +}; + +/* Clean up procedure. */ +#ifndef VBOX +static void finishoff(void); +#endif + +/* Kernel module definition. */ +#ifdef _KERNEL +MALLOC_DEFINE(M_ALIAS, "libalias", "packet aliasing"); + +MODULE_VERSION(libalias, 1); + +static int +alias_mod_handler(module_t mod, int type, void *data) +{ + int error; + + switch (type) { + case MOD_LOAD: + error = 0; + handler_chain_init(); + break; + case MOD_QUIESCE: + case MOD_UNLOAD: + handler_chain_destroy(); + finishoff(); + error = 0; + break; + default: + error = EINVAL; + } + + return (error); +} + +static moduledata_t alias_mod = { + "alias", alias_mod_handler, NULL +}; + +DECLARE_MODULE(alias, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); +#endif + +/* Internal utility routines (used only in alias_db.c) + +Lookup table starting points: + StartPointIn() -- link table initial search point for + incoming packets + StartPointOut() -- link table initial search point for + outgoing packets + +Miscellaneous: + SeqDiff() -- difference between two TCP sequences + ShowAliasStats() -- send alias statistics to a monitor file +*/ + + +/* Local prototypes */ +static u_int StartPointIn(struct in_addr, u_short, int); + +static u_int +StartPointOut(struct in_addr, struct in_addr, + u_short, u_short, int); + +static int SeqDiff(u_long, u_long); + +#ifndef NO_FW_PUNCH +/* Firewall control */ +static void InitPunchFW(struct libalias *); +static void UninitPunchFW(struct libalias *); +static void ClearFWHole(struct alias_link *); + 
+#endif + +/* Log file control */ +static void ShowAliasStats(struct libalias *); +static int InitPacketAliasLog(struct libalias *); +static void UninitPacketAliasLog(struct libalias *); + +static u_int +StartPointIn(struct in_addr alias_addr, + u_short alias_port, + int link_type) +{ + u_int n; + + n = alias_addr.s_addr; + if (link_type != LINK_PPTP) + n += alias_port; + n += link_type; + return (n % LINK_TABLE_IN_SIZE); +} + + +static u_int +StartPointOut(struct in_addr src_addr, struct in_addr dst_addr, + u_short src_port, u_short dst_port, int link_type) +{ + u_int n; + + n = src_addr.s_addr; + n += dst_addr.s_addr; + if (link_type != LINK_PPTP) { + n += src_port; + n += dst_port; + } + n += link_type; + + return (n % LINK_TABLE_OUT_SIZE); +} + + +static int +SeqDiff(u_long x, u_long y) +{ +/* Return the difference between two TCP sequence numbers */ + +/* + This function is encapsulated in case there are any unusual + arithmetic conditions that need to be considered. +*/ + + return (ntohl(y) - ntohl(x)); +} + +#ifdef _KERNEL + +static void +AliasLog(char *str, const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + vsnprintf(str, LIBALIAS_BUF_SIZE, format, ap); + va_end(ap); +} +#else +static void +AliasLog(FILE *stream, const char *format, ...) 
+{ +# ifndef VBOX + va_list ap; + + va_start(ap, format); + vfprintf(stream, format, ap); + va_end(ap); + fflush(stream); +# else + + va_list args; + char buffer[1024]; + NOREF(stream); + memset(buffer, 0, 1024); + va_start(args, format); + RTStrPrintfV(buffer, 1024, format, args); + va_end(args); + /*make it grepable */ + Log2(("NAT:ALIAS: %s\n", buffer)); +# endif +} +#endif + +static void +ShowAliasStats(struct libalias *la) +{ + + LIBALIAS_LOCK_ASSERT(la); +/* Used for debugging */ + if (la->logDesc) { + int tot = la->icmpLinkCount + la->udpLinkCount + + la->tcpLinkCount + la->pptpLinkCount + + la->protoLinkCount + la->fragmentIdLinkCount + + la->fragmentPtrLinkCount; + + AliasLog(la->logDesc, + "icmp=%u, udp=%u, tcp=%u, pptp=%u, proto=%u, frag_id=%u frag_ptr=%u / tot=%u", + la->icmpLinkCount, + la->udpLinkCount, + la->tcpLinkCount, + la->pptpLinkCount, + la->protoLinkCount, + la->fragmentIdLinkCount, + la->fragmentPtrLinkCount, tot); +#ifndef _KERNEL + AliasLog(la->logDesc, " (sock=%u)\n", la->sockCount); +#endif + } +} + +/* Internal routines for finding, deleting and adding links + +Port Allocation: + GetNewPort() -- find and reserve new alias port number + GetSocket() -- try to allocate a socket for a given port + +Link creation and deletion: + CleanupAliasData() - remove all link chains from lookup table + IncrementalCleanup() - look for stale links in a single chain + DeleteLink() - remove link + AddLink() - add link + ReLink() - change link + +Link search: + FindLinkOut() - find link for outgoing packets + FindLinkIn() - find link for incoming packets + +Port search: + FindNewPortGroup() - find an available group of ports +*/ + +/* Local prototypes */ +static int GetNewPort(struct libalias *, struct alias_link *, int); +#ifndef NO_USE_SOCKETS +static u_short GetSocket(struct libalias *, u_short, int *, int); +#endif +static void CleanupAliasData(struct libalias *); + +static void IncrementalCleanup(struct libalias *); + +static void DeleteLink(struct 
alias_link *); + +static struct alias_link * +AddLink(struct libalias *, struct in_addr, struct in_addr, struct in_addr, + u_short, u_short, int, int); + +static struct alias_link * +ReLink(struct alias_link *, + struct in_addr, struct in_addr, struct in_addr, + u_short, u_short, int, int); + +static struct alias_link * + FindLinkOut (struct libalias *, struct in_addr, struct in_addr, u_short, u_short, int, int); + +static struct alias_link * + FindLinkIn (struct libalias *, struct in_addr, struct in_addr, u_short, u_short, int, int); + + +#define ALIAS_PORT_BASE 0x08000 +#define ALIAS_PORT_MASK 0x07fff +#define ALIAS_PORT_MASK_EVEN 0x07ffe +#define GET_NEW_PORT_MAX_ATTEMPTS 20 + +#define GET_ALIAS_PORT -1 +#define GET_ALIAS_ID GET_ALIAS_PORT + +#define FIND_EVEN_ALIAS_BASE 1 + +/* GetNewPort() allocates port numbers. Note that if a port number + is already in use, that does not mean that it cannot be used by + another link concurrently. This is because GetNewPort() looks for + unused triplets: (dest addr, dest port, alias port). */ + +static int +GetNewPort(struct libalias *la, struct alias_link *lnk, int alias_port_param) +{ + int i; + int max_trials; + u_short port_sys; + u_short port_net; + + LIBALIAS_LOCK_ASSERT(la); +/* + Description of alias_port_param for GetNewPort(). When + this parameter is zero or positive, it precisely specifies + the port number. GetNewPort() will return this number + without check that it is in use. + + When this parameter is GET_ALIAS_PORT, it indicates to get a randomly + selected port number. +*/ + + if (alias_port_param == GET_ALIAS_PORT) { + /* + * The aliasing port is automatically selected by one of + * two methods below: + */ + max_trials = GET_NEW_PORT_MAX_ATTEMPTS; + + if (la->packetAliasMode & PKT_ALIAS_SAME_PORTS) { + /* + * When the PKT_ALIAS_SAME_PORTS option is chosen, + * the first try will be the actual source port. If + * this is already in use, the remainder of the + * trials will be random. 
+ */ + port_net = lnk->src_port; + port_sys = ntohs(port_net); + } else { + /* First trial and all subsequent are random. */ + port_sys = arc4random() & ALIAS_PORT_MASK; + port_sys += ALIAS_PORT_BASE; + port_net = htons(port_sys); + } + } else if (alias_port_param >= 0 && alias_port_param < 0x10000) { + lnk->alias_port = (u_short) alias_port_param; + return (0); + } else { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAlias/GetNewPort(): "); + fprintf(stderr, "input parameter error\n"); +#endif + return (-1); + } + + +/* Port number search */ + for (i = 0; i < max_trials; i++) { + int go_ahead; + struct alias_link *search_result; + + search_result = FindLinkIn(la, lnk->dst_addr, lnk->alias_addr, + lnk->dst_port, port_net, + lnk->link_type, 0); + + if (search_result == NULL) + go_ahead = 1; + else if (!(lnk->flags & LINK_PARTIALLY_SPECIFIED) + && (search_result->flags & LINK_PARTIALLY_SPECIFIED)) + go_ahead = 1; + else + go_ahead = 0; + + if (go_ahead) { +#ifndef NO_USE_SOCKETS + if ((la->packetAliasMode & PKT_ALIAS_USE_SOCKETS) + && (lnk->flags & LINK_PARTIALLY_SPECIFIED) + && ((lnk->link_type == LINK_TCP) || + (lnk->link_type == LINK_UDP))) { + if (GetSocket(la, port_net, &lnk->sockfd, lnk->link_type)) { + lnk->alias_port = port_net; + return (0); + } + } else { +#endif + lnk->alias_port = port_net; + return (0); +#ifndef NO_USE_SOCKETS + } +#endif + } + port_sys = arc4random() & ALIAS_PORT_MASK; + port_sys += ALIAS_PORT_BASE; + port_net = htons(port_sys); + } + +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAlias/GetnewPort(): "); + fprintf(stderr, "could not find free port\n"); +#endif + + return (-1); +} + +#ifndef NO_USE_SOCKETS +static u_short +GetSocket(struct libalias *la, u_short port_net, int *sockfd, int link_type) +{ + int err; + int sock; + struct sockaddr_in sock_addr; + + LIBALIAS_LOCK_ASSERT(la); + if (link_type == LINK_TCP) + sock = socket(AF_INET, SOCK_STREAM, 0); + else if (link_type == LINK_UDP) + sock = socket(AF_INET, SOCK_DGRAM, 0); + else 
{ +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAlias/GetSocket(): "); + fprintf(stderr, "incorrect link type\n"); +#endif + return (0); + } + + if (sock < 0) { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAlias/GetSocket(): "); + fprintf(stderr, "socket() error %d\n", *sockfd); +#endif + return (0); + } + + memset(&sock_addr, 0, sizeof(struct sockaddr_in)); + sock_addr.sin_family = AF_INET; + sock_addr.sin_addr.s_addr = htonl(INADDR_ANY); + sock_addr.sin_port = port_net; +#ifdef RT_OS_DARWIN + sock_addr.sin_len = sizeof(struct sockaddr_in); +#endif + + + err = bind(sock, + (struct sockaddr *)&sock_addr, + sizeof(sock_addr)); + if (err == 0) { + la->sockCount++; + *sockfd = sock; + return (1); + } else { + closesocket(sock); + return (0); + } +} +#endif + +/* FindNewPortGroup() returns a base port number for an available + range of contiguous port numbers. Note that if a port number + is already in use, that does not mean that it cannot be used by + another link concurrently. This is because FindNewPortGroup() + looks for unused triplets: (dest addr, dest port, alias port). */ + +int +FindNewPortGroup(struct libalias *la, + struct in_addr dst_addr, + struct in_addr alias_addr, + u_short src_port, + u_short dst_port, + u_short port_count, + u_char proto, + u_char align) +{ + int i, j; + int max_trials; + u_short port_sys; + int link_type; + + LIBALIAS_LOCK_ASSERT(la); + /* + * Get link_type from protocol + */ + + switch (proto) { + case IPPROTO_UDP: + link_type = LINK_UDP; + break; + case IPPROTO_TCP: + link_type = LINK_TCP; + break; + default: + return (0); + break; + } + + /* + * The aliasing port is automatically selected by one of two + * methods below: + */ + max_trials = GET_NEW_PORT_MAX_ATTEMPTS; + + if (la->packetAliasMode & PKT_ALIAS_SAME_PORTS) { + /* + * When the ALIAS_SAME_PORTS option is chosen, the first + * try will be the actual source port. If this is already + * in use, the remainder of the trials will be random. 
+ */ + port_sys = ntohs(src_port); + + } else { + + /* First trial and all subsequent are random. */ + if (align == FIND_EVEN_ALIAS_BASE) + port_sys = arc4random() & ALIAS_PORT_MASK_EVEN; + else + port_sys = arc4random() & ALIAS_PORT_MASK; + + port_sys += ALIAS_PORT_BASE; + } + +/* Port number search */ + for (i = 0; i < max_trials; i++) { + + struct alias_link *search_result; + + for (j = 0; j < port_count; j++) + if (0 != (search_result = FindLinkIn(la, dst_addr, alias_addr, + dst_port, htons(port_sys + j), + link_type, 0))) + break; + + /* Found a good range, return base */ + if (j == port_count) + return (htons(port_sys)); + + /* Find a new base to try */ + if (align == FIND_EVEN_ALIAS_BASE) + port_sys = arc4random() & ALIAS_PORT_MASK_EVEN; + else + port_sys = arc4random() & ALIAS_PORT_MASK; + + port_sys += ALIAS_PORT_BASE; + } + +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAlias/FindNewPortGroup(): "); + fprintf(stderr, "could not find free port(s)\n"); +#endif + + return (0); +} + +static void +CleanupAliasData(struct libalias *la) +{ + struct alias_link *lnk; + int i; + + LIBALIAS_LOCK_ASSERT(la); + for (i = 0; i < LINK_TABLE_OUT_SIZE; i++) { + lnk = LIST_FIRST(&la->linkTableOut[i]); + while (lnk != NULL) { + struct alias_link *link_next = LIST_NEXT(lnk, list_out); + DeleteLink(lnk); + lnk = link_next; + } + } + + la->cleanupIndex = 0; +} + + +static void +IncrementalCleanup(struct libalias *la) +{ + struct alias_link *lnk, *lnk_tmp; + + LIBALIAS_LOCK_ASSERT(la); + LIST_FOREACH_SAFE(lnk, &la->linkTableOut[la->cleanupIndex++], + list_out, lnk_tmp) { + if (la->timeStamp - lnk->timestamp > lnk->expire_time) + DeleteLink(lnk); + } + + if (la->cleanupIndex == LINK_TABLE_OUT_SIZE) + la->cleanupIndex = 0; +} + +static void +DeleteLink(struct alias_link *lnk) +{ + struct libalias *la = lnk->la; + LogFlowFuncEnter(); + + LIBALIAS_LOCK_ASSERT(la); +/* Don't do anything if the link is marked permanent */ + if (la->deleteAllLinks == 0 && lnk->flags & LINK_PERMANENT) 
+ return; + +#ifndef NO_FW_PUNCH +/* Delete associated firewall hole, if any */ + ClearFWHole(lnk); +#endif + +/* Free memory allocated for LSNAT server pool */ + if (lnk->server != NULL) { + struct server *head, *curr, *next; + + head = curr = lnk->server; + do { + next = curr->next; + free(curr); + } while ((curr = next) != head); + } +/* Adjust output table pointers */ + LIST_REMOVE(lnk, list_out); + +/* Adjust input table pointers */ + LIST_REMOVE(lnk, list_in); +#ifndef NO_USE_SOCKETS +/* Close socket, if one has been allocated */ + if (lnk->sockfd != -1) { + la->sockCount--; + closesocket(lnk->sockfd); + } +#endif +/* Link-type dependent cleanup */ + switch (lnk->link_type) { + case LINK_ICMP: + la->icmpLinkCount--; + break; + case LINK_UDP: + la->udpLinkCount--; + break; + case LINK_TCP: + la->tcpLinkCount--; + free(lnk->data.tcp); + break; + case LINK_PPTP: + la->pptpLinkCount--; + break; + case LINK_FRAGMENT_ID: + la->fragmentIdLinkCount--; + break; + case LINK_FRAGMENT_PTR: + la->fragmentPtrLinkCount--; + if (lnk->data.frag_ptr != NULL) + free(lnk->data.frag_ptr); + break; + case LINK_ADDR: + break; + default: + la->protoLinkCount--; + break; + } + +/* Free memory */ + free(lnk); + +/* Write statistics, if logging enabled */ + if (la->packetAliasMode & PKT_ALIAS_LOG) { + ShowAliasStats(la); + } + LogFlowFuncLeave(); +} + + +static struct alias_link * +AddLink(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + struct in_addr alias_addr, + u_short src_port, + u_short dst_port, + int alias_port_param, /* if less than zero, alias */ + int link_type) +{ /* port will be automatically *//* chosen. 
+ * If greater than */ + u_int start_point; /* zero, equal to alias port */ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = malloc(sizeof(struct alias_link)); + if (lnk != NULL) { + /* Basic initialization */ + lnk->la = la; + lnk->src_addr = src_addr; + lnk->dst_addr = dst_addr; + lnk->alias_addr = alias_addr; + lnk->proxy_addr.s_addr = INADDR_ANY; + lnk->src_port = src_port; + lnk->dst_port = dst_port; + lnk->proxy_port = 0; + lnk->server = NULL; + lnk->link_type = link_type; +#ifndef NO_USE_SOCKETS + lnk->sockfd = -1; +#endif + lnk->flags = 0; + lnk->pflags = 0; + lnk->timestamp = la->timeStamp; + + /* Expiration time */ + switch (link_type) { + case LINK_ICMP: + lnk->expire_time = ICMP_EXPIRE_TIME; + break; + case LINK_UDP: + lnk->expire_time = UDP_EXPIRE_TIME; + break; + case LINK_TCP: + lnk->expire_time = TCP_EXPIRE_INITIAL; + break; + case LINK_PPTP: + lnk->flags |= LINK_PERMANENT; /* no timeout. */ + break; + case LINK_FRAGMENT_ID: + lnk->expire_time = FRAGMENT_ID_EXPIRE_TIME; + break; + case LINK_FRAGMENT_PTR: + lnk->expire_time = FRAGMENT_PTR_EXPIRE_TIME; + break; + case LINK_ADDR: + break; + default: + lnk->expire_time = PROTO_EXPIRE_TIME; + break; + } + + /* Determine alias flags */ + if (dst_addr.s_addr == INADDR_ANY) + lnk->flags |= LINK_UNKNOWN_DEST_ADDR; + if (dst_port == 0) + lnk->flags |= LINK_UNKNOWN_DEST_PORT; + + /* Determine alias port */ + if (GetNewPort(la, lnk, alias_port_param) != 0) { + free(lnk); + return (NULL); + } + /* Link-type dependent initialization */ + switch (link_type) { + struct tcp_dat *aux_tcp; + + case LINK_ICMP: + la->icmpLinkCount++; + break; + case LINK_UDP: + la->udpLinkCount++; + break; + case LINK_TCP: + aux_tcp = malloc(sizeof(struct tcp_dat)); + if (aux_tcp != NULL) { + int i; + + la->tcpLinkCount++; + aux_tcp->state.in = ALIAS_TCP_STATE_NOT_CONNECTED; + aux_tcp->state.out = ALIAS_TCP_STATE_NOT_CONNECTED; + aux_tcp->state.index = 0; + aux_tcp->state.ack_modified = 0; + for (i = 0; i < 
N_LINK_TCP_DATA; i++) + aux_tcp->ack[i].active = 0; + aux_tcp->fwhole = -1; + lnk->data.tcp = aux_tcp; + } else { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAlias/AddLink: "); + fprintf(stderr, " cannot allocate auxiliary TCP data\n"); +#endif + free(lnk); + return (NULL); + } + break; + case LINK_PPTP: + la->pptpLinkCount++; + break; + case LINK_FRAGMENT_ID: + la->fragmentIdLinkCount++; + break; + case LINK_FRAGMENT_PTR: + la->fragmentPtrLinkCount++; + break; + case LINK_ADDR: + break; + default: + la->protoLinkCount++; + break; + } + + /* Set up pointers for output lookup table */ + start_point = StartPointOut(src_addr, dst_addr, + src_port, dst_port, link_type); + LIST_INSERT_HEAD(&la->linkTableOut[start_point], lnk, list_out); + + /* Set up pointers for input lookup table */ + start_point = StartPointIn(alias_addr, lnk->alias_port, link_type); + LIST_INSERT_HEAD(&la->linkTableIn[start_point], lnk, list_in); + } else { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAlias/AddLink(): "); + fprintf(stderr, "malloc() call failed.\n"); +#endif + } + if (la->packetAliasMode & PKT_ALIAS_LOG) { + ShowAliasStats(la); + } + return (lnk); +} + +static struct alias_link * +ReLink(struct alias_link *old_lnk, + struct in_addr src_addr, + struct in_addr dst_addr, + struct in_addr alias_addr, + u_short src_port, + u_short dst_port, + int alias_port_param, /* if less than zero, alias */ + int link_type) +{ /* port will be automatically *//* chosen. 
+ * If greater than */ + struct alias_link *new_lnk; /* zero, equal to alias port */ + struct libalias *la = old_lnk->la; + + LIBALIAS_LOCK_ASSERT(la); + new_lnk = AddLink(la, src_addr, dst_addr, alias_addr, + src_port, dst_port, alias_port_param, + link_type); +#ifndef NO_FW_PUNCH + if (new_lnk != NULL && + old_lnk->link_type == LINK_TCP && + old_lnk->data.tcp->fwhole > 0) { + PunchFWHole(new_lnk); + } +#endif + DeleteLink(old_lnk); + return (new_lnk); +} + +static struct alias_link * +_FindLinkOut(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + u_short src_port, + u_short dst_port, + int link_type, + int replace_partial_links) +{ + u_int i; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + i = StartPointOut(src_addr, dst_addr, src_port, dst_port, link_type); + LIST_FOREACH(lnk, &la->linkTableOut[i], list_out) { + if (lnk->dst_addr.s_addr == dst_addr.s_addr && + lnk->src_addr.s_addr == src_addr.s_addr && + lnk->src_port == src_port && + lnk->dst_port == dst_port && + lnk->link_type == link_type && + lnk->server == NULL) { + lnk->timestamp = la->timeStamp; + break; + } + } + +/* Search for partially specified links. 
*/ + if (lnk == NULL && replace_partial_links) { + if (dst_port != 0 && dst_addr.s_addr != INADDR_ANY) { + lnk = _FindLinkOut(la, src_addr, dst_addr, src_port, 0, + link_type, 0); + if (lnk == NULL) + lnk = _FindLinkOut(la, src_addr, la->nullAddress, src_port, + dst_port, link_type, 0); + } + if (lnk == NULL && + (dst_port != 0 || dst_addr.s_addr != INADDR_ANY)) { + lnk = _FindLinkOut(la, src_addr, la->nullAddress, src_port, 0, + link_type, 0); + } + if (lnk != NULL) { + lnk = ReLink(lnk, + src_addr, dst_addr, lnk->alias_addr, + src_port, dst_port, lnk->alias_port, + link_type); + } + } + return (lnk); +} + +static struct alias_link * +FindLinkOut(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + u_short src_port, + u_short dst_port, + int link_type, + int replace_partial_links) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = _FindLinkOut(la, src_addr, dst_addr, src_port, dst_port, + link_type, replace_partial_links); + + if (lnk == NULL) { + /* + * The following allows permanent links to be specified as + * using the default source address (i.e. device interface + * address) without knowing in advance what that address + * is. 
+ */ + if (la->aliasAddress.s_addr != INADDR_ANY && + src_addr.s_addr == la->aliasAddress.s_addr) { + lnk = _FindLinkOut(la, la->nullAddress, dst_addr, src_port, dst_port, + link_type, replace_partial_links); + } + } + return (lnk); +} + + +static struct alias_link * +_FindLinkIn(struct libalias *la, struct in_addr dst_addr, + struct in_addr alias_addr, + u_short dst_port, + u_short alias_port, + int link_type, + int replace_partial_links) +{ + int flags_in; + u_int start_point; + struct alias_link *lnk; + struct alias_link *lnk_fully_specified; + struct alias_link *lnk_unknown_all; + struct alias_link *lnk_unknown_dst_addr; + struct alias_link *lnk_unknown_dst_port; + + LIBALIAS_LOCK_ASSERT(la); +/* Initialize pointers */ + lnk_fully_specified = NULL; + lnk_unknown_all = NULL; + lnk_unknown_dst_addr = NULL; + lnk_unknown_dst_port = NULL; + +/* If either the dest addr or port is unknown, the search + loop will have to know about this. */ + + flags_in = 0; + if (dst_addr.s_addr == INADDR_ANY) + flags_in |= LINK_UNKNOWN_DEST_ADDR; + if (dst_port == 0) + flags_in |= LINK_UNKNOWN_DEST_PORT; + +/* Search loop */ + start_point = StartPointIn(alias_addr, alias_port, link_type); + LIST_FOREACH(lnk, &la->linkTableIn[start_point], list_in) { + int flags; + + flags = flags_in | lnk->flags; + if (!(flags & LINK_PARTIALLY_SPECIFIED)) { + if (lnk->alias_addr.s_addr == alias_addr.s_addr + && lnk->alias_port == alias_port + && lnk->dst_addr.s_addr == dst_addr.s_addr + && lnk->dst_port == dst_port + && lnk->link_type == link_type) { + lnk_fully_specified = lnk; + break; + } + } else if ((flags & LINK_UNKNOWN_DEST_ADDR) + && (flags & LINK_UNKNOWN_DEST_PORT)) { + if (lnk->alias_addr.s_addr == alias_addr.s_addr + && lnk->alias_port == alias_port + && lnk->link_type == link_type) { + if (lnk_unknown_all == NULL) + lnk_unknown_all = lnk; + } + } else if (flags & LINK_UNKNOWN_DEST_ADDR) { + if (lnk->alias_addr.s_addr == alias_addr.s_addr + && lnk->alias_port == alias_port + && 
lnk->link_type == link_type + && lnk->dst_port == dst_port) { + if (lnk_unknown_dst_addr == NULL) + lnk_unknown_dst_addr = lnk; + } + } else if (flags & LINK_UNKNOWN_DEST_PORT) { + if (lnk->alias_addr.s_addr == alias_addr.s_addr + && lnk->alias_port == alias_port + && lnk->link_type == link_type + && lnk->dst_addr.s_addr == dst_addr.s_addr) { + if (lnk_unknown_dst_port == NULL) + lnk_unknown_dst_port = lnk; + } + } + } + + + + if (lnk_fully_specified != NULL) { + lnk_fully_specified->timestamp = la->timeStamp; + lnk = lnk_fully_specified; + } else if (lnk_unknown_dst_port != NULL) + lnk = lnk_unknown_dst_port; + else if (lnk_unknown_dst_addr != NULL) + lnk = lnk_unknown_dst_addr; + else if (lnk_unknown_all != NULL) + lnk = lnk_unknown_all; + else + return (NULL); + + if (replace_partial_links && + (lnk->flags & LINK_PARTIALLY_SPECIFIED || lnk->server != NULL)) { + struct in_addr src_addr; + u_short src_port; + + if (lnk->server != NULL) { /* LSNAT link */ + src_addr = lnk->server->addr; + src_port = lnk->server->port; + lnk->server = lnk->server->next; + } else { + src_addr = lnk->src_addr; + src_port = lnk->src_port; + } + + lnk = ReLink(lnk, + src_addr, dst_addr, alias_addr, + src_port, dst_port, alias_port, + link_type); + } + return (lnk); +} + +static struct alias_link * +FindLinkIn(struct libalias *la, struct in_addr dst_addr, + struct in_addr alias_addr, + u_short dst_port, + u_short alias_port, + int link_type, + int replace_partial_links) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = _FindLinkIn(la, dst_addr, alias_addr, dst_port, alias_port, + link_type, replace_partial_links); + + if (lnk == NULL) { + /* + * The following allows permanent links to be specified as + * using the default aliasing address (i.e. device + * interface address) without knowing in advance what that + * address is. 
+ */ + if (la->aliasAddress.s_addr != INADDR_ANY && + alias_addr.s_addr == la->aliasAddress.s_addr) { + lnk = _FindLinkIn(la, dst_addr, la->nullAddress, dst_port, alias_port, + link_type, replace_partial_links); + } + } + return (lnk); +} + + + + +/* External routines for finding/adding links + +-- "external" means outside alias_db.c, but within alias*.c -- + + FindIcmpIn(), FindIcmpOut() + FindFragmentIn1(), FindFragmentIn2() + AddFragmentPtrLink(), FindFragmentPtr() + FindProtoIn(), FindProtoOut() + FindUdpTcpIn(), FindUdpTcpOut() + AddPptp(), FindPptpOutByCallId(), FindPptpInByCallId(), + FindPptpOutByPeerCallId(), FindPptpInByPeerCallId() + FindOriginalAddress(), FindAliasAddress() + +(prototypes in alias_local.h) +*/ + + +struct alias_link * +FindIcmpIn(struct libalias *la, struct in_addr dst_addr, + struct in_addr alias_addr, + u_short id_alias, + int create) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = FindLinkIn(la, dst_addr, alias_addr, + NO_DEST_PORT, id_alias, + LINK_ICMP, 0); + if (lnk == NULL && create && !(la->packetAliasMode & PKT_ALIAS_DENY_INCOMING)) { + struct in_addr target_addr; + + target_addr = FindOriginalAddress(la, alias_addr); + lnk = AddLink(la, target_addr, dst_addr, alias_addr, + id_alias, NO_DEST_PORT, id_alias, + LINK_ICMP); + } + return (lnk); +} + + +struct alias_link * +FindIcmpOut(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + u_short id, + int create) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = FindLinkOut(la, src_addr, dst_addr, + id, NO_DEST_PORT, + LINK_ICMP, 0); + if (lnk == NULL && create) { + struct in_addr alias_addr; + + alias_addr = FindAliasAddress(la, src_addr); + lnk = AddLink(la, src_addr, dst_addr, alias_addr, + id, NO_DEST_PORT, GET_ALIAS_ID, + LINK_ICMP); + } + return (lnk); +} + + +struct alias_link * +FindFragmentIn1(struct libalias *la, struct in_addr dst_addr, + struct in_addr alias_addr, + u_short ip_id) +{ + struct alias_link *lnk; + + 
LIBALIAS_LOCK_ASSERT(la); + lnk = FindLinkIn(la, dst_addr, alias_addr, + NO_DEST_PORT, ip_id, + LINK_FRAGMENT_ID, 0); + + if (lnk == NULL) { + lnk = AddLink(la, la->nullAddress, dst_addr, alias_addr, + NO_SRC_PORT, NO_DEST_PORT, ip_id, + LINK_FRAGMENT_ID); + } + return (lnk); +} + + +struct alias_link * +FindFragmentIn2(struct libalias *la, struct in_addr dst_addr, /* Doesn't add a link if + * one */ + struct in_addr alias_addr, /* is not found. */ + u_short ip_id) +{ + + LIBALIAS_LOCK_ASSERT(la); + return FindLinkIn(la, dst_addr, alias_addr, + NO_DEST_PORT, ip_id, + LINK_FRAGMENT_ID, 0); +} + + +struct alias_link * +AddFragmentPtrLink(struct libalias *la, struct in_addr dst_addr, + u_short ip_id) +{ + + LIBALIAS_LOCK_ASSERT(la); + return AddLink(la, la->nullAddress, dst_addr, la->nullAddress, + NO_SRC_PORT, NO_DEST_PORT, ip_id, + LINK_FRAGMENT_PTR); +} + + +struct alias_link * +FindFragmentPtr(struct libalias *la, struct in_addr dst_addr, + u_short ip_id) +{ + + LIBALIAS_LOCK_ASSERT(la); + return FindLinkIn(la, dst_addr, la->nullAddress, + NO_DEST_PORT, ip_id, + LINK_FRAGMENT_PTR, 0); +} + + +struct alias_link * +FindProtoIn(struct libalias *la, struct in_addr dst_addr, + struct in_addr alias_addr, + u_char proto) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = FindLinkIn(la, dst_addr, alias_addr, + NO_DEST_PORT, 0, + proto, 1); + + if (lnk == NULL && !(la->packetAliasMode & PKT_ALIAS_DENY_INCOMING)) { + struct in_addr target_addr; + + target_addr = FindOriginalAddress(la, alias_addr); + lnk = AddLink(la, target_addr, dst_addr, alias_addr, + NO_SRC_PORT, NO_DEST_PORT, 0, + proto); + } + return (lnk); +} + + +struct alias_link * +FindProtoOut(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + u_char proto) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = FindLinkOut(la, src_addr, dst_addr, + NO_SRC_PORT, NO_DEST_PORT, + proto, 1); + + if (lnk == NULL) { + struct in_addr alias_addr; + + alias_addr = 
FindAliasAddress(la, src_addr); + lnk = AddLink(la, src_addr, dst_addr, alias_addr, + NO_SRC_PORT, NO_DEST_PORT, 0, + proto); + } + return (lnk); +} + + +struct alias_link * +FindUdpTcpIn(struct libalias *la, struct in_addr dst_addr, + struct in_addr alias_addr, + u_short dst_port, + u_short alias_port, + u_char proto, + int create) +{ + int link_type; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + switch (proto) { + case IPPROTO_UDP: + link_type = LINK_UDP; + break; + case IPPROTO_TCP: + link_type = LINK_TCP; + break; + default: + return (NULL); + break; + } + + lnk = FindLinkIn(la, dst_addr, alias_addr, + dst_port, alias_port, + link_type, create); + + if (lnk == NULL && create && !(la->packetAliasMode & PKT_ALIAS_DENY_INCOMING)) { + struct in_addr target_addr; + + target_addr = FindOriginalAddress(la, alias_addr); + lnk = AddLink(la, target_addr, dst_addr, alias_addr, + alias_port, dst_port, alias_port, + link_type); + } + return (lnk); +} + + +struct alias_link * +FindUdpTcpOut(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + u_short src_port, + u_short dst_port, + u_char proto, + int create) +{ + int link_type; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + switch (proto) { + case IPPROTO_UDP: + link_type = LINK_UDP; + break; + case IPPROTO_TCP: + link_type = LINK_TCP; + break; + default: + return (NULL); + break; + } + + lnk = FindLinkOut(la, src_addr, dst_addr, src_port, dst_port, link_type, create); + + if (lnk == NULL && create) { + struct in_addr alias_addr; + + alias_addr = FindAliasAddress(la, src_addr); + lnk = AddLink(la, src_addr, dst_addr, alias_addr, + src_port, dst_port, GET_ALIAS_PORT, + link_type); + } + return (lnk); +} + + +struct alias_link * +AddPptp(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + struct in_addr alias_addr, + u_int16_t src_call_id) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = AddLink(la, src_addr, dst_addr, alias_addr, + 
src_call_id, 0, GET_ALIAS_PORT, + LINK_PPTP); + + return (lnk); +} + + +struct alias_link * +FindPptpOutByCallId(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + u_int16_t src_call_id) +{ + u_int i; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + i = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP); + LIST_FOREACH(lnk, &la->linkTableOut[i], list_out) + if (lnk->link_type == LINK_PPTP && + lnk->src_addr.s_addr == src_addr.s_addr && + lnk->dst_addr.s_addr == dst_addr.s_addr && + lnk->src_port == src_call_id) + break; + + return (lnk); +} + + +struct alias_link * +FindPptpOutByPeerCallId(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + u_int16_t dst_call_id) +{ + u_int i; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + i = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP); + LIST_FOREACH(lnk, &la->linkTableOut[i], list_out) + if (lnk->link_type == LINK_PPTP && + lnk->src_addr.s_addr == src_addr.s_addr && + lnk->dst_addr.s_addr == dst_addr.s_addr && + lnk->dst_port == dst_call_id) + break; + + return (lnk); +} + + +struct alias_link * +FindPptpInByCallId(struct libalias *la, struct in_addr dst_addr, + struct in_addr alias_addr, + u_int16_t dst_call_id) +{ + u_int i; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + i = StartPointIn(alias_addr, 0, LINK_PPTP); + LIST_FOREACH(lnk, &la->linkTableIn[i], list_in) + if (lnk->link_type == LINK_PPTP && + lnk->dst_addr.s_addr == dst_addr.s_addr && + lnk->alias_addr.s_addr == alias_addr.s_addr && + lnk->dst_port == dst_call_id) + break; + + return (lnk); +} + + +struct alias_link * +FindPptpInByPeerCallId(struct libalias *la, struct in_addr dst_addr, + struct in_addr alias_addr, + u_int16_t alias_call_id) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = FindLinkIn(la, dst_addr, alias_addr, + 0 /* any */ , alias_call_id, + LINK_PPTP, 0); + + + return (lnk); +} + + +struct alias_link * +FindRtspOut(struct libalias *la, struct 
in_addr src_addr, + struct in_addr dst_addr, + u_short src_port, + u_short alias_port, + u_char proto) +{ + int link_type; + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + switch (proto) { + case IPPROTO_UDP: + link_type = LINK_UDP; + break; + case IPPROTO_TCP: + link_type = LINK_TCP; + break; + default: + return (NULL); + break; + } + + lnk = FindLinkOut(la, src_addr, dst_addr, src_port, 0, link_type, 1); + + if (lnk == NULL) { + struct in_addr alias_addr; + + alias_addr = FindAliasAddress(la, src_addr); + lnk = AddLink(la, src_addr, dst_addr, alias_addr, + src_port, 0, alias_port, + link_type); + } + return (lnk); +} + + +struct in_addr +FindOriginalAddress(struct libalias *la, struct in_addr alias_addr) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = FindLinkIn(la, la->nullAddress, alias_addr, + 0, 0, LINK_ADDR, 0); + if (lnk == NULL) { + la->newDefaultLink = 1; + if (la->targetAddress.s_addr == INADDR_ANY) + return (alias_addr); + else if (la->targetAddress.s_addr == INADDR_NONE) + return (la->aliasAddress.s_addr != INADDR_ANY) ? + la->aliasAddress : alias_addr; + else + return (la->targetAddress); + } else { + if (lnk->server != NULL) { /* LSNAT link */ + struct in_addr src_addr; + + src_addr = lnk->server->addr; + lnk->server = lnk->server->next; + return (src_addr); + } else if (lnk->src_addr.s_addr == INADDR_ANY) + return (la->aliasAddress.s_addr != INADDR_ANY) ? + la->aliasAddress : alias_addr; + else + return (lnk->src_addr); + } +} + + +struct in_addr +FindAliasAddress(struct libalias *la, struct in_addr original_addr) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK_ASSERT(la); + lnk = FindLinkOut(la, original_addr, la->nullAddress, + 0, 0, LINK_ADDR, 0); + if (lnk == NULL) { + return (la->aliasAddress.s_addr != INADDR_ANY) ? + la->aliasAddress : original_addr; + } else { + if (lnk->alias_addr.s_addr == INADDR_ANY) + return (la->aliasAddress.s_addr != INADDR_ANY) ? 
+ la->aliasAddress : original_addr; + else + return (lnk->alias_addr); + } +} + + +/* External routines for getting or changing link data + (external to alias_db.c, but internal to alias*.c) + + SetFragmentData(), GetFragmentData() + SetFragmentPtr(), GetFragmentPtr() + SetStateIn(), SetStateOut(), GetStateIn(), GetStateOut() + GetOriginalAddress(), GetDestAddress(), GetAliasAddress() + GetOriginalPort(), GetAliasPort() + SetAckModified(), GetAckModified() + GetDeltaAckIn(), GetDeltaSeqOut(), AddSeq() + SetProtocolFlags(), GetProtocolFlags() + SetDestCallId() +*/ + + +void +SetFragmentAddr(struct alias_link *lnk, struct in_addr src_addr) +{ + lnk->data.frag_addr = src_addr; +} + + +void +GetFragmentAddr(struct alias_link *lnk, struct in_addr *src_addr) +{ + *src_addr = lnk->data.frag_addr; +} + + +void +SetFragmentPtr(struct alias_link *lnk, char *fptr) +{ + lnk->data.frag_ptr = fptr; +} + + +void +GetFragmentPtr(struct alias_link *lnk, char **fptr) +{ + *fptr = lnk->data.frag_ptr; +} + + +void +SetStateIn(struct alias_link *lnk, int state) +{ + /* TCP input state */ + switch (state) { + case ALIAS_TCP_STATE_DISCONNECTED: + if (lnk->data.tcp->state.out != ALIAS_TCP_STATE_CONNECTED) + lnk->expire_time = TCP_EXPIRE_DEAD; + else + lnk->expire_time = TCP_EXPIRE_SINGLEDEAD; + break; + case ALIAS_TCP_STATE_CONNECTED: + if (lnk->data.tcp->state.out == ALIAS_TCP_STATE_CONNECTED) + lnk->expire_time = TCP_EXPIRE_CONNECTED; + break; + default: +#ifdef _KERNEL + panic("libalias:SetStateIn() unknown state"); +#else + abort(); +#endif + } + lnk->data.tcp->state.in = state; +} + + +void +SetStateOut(struct alias_link *lnk, int state) +{ + /* TCP output state */ + switch (state) { + case ALIAS_TCP_STATE_DISCONNECTED: + if (lnk->data.tcp->state.in != ALIAS_TCP_STATE_CONNECTED) + lnk->expire_time = TCP_EXPIRE_DEAD; + else + lnk->expire_time = TCP_EXPIRE_SINGLEDEAD; + break; + case ALIAS_TCP_STATE_CONNECTED: + if (lnk->data.tcp->state.in == ALIAS_TCP_STATE_CONNECTED) + 
lnk->expire_time = TCP_EXPIRE_CONNECTED; + break; + default: +#ifdef _KERNEL + panic("libalias:SetStateOut() unknown state"); +#else + abort(); +#endif + } + lnk->data.tcp->state.out = state; +} + + +int +GetStateIn(struct alias_link *lnk) +{ + /* TCP input state */ + return (lnk->data.tcp->state.in); +} + + +int +GetStateOut(struct alias_link *lnk) +{ + /* TCP output state */ + return (lnk->data.tcp->state.out); +} + + +struct in_addr +GetOriginalAddress(struct alias_link *lnk) +{ + if (lnk->src_addr.s_addr == INADDR_ANY) + return (lnk->la->aliasAddress); + else + return (lnk->src_addr); +} + + +struct in_addr +GetDestAddress(struct alias_link *lnk) +{ + return (lnk->dst_addr); +} + + +struct in_addr +GetAliasAddress(struct alias_link *lnk) +{ + if (lnk->alias_addr.s_addr == INADDR_ANY) + return (lnk->la->aliasAddress); + else + return (lnk->alias_addr); +} + + +struct in_addr +GetDefaultAliasAddress(struct libalias *la) +{ + + LIBALIAS_LOCK_ASSERT(la); + return (la->aliasAddress); +} + + +void +SetDefaultAliasAddress(struct libalias *la, struct in_addr alias_addr) +{ + + LIBALIAS_LOCK_ASSERT(la); + la->aliasAddress = alias_addr; +} + + +u_short +GetOriginalPort(struct alias_link *lnk) +{ + return (lnk->src_port); +} + + +u_short +GetAliasPort(struct alias_link *lnk) +{ + return (lnk->alias_port); +} + +#ifndef NO_FW_PUNCH +static u_short +GetDestPort(struct alias_link *lnk) +{ + return (lnk->dst_port); +} + +#endif + +void +SetAckModified(struct alias_link *lnk) +{ +/* Indicate that ACK numbers have been modified in a TCP connection */ + lnk->data.tcp->state.ack_modified = 1; +} + + +struct in_addr +GetProxyAddress(struct alias_link *lnk) +{ + return (lnk->proxy_addr); +} + + +void +SetProxyAddress(struct alias_link *lnk, struct in_addr addr) +{ + lnk->proxy_addr = addr; +} + + +u_short +GetProxyPort(struct alias_link *lnk) +{ + return (lnk->proxy_port); +} + + +void +SetProxyPort(struct alias_link *lnk, u_short port) +{ + lnk->proxy_port = port; +} + + +int 
+GetAckModified(struct alias_link *lnk) +{ +/* See if ACK numbers have been modified */ + return (lnk->data.tcp->state.ack_modified); +} + + +int +GetDeltaAckIn(struct ip *pip, struct alias_link *lnk) +{ +/* +Find out how much the ACK number has been altered for an incoming +TCP packet. To do this, a circular list of ACK numbers where the TCP +packet size was altered is searched. +*/ + + int i; + struct tcphdr *tc; + int delta, ack_diff_min; + u_long ack; + + tc = ip_next(pip); + ack = tc->th_ack; + + delta = 0; + ack_diff_min = -1; + for (i = 0; i < N_LINK_TCP_DATA; i++) { + struct ack_data_record x; + + x = lnk->data.tcp->ack[i]; + if (x.active == 1) { + int ack_diff; + + ack_diff = SeqDiff(x.ack_new, ack); + if (ack_diff >= 0) { + if (ack_diff_min >= 0) { + if (ack_diff < ack_diff_min) { + delta = x.delta; + ack_diff_min = ack_diff; + } + } else { + delta = x.delta; + ack_diff_min = ack_diff; + } + } + } + } + return (delta); +} + + +int +GetDeltaSeqOut(struct ip *pip, struct alias_link *lnk) +{ +/* +Find out how much the sequence number has been altered for an outgoing +TCP packet. To do this, a circular list of ACK numbers where the TCP +packet size was altered is searched. +*/ + + int i; + struct tcphdr *tc; + int delta, seq_diff_min; + u_long seq; + + tc = ip_next(pip); + seq = tc->th_seq; + + delta = 0; + seq_diff_min = -1; + for (i = 0; i < N_LINK_TCP_DATA; i++) { + struct ack_data_record x; + + x = lnk->data.tcp->ack[i]; + if (x.active == 1) { + int seq_diff; + + seq_diff = SeqDiff(x.ack_old, seq); + if (seq_diff >= 0) { + if (seq_diff_min >= 0) { + if (seq_diff < seq_diff_min) { + delta = x.delta; + seq_diff_min = seq_diff; + } + } else { + delta = x.delta; + seq_diff_min = seq_diff; + } + } + } + } + return (delta); +} + + +void +AddSeq(struct ip *pip, struct alias_link *lnk, int delta) +{ +/* +When a TCP packet has been altered in length, save this +information in a circular list. 
If enough packets have +been altered, then this list will begin to overwrite itself. +*/ + + struct tcphdr *tc; + struct ack_data_record x; + int hlen, tlen, dlen; + int i; + + tc = ip_next(pip); + + hlen = (pip->ip_hl + tc->th_off) << 2; + tlen = ntohs(pip->ip_len); + dlen = tlen - hlen; + + x.ack_old = htonl(ntohl(tc->th_seq) + dlen); + x.ack_new = htonl(ntohl(tc->th_seq) + dlen + delta); + x.delta = delta; + x.active = 1; + + i = lnk->data.tcp->state.index; + lnk->data.tcp->ack[i] = x; + + i++; + if (i == N_LINK_TCP_DATA) + lnk->data.tcp->state.index = 0; + else + lnk->data.tcp->state.index = i; +} + +void +SetExpire(struct alias_link *lnk, int expire) +{ + if (expire == 0) { + lnk->flags &= ~LINK_PERMANENT; + DeleteLink(lnk); + } else if (expire == -1) { + lnk->flags |= LINK_PERMANENT; + } else if (expire > 0) { + lnk->expire_time = expire; + } else { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAlias/SetExpire(): "); + fprintf(stderr, "error in expire parameter\n"); +#endif + } +} + +void +ClearCheckNewLink(struct libalias *la) +{ + + LIBALIAS_LOCK_ASSERT(la); + la->newDefaultLink = 0; +} + +void +SetProtocolFlags(struct alias_link *lnk, int pflags) +{ + + lnk->pflags = pflags;; +} + +int +GetProtocolFlags(struct alias_link *lnk) +{ + + return (lnk->pflags); +} + +void +SetDestCallId(struct alias_link *lnk, u_int16_t cid) +{ + struct libalias *la = lnk->la; + + LIBALIAS_LOCK_ASSERT(la); + la->deleteAllLinks = 1; + ReLink(lnk, lnk->src_addr, lnk->dst_addr, lnk->alias_addr, + lnk->src_port, cid, lnk->alias_port, lnk->link_type); + la->deleteAllLinks = 0; +} + + +/* Miscellaneous Functions + + HouseKeeping() + InitPacketAliasLog() + UninitPacketAliasLog() +*/ + +/* + Whenever an outgoing or incoming packet is handled, HouseKeeping() + is called to find and remove timed-out aliasing links. Logic exists + to sweep through the entire table and linked list structure + every 60 seconds. 
+ + (prototype in alias_local.h) +*/ + +void +HouseKeeping(struct libalias *la) +{ + int i, n; +#ifndef VBOX +#ifndef _KERNEL + struct timeval tv; + struct timezone tz; +#endif +#endif /* !VBOX */ + + LIBALIAS_LOCK_ASSERT(la); + /* + * Save system time (seconds) in global variable timeStamp for use + * by other functions. This is done so as not to unnecessarily + * waste timeline by making system calls. + */ +#ifndef VBOX +#ifdef _KERNEL + la->timeStamp = time_uptime; +#else + gettimeofday(&tv, &tz); + la->timeStamp = tv.tv_sec; +#endif +#else /* VBOX */ + la->timeStamp = la->curtime / 1000; /* NB: la->pData->curtime (msec) */ +#endif + + /* Compute number of spokes (output table link chains) to cover */ + n = LINK_TABLE_OUT_SIZE * (la->timeStamp - la->lastCleanupTime); + n /= ALIAS_CLEANUP_INTERVAL_SECS; + + /* Handle different cases */ + if (n > 0) { + if (n > ALIAS_CLEANUP_MAX_SPOKES) + n = ALIAS_CLEANUP_MAX_SPOKES; + la->lastCleanupTime = la->timeStamp; + for (i = 0; i < n; i++) + IncrementalCleanup(la); + } else if (n < 0) { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAlias/HouseKeeping(): "); + fprintf(stderr, "something unexpected in time values\n"); +#endif + la->lastCleanupTime = la->timeStamp; + } +} + +/* Init the log file and enable logging */ +static int +InitPacketAliasLog(struct libalias *la) +{ + + LIBALIAS_LOCK_ASSERT(la); + if (~la->packetAliasMode & PKT_ALIAS_LOG) { +#ifndef VBOX +#ifdef _KERNEL + if ((la->logDesc = malloc(LIBALIAS_BUF_SIZE))) + ; +#else + if ((la->logDesc = fopen("/var/log/alias.log", "w"))) + fprintf(la->logDesc, "PacketAlias/InitPacketAliasLog: Packet alias logging enabled.\n"); +#endif + else + return (ENOMEM); /* log initialization failed */ +#else + Log2(("NAT: PacketAlias/InitPacketAliasLog: Packet alias logging enabled.\n")); + la->logDesc = (void *)(uintptr_t)1; /* XXX: in vbox we don't use this param */ +#endif + la->packetAliasMode |= PKT_ALIAS_LOG; + } + + return (1); +} + +/* Close the log-file and disable 
logging. */ +static void +UninitPacketAliasLog(struct libalias *la) +{ + + LIBALIAS_LOCK_ASSERT(la); + if (la->logDesc) { +#ifndef VBOX +#ifdef _KERNEL + free(la->logDesc); +#else + fclose(la->logDesc); +#endif +#endif /* !VBOX */ + la->logDesc = NULL; + } + la->packetAliasMode &= ~PKT_ALIAS_LOG; +} + +/* Outside world interfaces + +-- "outside world" means other than alias*.c routines -- + + PacketAliasRedirectPort() + PacketAliasAddServer() + PacketAliasRedirectProto() + PacketAliasRedirectAddr() + PacketAliasRedirectDynamic() + PacketAliasRedirectDelete() + PacketAliasSetAddress() + PacketAliasInit() + PacketAliasUninit() + PacketAliasSetMode() + +(prototypes in alias.h) +*/ + +/* Redirection from a specific public addr:port to a + private addr:port */ +struct alias_link * +LibAliasRedirectPort(struct libalias *la, struct in_addr src_addr, u_short src_port, + struct in_addr dst_addr, u_short dst_port, + struct in_addr alias_addr, u_short alias_port, + u_char proto) +{ + int link_type; + struct alias_link *lnk; + + LIBALIAS_LOCK(la); + switch (proto) { + case IPPROTO_UDP: + link_type = LINK_UDP; + break; + case IPPROTO_TCP: + link_type = LINK_TCP; + break; + default: +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "PacketAliasRedirectPort(): "); + fprintf(stderr, "only TCP and UDP protocols allowed\n"); +#endif + lnk = NULL; + goto getout; + } + + lnk = AddLink(la, src_addr, dst_addr, alias_addr, + src_port, dst_port, alias_port, + link_type); + + if (lnk != NULL) { + lnk->flags |= LINK_PERMANENT; + } +#ifdef LIBALIAS_DEBUG + else { + fprintf(stderr, "PacketAliasRedirectPort(): " + "call to AddLink() failed\n"); + } +#endif + +getout: + LIBALIAS_UNLOCK(la); + return (lnk); +} + +/* Add server to the pool of servers */ +int +LibAliasAddServer(struct libalias *la, struct alias_link *lnk, struct in_addr addr, u_short port) +{ + struct server *server; + int res; + + LIBALIAS_LOCK(la); + (void)la; + + server = malloc(sizeof(struct server)); + + if (server != NULL) { + struct 
server *head; + + server->addr = addr; + server->port = port; + + head = lnk->server; + if (head == NULL) + server->next = server; + else { + struct server *s; + + for (s = head; s->next != head; s = s->next); + s->next = server; + server->next = head; + } + lnk->server = server; + res = 0; + } else + res = -1; + + LIBALIAS_UNLOCK(la); + return (res); +} + +/* Redirect packets of a given IP protocol from a specific + public address to a private address */ +struct alias_link * +LibAliasRedirectProto(struct libalias *la, struct in_addr src_addr, + struct in_addr dst_addr, + struct in_addr alias_addr, + u_char proto) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK(la); + lnk = AddLink(la, src_addr, dst_addr, alias_addr, + NO_SRC_PORT, NO_DEST_PORT, 0, + proto); + + if (lnk != NULL) { + lnk->flags |= LINK_PERMANENT; + } +#ifdef LIBALIAS_DEBUG + else { + fprintf(stderr, "PacketAliasRedirectProto(): " + "call to AddLink() failed\n"); + } +#endif + + LIBALIAS_UNLOCK(la); + return (lnk); +} + +/* Static address translation */ +struct alias_link * +LibAliasRedirectAddr(struct libalias *la, struct in_addr src_addr, + struct in_addr alias_addr) +{ + struct alias_link *lnk; + + LIBALIAS_LOCK(la); + lnk = AddLink(la, src_addr, la->nullAddress, alias_addr, + 0, 0, 0, + LINK_ADDR); + + if (lnk != NULL) { + lnk->flags |= LINK_PERMANENT; + } +#ifdef LIBALIAS_DEBUG + else { + fprintf(stderr, "PacketAliasRedirectAddr(): " + "call to AddLink() failed\n"); + } +#endif + + LIBALIAS_UNLOCK(la); + return (lnk); +} + + +/* Mark the aliasing link dynamic */ +int +LibAliasRedirectDynamic(struct libalias *la, struct alias_link *lnk) +{ + int res; + + LIBALIAS_LOCK(la); + (void)la; + + if (lnk->flags & LINK_PARTIALLY_SPECIFIED) + res = -1; + else { + lnk->flags &= ~LINK_PERMANENT; + res = 0; + } + LIBALIAS_UNLOCK(la); + return (res); +} + + +void +LibAliasRedirectDelete(struct libalias *la, struct alias_link *lnk) +{ +/* This is a dangerous function to put in the API, + because an invalid 
pointer can crash the program. */ + + LIBALIAS_LOCK(la); + la->deleteAllLinks = 1; + DeleteLink(lnk); + la->deleteAllLinks = 0; + LIBALIAS_UNLOCK(la); +} + + +void +LibAliasSetAddress(struct libalias *la, struct in_addr addr) +{ + + LIBALIAS_LOCK(la); + if (la->packetAliasMode & PKT_ALIAS_RESET_ON_ADDR_CHANGE + && la->aliasAddress.s_addr != addr.s_addr) + CleanupAliasData(la); + + la->aliasAddress = addr; + LIBALIAS_UNLOCK(la); +} + + +void +LibAliasSetTarget(struct libalias *la, struct in_addr target_addr) +{ + + LIBALIAS_LOCK(la); + la->targetAddress = target_addr; + LIBALIAS_UNLOCK(la); +} + +#ifndef VBOX +static void +finishoff(void) +{ + + while (!LIST_EMPTY(&instancehead)) + LibAliasUninit(LIST_FIRST(&instancehead)); +} +#endif + +struct libalias * +#ifndef VBOX +LibAliasInit(struct libalias *la) +#else +LibAliasInit(PNATState pData, struct libalias *la) +#endif +{ + int i; +#ifndef VBOX +#ifndef _KERNEL + struct timeval tv; + struct timezone tz; +#endif +#endif /* !VBOX */ + + if (la == NULL) { + la = calloc(sizeof *la, 1); + if (la == NULL) + return (la); + +#ifndef VBOX +#ifndef _KERNEL /* kernel cleans up on module unload */ + if (LIST_EMPTY(&instancehead)) + atexit(finishoff); +#endif +#endif /* !VBOX */ + LIST_INSERT_HEAD(&instancehead, la, instancelist); + +#ifndef VBOX +#ifdef _KERNEL + la->timeStamp = time_uptime; + la->lastCleanupTime = time_uptime; +#else + gettimeofday(&tv, &tz); + la->timeStamp = tv.tv_sec; + la->lastCleanupTime = tv.tv_sec; +#endif +#else /* VBOX */ + la->pData = pData; + la->timeStamp = la->curtime / 1000; /* NB: la->pData->curtime (msec) */ + la->lastCleanupTime = la->timeStamp; +#endif /* VBOX */ + + for (i = 0; i < LINK_TABLE_OUT_SIZE; i++) + LIST_INIT(&la->linkTableOut[i]); + for (i = 0; i < LINK_TABLE_IN_SIZE; i++) + LIST_INIT(&la->linkTableIn[i]); + LIBALIAS_LOCK_INIT(la); + LIBALIAS_LOCK(la); + } else { + LIBALIAS_LOCK(la); + la->deleteAllLinks = 1; + CleanupAliasData(la); + la->deleteAllLinks = 0; + } + + 
la->aliasAddress.s_addr = INADDR_ANY; + la->targetAddress.s_addr = INADDR_ANY; + + la->icmpLinkCount = 0; + la->udpLinkCount = 0; + la->tcpLinkCount = 0; + la->pptpLinkCount = 0; + la->protoLinkCount = 0; + la->fragmentIdLinkCount = 0; + la->fragmentPtrLinkCount = 0; + la->sockCount = 0; + + la->cleanupIndex = 0; + + la->packetAliasMode = PKT_ALIAS_SAME_PORTS +#ifndef NO_USE_SOCKETS + | PKT_ALIAS_USE_SOCKETS +#endif + | PKT_ALIAS_RESET_ON_ADDR_CHANGE; +#ifndef NO_FW_PUNCH + la->fireWallFD = -1; +#endif +#ifndef _KERNEL + LibAliasRefreshModules(); +#endif + LIBALIAS_UNLOCK(la); + return (la); +} + +void +LibAliasUninit(struct libalias *la) +{ + + LIBALIAS_LOCK(la); + la->deleteAllLinks = 1; + CleanupAliasData(la); + la->deleteAllLinks = 0; + UninitPacketAliasLog(la); +#ifndef NO_FW_PUNCH + UninitPunchFW(la); +#endif + LIST_REMOVE(la, instancelist); + LIBALIAS_UNLOCK(la); + LIBALIAS_LOCK_DESTROY(la); + free(la); +} + +/* Change mode for some operations */ +unsigned int +LibAliasSetMode( + struct libalias *la, + unsigned int flags, /* Which state to bring flags to */ + unsigned int mask /* Mask of which flags to affect (use 0 to + * do a probe for flag values) */ +) +{ + int res = -1; + + LIBALIAS_LOCK(la); +/* Enable logging? */ + if (flags & mask & PKT_ALIAS_LOG) { + /* Do the enable */ + if (InitPacketAliasLog(la) == ENOMEM) + goto getout; + } else +/* _Disable_ logging? */ + if (~flags & mask & PKT_ALIAS_LOG) { + UninitPacketAliasLog(la); + } +#ifndef NO_FW_PUNCH +/* Start punching holes in the firewall? */ + if (flags & mask & PKT_ALIAS_PUNCH_FW) { + InitPunchFW(la); + } else +/* Stop punching holes in the firewall? 
*/ + if (~flags & mask & PKT_ALIAS_PUNCH_FW) { + UninitPunchFW(la); + } +#endif + +/* Other flags can be set/cleared without special action */ + la->packetAliasMode = (flags & mask) | (la->packetAliasMode & ~mask); + res = la->packetAliasMode; +getout: + LIBALIAS_UNLOCK(la); + return (res); +} + + +int +LibAliasCheckNewLink(struct libalias *la) +{ + int res; + + LIBALIAS_LOCK(la); + res = la->newDefaultLink; + LIBALIAS_UNLOCK(la); + return (res); +} + + +#ifndef NO_FW_PUNCH + +/***************** + Code to support firewall punching. This shouldn't really be in this + file, but making variables global is evil too. + ****************/ + +/* Firewall include files */ +#include <net/if.h> +#include <netinet/ip_fw.h> +#include <string.h> +#include <err.h> + +/* + * helper function, updates the pointer to cmd with the length + * of the current command, and also cleans up the first word of + * the new command in case it has been clobbered before. + */ +static ipfw_insn * +next_cmd(ipfw_insn * cmd) +{ + cmd += F_LEN(cmd); + bzero(cmd, sizeof(*cmd)); + return (cmd); +} + +/* + * A function to fill simple commands of size 1. + * Existing flags are preserved. 
+ */ +static ipfw_insn * +fill_cmd(ipfw_insn * cmd, enum ipfw_opcodes opcode, int size, + int flags, u_int16_t arg) +{ + cmd->opcode = opcode; + cmd->len = ((cmd->len | flags) & (F_NOT | F_OR)) | (size & F_LEN_MASK); + cmd->arg1 = arg; + return next_cmd(cmd); +} + +static ipfw_insn * +fill_ip(ipfw_insn * cmd1, enum ipfw_opcodes opcode, u_int32_t addr) +{ + ipfw_insn_ip *cmd = (ipfw_insn_ip *) cmd1; + + cmd->addr.s_addr = addr; + return fill_cmd(cmd1, opcode, F_INSN_SIZE(ipfw_insn_u32), 0, 0); +} + +static ipfw_insn * +fill_one_port(ipfw_insn * cmd1, enum ipfw_opcodes opcode, u_int16_t port) +{ + ipfw_insn_u16 *cmd = (ipfw_insn_u16 *) cmd1; + + cmd->ports[0] = cmd->ports[1] = port; + return fill_cmd(cmd1, opcode, F_INSN_SIZE(ipfw_insn_u16), 0, 0); +} + +static int +fill_rule(void *buf, int bufsize, int rulenum, + enum ipfw_opcodes action, int proto, + struct in_addr sa, u_int16_t sp, struct in_addr da, u_int16_t dp) +{ + struct ip_fw *rule = (struct ip_fw *)buf; + ipfw_insn *cmd = (ipfw_insn *) rule->cmd; + + bzero(buf, bufsize); + rule->rulenum = rulenum; + + cmd = fill_cmd(cmd, O_PROTO, F_INSN_SIZE(ipfw_insn), 0, proto); + cmd = fill_ip(cmd, O_IP_SRC, sa.s_addr); + cmd = fill_one_port(cmd, O_IP_SRCPORT, sp); + cmd = fill_ip(cmd, O_IP_DST, da.s_addr); + cmd = fill_one_port(cmd, O_IP_DSTPORT, dp); + + rule->act_ofs = (u_int32_t *) cmd - (u_int32_t *) rule->cmd; + cmd = fill_cmd(cmd, action, F_INSN_SIZE(ipfw_insn), 0, 0); + + rule->cmd_len = (u_int32_t *) cmd - (u_int32_t *) rule->cmd; + + return ((char *)cmd - (char *)buf); +} + +static void ClearAllFWHoles(struct libalias *la); + + +#define fw_setfield(la, field, num) \ +do { \ + (field)[(num) - la->fireWallBaseNum] = 1; \ +} /*lint -save -e717 */ while(0)/* lint -restore */ + +#define fw_clrfield(la, field, num) \ +do { \ + (field)[(num) - la->fireWallBaseNum] = 0; \ +} /*lint -save -e717 */ while(0)/* lint -restore */ + +#define fw_tstfield(la, field, num) ((field)[(num) - la->fireWallBaseNum]) + +static void 
+InitPunchFW(struct libalias *la) +{ + + LIBALIAS_LOCK_ASSERT(la); + la->fireWallField = malloc(la->fireWallNumNums); + if (la->fireWallField) { + memset(la->fireWallField, 0, la->fireWallNumNums); + if (la->fireWallFD < 0) { + la->fireWallFD = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + } + ClearAllFWHoles(la); + la->fireWallActiveNum = la->fireWallBaseNum; + } +} + +static void +UninitPunchFW(struct libalias *la) +{ + + LIBALIAS_LOCK_ASSERT(la); + ClearAllFWHoles(la); + if (la->fireWallFD >= 0) + closesocket(la->fireWallFD); + la->fireWallFD = -1; + if (la->fireWallField) + free(la->fireWallField); + la->fireWallField = NULL; + la->packetAliasMode &= ~PKT_ALIAS_PUNCH_FW; +} + +/* Make a certain link go through the firewall */ +void +PunchFWHole(struct alias_link *lnk) +{ + struct libalias *la; + int r; /* Result code */ + struct ip_fw rule; /* On-the-fly built rule */ + int fwhole; /* Where to punch hole */ + + LIBALIAS_LOCK_ASSERT(la); + la = lnk->la; + +/* Don't do anything unless we are asked to */ + if (!(la->packetAliasMode & PKT_ALIAS_PUNCH_FW) || + la->fireWallFD < 0 || + lnk->link_type != LINK_TCP) + return; + + memset(&rule, 0, sizeof rule); + +/** Build rule **/ + + /* Find empty slot */ + for (fwhole = la->fireWallActiveNum; + fwhole < la->fireWallBaseNum + la->fireWallNumNums && + fw_tstfield(la, la->fireWallField, fwhole); + fwhole++); + if (fwhole == la->fireWallBaseNum + la->fireWallNumNums) { + for (fwhole = la->fireWallBaseNum; + fwhole < la->fireWallActiveNum && + fw_tstfield(la, la->fireWallField, fwhole); + fwhole++); + if (fwhole == la->fireWallActiveNum) { + /* No rule point empty - we can't punch more holes. 
*/ + la->fireWallActiveNum = la->fireWallBaseNum; +#ifdef LIBALIAS_DEBUG + fprintf(stderr, "libalias: Unable to create firewall hole!\n"); +#endif + return; + } + } + /* Start next search at next position */ + la->fireWallActiveNum = fwhole + 1; + + /* + * generate two rules of the form + * + * add fwhole accept tcp from OAddr OPort to DAddr DPort add fwhole + * accept tcp from DAddr DPort to OAddr OPort + */ + if (GetOriginalPort(lnk) != 0 && GetDestPort(lnk) != 0) { + u_int32_t rulebuf[255]; + int i; + + i = fill_rule(rulebuf, sizeof(rulebuf), fwhole, + O_ACCEPT, IPPROTO_TCP, + GetOriginalAddress(lnk), ntohs(GetOriginalPort(lnk)), + GetDestAddress(lnk), ntohs(GetDestPort(lnk))); + r = setsockopt(la->fireWallFD, IPPROTO_IP, IP_FW_ADD, rulebuf, i); + if (r) + err(1, "alias punch inbound(1) setsockopt(IP_FW_ADD)"); + + i = fill_rule(rulebuf, sizeof(rulebuf), fwhole, + O_ACCEPT, IPPROTO_TCP, + GetDestAddress(lnk), ntohs(GetDestPort(lnk)), + GetOriginalAddress(lnk), ntohs(GetOriginalPort(lnk))); + r = setsockopt(la->fireWallFD, IPPROTO_IP, IP_FW_ADD, rulebuf, i); + if (r) + err(1, "alias punch inbound(2) setsockopt(IP_FW_ADD)"); + } + +/* Indicate hole applied */ + lnk->data.tcp->fwhole = fwhole; + fw_setfield(la, la->fireWallField, fwhole); +} + +/* Remove a hole in a firewall associated with a particular alias + lnk. Calling this too often is harmless. */ +static void +ClearFWHole(struct alias_link *lnk) +{ + struct libalias *la; + + LIBALIAS_LOCK_ASSERT(la); + la = lnk->la; + if (lnk->link_type == LINK_TCP) { + int fwhole = lnk->data.tcp->fwhole; /* Where is the firewall + * hole? */ + struct ip_fw rule; + + if (fwhole < 0) + return; + + memset(&rule, 0, sizeof rule); /* useless for ipfw2 */ + while (!setsockopt(la->fireWallFD, IPPROTO_IP, IP_FW_DEL, + &fwhole, sizeof fwhole)); + fw_clrfield(la, la->fireWallField, fwhole); + lnk->data.tcp->fwhole = -1; + } +} + +/* Clear out the entire range dedicated to firewall holes. 
*/ +static void +ClearAllFWHoles(struct libalias *la) +{ + struct ip_fw rule; /* On-the-fly built rule */ + int i; + + LIBALIAS_LOCK_ASSERT(la); + if (la->fireWallFD < 0) + return; + + memset(&rule, 0, sizeof rule); + for (i = la->fireWallBaseNum; i < la->fireWallBaseNum + la->fireWallNumNums; i++) { + int r = i; + + while (!setsockopt(la->fireWallFD, IPPROTO_IP, IP_FW_DEL, &r, sizeof r)); + } + /* XXX: third arg correct here ? /phk */ + memset(la->fireWallField, 0, la->fireWallNumNums); +} + +#endif + +void +LibAliasSetFWBase(struct libalias *la, unsigned int base, unsigned int num) +{ + +#ifdef VBOX + NOREF(la); + NOREF(base); + NOREF(num); +#endif + LIBALIAS_LOCK(la); +#ifndef NO_FW_PUNCH + la->fireWallBaseNum = base; + la->fireWallNumNums = num; +#endif + LIBALIAS_UNLOCK(la); +} + +void +LibAliasSetSkinnyPort(struct libalias *la, unsigned int port) +{ + + LIBALIAS_LOCK(la); + la->skinnyPort = port; + LIBALIAS_UNLOCK(la); +} diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_dummy.c b/src/VBox/Devices/Network/slirp/libalias/alias_dummy.c new file mode 100644 index 00000000..31220c54 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_dummy.c @@ -0,0 +1,153 @@ +/*- + * Copyright (c) 2005 Paolo Pisati <piso@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_dummy.c,v 1.1.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +/* + * Alias_dummy is just an empty skeleton used to demostrate how to write + * a module for libalias, that will run unalterated in userland or in + * kernel land. + */ + +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/module.h> +#else +#include <errno.h> +#include <sys/types.h> +#include <stdio.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include "alias_local.h" +#include "alias_mod.h" +#endif + +static void +AliasHandleDummy(struct libalias *la, struct ip *ip, struct alias_data *ah); + +static int +fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + /* + * Check here all the data that will be used later, if any field + * is empy/NULL, return a -1 value. 
+ */ + if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL || + ah->maxpktsize == 0) + return (-1); + /* + * Fingerprint the incoming packet, if it matches any conditions + * return an OK value. + */ + if (ntohs(*ah->dport) == 123 + || ntohs(*ah->sport) == 456) + return (0); /* I know how to handle it. */ + return (-1); /* I don't recognize this packet. */ +} + +/* + * Wrap in this general purpose function, the real function used to alias the + * packets. + */ + +static int +protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandleDummy(la, pip, ah); + return (0); +} + +/* + * NOTA BENE: the next variable MUST NOT be renamed in any case if you want + * your module to work in userland, cause it's used to find and use all + * the protocol handlers present in every module. + * So WATCH OUT, your module needs this variables and it needs it with + * ITS EXACT NAME: handlers. + */ + +struct proto_handler handlers [] = { + { + .pri = 666, + .dir = IN|OUT, + .proto = UDP|TCP, + .fingerprint = &fingerprint, + .protohandler = &protohandler + }, + { EOH } +}; + +static int +mod_handler(module_t mod, int type, void *data) +{ + int error; + + switch (type) { + case MOD_LOAD: + error = 0; + LibAliasAttachHandlers(handlers); + break; + case MOD_UNLOAD: + error = 0; + LibAliasDetachHandlers(handlers); + break; + default: + error = EINVAL; + } + return (error); +} + +#ifdef _KERNEL +static +#endif +moduledata_t alias_mod = { + "alias_dummy", mod_handler, NULL +}; + +#ifdef _KERNEL +DECLARE_MODULE(alias_dummy, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); +MODULE_VERSION(alias_dummy, 1); +MODULE_DEPEND(alias_dummy, libalias, 1, 1, 1); +#endif + +static void +AliasHandleDummy(struct libalias *la, struct ip *ip, struct alias_data *ah) +{ + ; /* Dummy. 
*/ +} + diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_ftp.c b/src/VBox/Devices/Network/slirp/libalias/alias_ftp.c new file mode 100644 index 00000000..afbd70d9 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_ftp.c @@ -0,0 +1,773 @@ +/*- + * Copyright (c) 2001 Charles Mott <cm@linktel.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_ftp.c,v 1.29.2.1.4.1 2009/04/15 03:14:26 kensmith Exp $"); + +/* + Alias_ftp.c performs special processing for FTP sessions under + TCP. 
Specifically, when a PORT/EPRT command from the client + side or 227/229 reply from the server is sent, it is intercepted + and modified. The address is changed to the gateway machine + and an aliasing port is used. + + For this routine to work, the message must fit entirely into a + single TCP packet. This is typically the case, but exceptions + can easily be envisioned under the actual specifications. + + Probably the most troubling aspect of the approach taken here is + that the new message will typically be a different length, and + this causes a certain amount of bookkeeping to keep track of the + changes of sequence and acknowledgment numbers, since the client + machine is totally unaware of the modification to the TCP stream. + + + References: RFC 959, RFC 2428. + + Initial version: August, 1996 (cjm) + + Version 1.6 + Brian Somers and Martin Renters identified an IP checksum + error for modified IP packets. + + Version 1.7: January 9, 1996 (cjm) + Differential checksum computation for change + in IP packet length. + + Version 2.1: May, 1997 (cjm) + Very minor changes to conform with + local/global/function naming conventions + within the packet aliasing module. + + Version 3.1: May, 2000 (eds) + Add support for passive mode, alias the 227 replies. + + See HISTORY file for record of revisions. 
+*/ + +/* Includes */ +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/ctype.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#else +#include <errno.h> +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include "alias_local.h" +#include "alias_mod.h" +#endif +#else /* VBOX */ +# include <iprt/ctype.h> +# include <slirp.h> +# include "alias_local.h" +# include "alias_mod.h" +# define isspace(ch) RT_C_IS_SPACE(ch) +# define isdigit(ch) RT_C_IS_DIGIT(ch) +#endif /* VBOX */ + +#define FTP_CONTROL_PORT_NUMBER 21 + +static void +AliasHandleFtpOut(struct libalias *, struct ip *, struct alias_link *, + int maxpacketsize); + +static int +fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + +#ifdef VBOX + NOREF(la); + NOREF(pip); +#endif + if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL || + ah->maxpktsize == 0) + return (-1); + if (ntohs(*ah->dport) == FTP_CONTROL_PORT_NUMBER + || ntohs(*ah->sport) == FTP_CONTROL_PORT_NUMBER) + return (0); + return (-1); +} + +static int +protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandleFtpOut(la, pip, ah->lnk, ah->maxpktsize); + return (0); +} + +#ifndef VBOX +struct proto_handler handlers[] = { + { + .pri = 80, + .dir = OUT, + .proto = TCP, + .fingerprint = &fingerprint, + .protohandler = &protohandler + }, + { EOH } +}; +#else /* VBOX */ +#define handlers pData->ftp_module +#endif /* VBOX */ + +#ifndef VBOX +static int +mod_handler(module_t mod, int type, void *data) +#else +static int ftp_alias_handler(PNATState pData, int type); + +int +ftp_alias_load(PNATState pData) +{ + return ftp_alias_handler(pData, MOD_LOAD); +} + +int 
+ftp_alias_unload(PNATState pData) +{ + return ftp_alias_handler(pData, MOD_UNLOAD); +} +static int +ftp_alias_handler(PNATState pData, int type) +#endif +{ + int error; +#ifdef VBOX + if (handlers == NULL) + handlers = RTMemAllocZ(2 * sizeof(struct proto_handler)); + handlers[0].pri = 80; + handlers[0].dir = OUT; + handlers[0].proto = TCP; + handlers[0].fingerprint = &fingerprint; + handlers[0].protohandler = &protohandler; + handlers[1].pri = (u_int)EOH; +#endif /* VBOX */ + + switch (type) { + case MOD_LOAD: + error = 0; +#ifdef VBOX + LibAliasAttachHandlers(pData, handlers); +#else + LibAliasAttachHandlers(handlers); +#endif + break; + case MOD_UNLOAD: + error = 0; +#ifdef VBOX + LibAliasDetachHandlers(pData, handlers); + RTMemFree(handlers); + handlers = NULL; +#else + LibAliasDetachHandlers(handlers); +#endif + break; + default: + error = EINVAL; + } + return (error); +} + +#ifndef VBOX +#ifdef _KERNEL +static +#endif +moduledata_t alias_mod = { + "alias_ftp", mod_handler, NULL +}; +#endif /* !VBOX */ + +#ifdef _KERNEL +DECLARE_MODULE(alias_ftp, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); +MODULE_VERSION(alias_ftp, 1); +MODULE_DEPEND(alias_ftp, libalias, 1, 1, 1); +#endif + +#define FTP_CONTROL_PORT_NUMBER 21 +#define MAX_MESSAGE_SIZE 128 + +/* FTP protocol flags. 
*/ +#define WAIT_CRLF 0x01 + +enum ftp_message_type { + FTP_PORT_COMMAND, + FTP_EPRT_COMMAND, + FTP_227_REPLY, + FTP_229_REPLY, + FTP_UNKNOWN_MESSAGE +}; + +static int ParseFtpPortCommand(struct libalias *la, char *, int); +static int ParseFtpEprtCommand(struct libalias *la, char *, int); +static int ParseFtp227Reply(struct libalias *la, char *, int); +static int ParseFtp229Reply(struct libalias *la, char *, int); +static void NewFtpMessage(struct libalias *la, struct ip *, struct alias_link *, int, int); + +static void +AliasHandleFtpOut( + struct libalias *la, + struct ip *pip, /* IP packet to examine/patch */ + struct alias_link *lnk, /* The link to go through (aliased port) */ + int maxpacketsize /* The maximum size this packet can grow to + (including headers) */ ) +{ + int hlen, tlen, dlen, pflags; + char *sptr; + struct tcphdr *tc; + int ftp_message_type; + +/* Calculate data length of TCP packet */ + tc = (struct tcphdr *)ip_next(pip); + hlen = (pip->ip_hl + tc->th_off) << 2; + tlen = ntohs(pip->ip_len); + dlen = tlen - hlen; + +/* Place string pointer and beginning of data */ + sptr = (char *)pip; + sptr += hlen; + +/* + * Check that data length is not too long and previous message was + * properly terminated with CRLF. + */ + pflags = GetProtocolFlags(lnk); + if (dlen <= MAX_MESSAGE_SIZE && !(pflags & WAIT_CRLF)) { + ftp_message_type = FTP_UNKNOWN_MESSAGE; + + if (ntohs(tc->th_dport) == FTP_CONTROL_PORT_NUMBER) { +/* + * When aliasing a client, check for the PORT/EPRT command. + */ + if (ParseFtpPortCommand(la, sptr, dlen)) + ftp_message_type = FTP_PORT_COMMAND; + else if (ParseFtpEprtCommand(la, sptr, dlen)) + ftp_message_type = FTP_EPRT_COMMAND; + } else { +/* + * When aliasing a server, check for the 227/229 reply. 
+ */ + if (ParseFtp227Reply(la, sptr, dlen)) + ftp_message_type = FTP_227_REPLY; + else if (ParseFtp229Reply(la, sptr, dlen)) { + ftp_message_type = FTP_229_REPLY; + la->true_addr.s_addr = pip->ip_src.s_addr; + } + } + + if (ftp_message_type != FTP_UNKNOWN_MESSAGE) + NewFtpMessage(la, pip, lnk, maxpacketsize, ftp_message_type); + } +/* Track the msgs which are CRLF term'd for PORT/PASV FW breach */ + + if (dlen) { /* only if there's data */ + sptr = (char *)pip; /* start over at beginning */ + tlen = ntohs(pip->ip_len); /* recalc tlen, pkt may + * have grown */ + if (sptr[tlen - 2] == '\r' && sptr[tlen - 1] == '\n') + pflags &= ~WAIT_CRLF; + else + pflags |= WAIT_CRLF; + SetProtocolFlags(lnk, pflags); + } +} + +static int +ParseFtpPortCommand(struct libalias *la, char *sptr, int dlen) +{ + char ch; + int i, state; + u_int32_t addr; + u_short port; + u_int8_t octet; + + /* Format: "PORT A,D,D,R,PO,RT". */ + + /* Return if data length is too short. */ + if (dlen < 18) + return (0); + + if (strncasecmp("PORT ", sptr, 5)) + return (0); + + addr = port = octet = 0; + state = 0; + for (i = 5; i < dlen; i++) { + ch = sptr[i]; + switch (state) { + case 0: + if (isspace(ch)) + break; + else + state++; + RT_FALL_THRU(); + case 1: + case 3: + case 5: + case 7: + case 9: + case 11: + if (isdigit(ch)) { + octet = ch - '0'; + state++; + } else + return (0); + break; + case 2: + case 4: + case 6: + case 8: + if (isdigit(ch)) + octet = 10 * octet + ch - '0'; + else if (ch == ',') { + addr = (addr << 8) + octet; + state++; + } else + return (0); + break; + case 10: + case 12: + if (isdigit(ch)) + octet = 10 * octet + ch - '0'; + else if (ch == ',' || state == 12) { + port = (port << 8) + octet; + state++; + } else + return (0); + break; + } + } + + if (state == 13) { + la->true_addr.s_addr = htonl(addr); + la->true_port = port; + return (1); + } else + return (0); +} + +static int +ParseFtpEprtCommand(struct libalias *la, char *sptr, int dlen) +{ + char ch, delim; + int i, state; + 
u_int32_t addr; + u_short port; + u_int8_t octet; + + /* Format: "EPRT |1|A.D.D.R|PORT|". */ + + /* Return if data length is too short. */ + if (dlen < 18) + return (0); + + if (strncasecmp("EPRT ", sptr, 5)) + return (0); + + addr = port = octet = 0; + delim = '|'; /* XXX gcc -Wuninitialized */ + state = 0; + for (i = 5; i < dlen; i++) { + ch = sptr[i]; + switch (state) { + case 0: + if (!isspace(ch)) { + delim = ch; + state++; + } + break; + case 1: + if (ch == '1') /* IPv4 address */ + state++; + else + return (0); + break; + case 2: + if (ch == delim) + state++; + else + return (0); + break; + case 3: + case 5: + case 7: + case 9: + if (isdigit(ch)) { + octet = ch - '0'; + state++; + } else + return (0); + break; + case 4: + case 6: + case 8: + case 10: + if (isdigit(ch)) + octet = 10 * octet + ch - '0'; + else if (ch == '.' || state == 10) { + addr = (addr << 8) + octet; + state++; + } else + return (0); + break; + case 11: + if (isdigit(ch)) { + port = ch - '0'; + state++; + } else + return (0); + break; + case 12: + if (isdigit(ch)) + port = 10 * port + ch - '0'; + else if (ch == delim) + state++; + else + return (0); + break; + } + } + + if (state == 13) { + la->true_addr.s_addr = htonl(addr); + la->true_port = port; + return (1); + } else + return (0); +} + +static int +ParseFtp227Reply(struct libalias *la, char *sptr, int dlen) +{ + char ch; + int i, state; + u_int32_t addr; + u_short port; + u_int8_t octet; + + /* Format: "227 Entering Passive Mode (A,D,D,R,PO,RT)" */ + + /* Return if data length is too short. 
*/ + if (dlen < 17) + return (0); + + if (strncmp("227 ", sptr, 4)) + return (0); + + addr = port = octet = 0; + + state = 0; + for (i = 4; i < dlen; i++) { + ch = sptr[i]; + switch (state) { + case 0: + if (ch == '(') + state++; + break; + case 1: + case 3: + case 5: + case 7: + case 9: + case 11: + if (isdigit(ch)) { + octet = ch - '0'; + state++; + } else + return (0); + break; + case 2: + case 4: + case 6: + case 8: + if (isdigit(ch)) + octet = 10 * octet + ch - '0'; + else if (ch == ',') { + addr = (addr << 8) + octet; + state++; + } else + return (0); + break; + case 10: + case 12: + if (isdigit(ch)) + octet = 10 * octet + ch - '0'; + else if (ch == ',' || (state == 12 && ch == ')')) { + port = (port << 8) + octet; + state++; + } else + return (0); + break; + } + } + + if (state == 13) { + if (addr != INADDR_LOOPBACK) + la->true_addr.s_addr = htonl(addr); + else + la->true_addr.s_addr = la->pData->alias_addr.s_addr; + la->true_port = port; + return (1); + } else + return (0); +} + +static int +ParseFtp229Reply(struct libalias *la, char *sptr, int dlen) +{ + char ch, delim; + int i, state; + u_short port; + + /* Format: "229 Entering Extended Passive Mode (|||PORT|)" */ + + /* Return if data length is too short. 
*/ + if (dlen < 11) + return (0); + + if (strncmp("229 ", sptr, 4)) + return (0); + + port = 0; + delim = '|'; /* XXX gcc -Wuninitialized */ + + state = 0; + for (i = 4; i < dlen; i++) { + ch = sptr[i]; + switch (state) { + case 0: + if (ch == '(') + state++; + break; + case 1: + delim = ch; + state++; + break; + case 2: + case 3: + if (ch == delim) + state++; + else + return (0); + break; + case 4: + if (isdigit(ch)) { + port = ch - '0'; + state++; + } else + return (0); + break; + case 5: + if (isdigit(ch)) + port = 10 * port + ch - '0'; + else if (ch == delim) + state++; + else + return (0); + break; + case 6: + if (ch == ')') + state++; + else + return (0); + break; + } + } + + if (state == 7) { + la->true_port = port; + return (1); + } else + return (0); +} + +static void +NewFtpMessage(struct libalias *la, struct ip *pip, + struct alias_link *lnk, + int maxpacketsize, + int ftp_message_type) +{ + struct alias_link *ftp_lnk; + +/* Security checks. */ + if (pip->ip_src.s_addr != la->true_addr.s_addr) + return; + + if (la->true_port < IPPORT_RESERVED) + return; + +/* Establish link to address and port found in FTP control message. */ + ftp_lnk = FindUdpTcpOut(la, la->true_addr, GetDestAddress(lnk), + htons(la->true_port), 0, IPPROTO_TCP, 1); + + if (ftp_lnk != NULL) { + int slen, hlen, tlen, dlen; + struct tcphdr *tc; + +#ifndef NO_FW_PUNCH + /* Punch hole in firewall */ + PunchFWHole(ftp_lnk); +#endif + +/* Calculate data length of TCP packet */ + tc = (struct tcphdr *)ip_next(pip); + hlen = (pip->ip_hl + tc->th_off) << 2; + tlen = ntohs(pip->ip_len); + dlen = tlen - hlen; + +/* Create new FTP message. 
*/ + { + char stemp[MAX_MESSAGE_SIZE + 1]; + char *sptr; + u_short alias_port; + u_char *ptr; + int a1, a2, a3, a4, p1, p2; + struct in_addr alias_address; + +/* Decompose alias address into quad format */ + alias_address = GetAliasAddress(lnk); + ptr = (u_char *) & alias_address.s_addr; + a1 = *ptr++; + a2 = *ptr++; + a3 = *ptr++; + a4 = *ptr; + + alias_port = GetAliasPort(ftp_lnk); + +/* Prepare new command */ + switch (ftp_message_type) { + case FTP_PORT_COMMAND: + case FTP_227_REPLY: + /* Decompose alias port into pair format. */ + ptr = (u_char *)&alias_port; + p1 = *ptr++; + p2 = *ptr; + + if (ftp_message_type == FTP_PORT_COMMAND) { + /* Generate PORT command string. */ +#ifndef VBOX + sprintf(stemp, "PORT %d,%d,%d,%d,%d,%d\r\n", + a1, a2, a3, a4, p1, p2); +#else + RTStrPrintf(stemp, sizeof(stemp), "PORT %d,%d,%d,%d,%d,%d\r\n", + a1, a2, a3, a4, p1, p2); +#endif + } else { + /* Generate 227 reply string. */ +#ifndef VBOX + sprintf(stemp, + "227 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n", + a1, a2, a3, a4, p1, p2); +#else + RTStrPrintf(stemp, sizeof(stemp), + "227 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n", + a1, a2, a3, a4, p1, p2); +#endif + } + break; + case FTP_EPRT_COMMAND: + /* Generate EPRT command string. */ +#ifndef VBOX + sprintf(stemp, "EPRT |1|%d.%d.%d.%d|%d|\r\n", + a1, a2, a3, a4, ntohs(alias_port)); +#else + RTStrPrintf(stemp, sizeof(stemp), "EPRT |1|%d.%d.%d.%d|%d|\r\n", + a1, a2, a3, a4, ntohs(alias_port)); +#endif + break; + case FTP_229_REPLY: + /* Generate 229 reply string. */ +#ifndef VBOX + sprintf(stemp, "229 Entering Extended Passive Mode (|||%d|)\r\n", + ntohs(alias_port)); +#else + RTStrPrintf(stemp, sizeof(stemp), "229 Entering Extended Passive Mode (|||%d|)\r\n", + ntohs(alias_port)); +#endif + break; + } + +/* Save string length for IP header modification */ + slen = (int)strlen(stemp); + +/* Copy modified buffer into IP packet. 
*/ + sptr = (char *)pip; + sptr += hlen; + strncpy(sptr, stemp, maxpacketsize - hlen); + } + +/* Save information regarding modified seq and ack numbers */ + { + int delta; + + SetAckModified(lnk); + delta = GetDeltaSeqOut(pip, lnk); + AddSeq(pip, lnk, delta + slen - dlen); + } + +/* Revise IP header */ + { + u_short new_len; + + new_len = htons(hlen + slen); + DifferentialChecksum(&pip->ip_sum, + &new_len, + &pip->ip_len, + 1); + pip->ip_len = new_len; + } + +/* Compute TCP checksum for revised packet */ + tc->th_sum = 0; +#ifdef _KERNEL + tc->th_x2 = 1; +#else + tc->th_sum = TcpChecksum(pip); +#endif + } else { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/HandleFtpOut: Cannot allocate FTP data port\n"); +#endif + } +} diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_irc.c b/src/VBox/Devices/Network/slirp/libalias/alias_irc.c new file mode 100644 index 00000000..01962ddf --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_irc.c @@ -0,0 +1,485 @@ +/*- + * Copyright (c) 2001 Charles Mott <cm@linktel.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_irc.c,v 1.23.2.2.4.1 2009/04/15 03:14:26 kensmith Exp $"); + +/* Alias_irc.c intercepts packets containing IRC CTCP commands, and + changes DCC commands to export a port on the aliasing host instead + of an aliased host. + + For this routine to work, the DCC command must fit entirely into a + single TCP packet. This will usually happen, but is not + guaranteed. + + The interception is likely to change the length of the packet. + The handling of this is copied more-or-less verbatim from + alias_ftp.c + + Initial version: Eivind Eklund <perhaps@yes.no> (ee) 97-01-29 + + Version 2.1: May, 1997 (cjm) + Very minor changes to conform with + local/global/function naming conventions + within the packet aliasing module. 
+*/ + +/* Includes */ +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/ctype.h> +#include <sys/limits.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#else +#include <errno.h> +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include "alias_local.h" +#include "alias_mod.h" +#endif + +#define IRC_CONTROL_PORT_NUMBER_1 6667 +#define IRC_CONTROL_PORT_NUMBER_2 6668 + +#define PKTSIZE (IP_MAXPACKET + 1) +char *newpacket; + +/* Local defines */ +#define DBprintf(a) + +static void +AliasHandleIrcOut(struct libalias *, struct ip *, struct alias_link *, + int maxpacketsize); + +static int +fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + if (ah->dport == NULL || ah->lnk == NULL || + ah->maxpktsize == 0) + return (-1); + if (ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_1 + || ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_2) + return (0); + return (-1); +} + +static int +protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + newpacket = malloc(PKTSIZE); + if (newpacket) { + AliasHandleIrcOut(la, pip, ah->lnk, ah->maxpktsize); + free(newpacket); + } + return (0); +} + +struct proto_handler handlers[] = { + { + .pri = 90, + .dir = OUT, + .proto = TCP, + .fingerprint = &fingerprint, + .protohandler = &protohandler + }, + { EOH } +}; + +static int +mod_handler(module_t mod, int type, void *data) +{ + int error; + + switch (type) { + case MOD_LOAD: + error = 0; + LibAliasAttachHandlers(handlers); + break; + case MOD_UNLOAD: + error = 0; + LibAliasDetachHandlers(handlers); + break; + default: + error = EINVAL; + } + return (error); +} + +#ifdef _KERNEL +static +#endif 
+moduledata_t alias_mod = { + "alias_irc", mod_handler, NULL +}; + +/* Kernel module definition. */ +#ifdef _KERNEL +DECLARE_MODULE(alias_irc, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); +MODULE_VERSION(alias_irc, 1); +MODULE_DEPEND(alias_irc, libalias, 1, 1, 1); +#endif + +static void +AliasHandleIrcOut(struct libalias *la, + struct ip *pip, /* IP packet to examine */ + struct alias_link *lnk, /* Which link are we on? */ + int maxsize /* Maximum size of IP packet including + * headers */ +) +{ + int hlen, tlen, dlen; + struct in_addr true_addr; + u_short true_port; + char *sptr; + struct tcphdr *tc; + int i; /* Iterator through the source */ + +/* Calculate data length of TCP packet */ + tc = (struct tcphdr *)ip_next(pip); + hlen = (pip->ip_hl + tc->th_off) << 2; + tlen = ntohs(pip->ip_len); + dlen = tlen - hlen; + + /* + * Return if data length is too short - assume an entire PRIVMSG in + * each packet. + */ + if (dlen < (int)sizeof(":A!a@n.n PRIVMSG A :aDCC 1 1a") - 1) + return; + +/* Place string pointer at beginning of data */ + sptr = (char *)pip; + sptr += hlen; + maxsize -= hlen; /* We're interested in maximum size of + * data, not packet */ + + /* Search for a CTCP command [Note 1] */ + for (i = 0; i < dlen; i++) { + if (sptr[i] == '\001') + goto lFOUND_CTCP; + } + return; /* No CTCP commands in */ + /* Handle CTCP commands - the buffer may have to be copied */ +lFOUND_CTCP: + { + unsigned int copyat = i; + unsigned int iCopy = 0; /* How much data have we written to + * copy-back string? 
*/ + unsigned long org_addr; /* Original IP address */ + unsigned short org_port; /* Original source port + * address */ + +lCTCP_START: + if (i >= dlen || iCopy >= PKTSIZE) + goto lPACKET_DONE; + newpacket[iCopy++] = sptr[i++]; /* Copy the CTCP start + * character */ + /* Start of a CTCP */ + if (i + 4 >= dlen) /* Too short for DCC */ + goto lBAD_CTCP; + if (sptr[i + 0] != 'D') + goto lBAD_CTCP; + if (sptr[i + 1] != 'C') + goto lBAD_CTCP; + if (sptr[i + 2] != 'C') + goto lBAD_CTCP; + if (sptr[i + 3] != ' ') + goto lBAD_CTCP; + /* We have a DCC command - handle it! */ + i += 4; /* Skip "DCC " */ + if (iCopy + 4 > PKTSIZE) + goto lPACKET_DONE; + newpacket[iCopy++] = 'D'; + newpacket[iCopy++] = 'C'; + newpacket[iCopy++] = 'C'; + newpacket[iCopy++] = ' '; + + DBprintf(("Found DCC\n")); + /* + * Skip any extra spaces (should not occur according to + * protocol, but DCC breaks CTCP protocol anyway + */ + while (sptr[i] == ' ') { + if (++i >= dlen) { + DBprintf(("DCC packet terminated in just spaces\n")); + goto lPACKET_DONE; + } + } + + DBprintf(("Transferring command...\n")); + while (sptr[i] != ' ') { + newpacket[iCopy++] = sptr[i]; + if (++i >= dlen || iCopy >= PKTSIZE) { + DBprintf(("DCC packet terminated during command\n")); + goto lPACKET_DONE; + } + } + /* Copy _one_ space */ + if (i + 1 < dlen && iCopy < PKTSIZE) + newpacket[iCopy++] = sptr[i++]; + + DBprintf(("Done command - removing spaces\n")); + /* + * Skip any extra spaces (should not occur according to + * protocol, but DCC breaks CTCP protocol anyway + */ + while (sptr[i] == ' ') { + if (++i >= dlen) { + DBprintf(("DCC packet terminated in just spaces (post-command)\n")); + goto lPACKET_DONE; + } + } + + DBprintf(("Transferring filename...\n")); + while (sptr[i] != ' ') { + newpacket[iCopy++] = sptr[i]; + if (++i >= dlen || iCopy >= PKTSIZE) { + DBprintf(("DCC packet terminated during filename\n")); + goto lPACKET_DONE; + } + } + /* Copy _one_ space */ + if (i + 1 < dlen && iCopy < PKTSIZE) + 
newpacket[iCopy++] = sptr[i++]; + + DBprintf(("Done filename - removing spaces\n")); + /* + * Skip any extra spaces (should not occur according to + * protocol, but DCC breaks CTCP protocol anyway + */ + while (sptr[i] == ' ') { + if (++i >= dlen) { + DBprintf(("DCC packet terminated in just spaces (post-filename)\n")); + goto lPACKET_DONE; + } + } + + DBprintf(("Fetching IP address\n")); + /* Fetch IP address */ + org_addr = 0; + while (i < dlen && isdigit(sptr[i])) { + if (org_addr > ULONG_MAX / 10UL) { /* Terminate on overflow */ + DBprintf(("DCC Address overflow (org_addr == 0x%08lx, next char %c\n", org_addr, sptr[i])); + goto lBAD_CTCP; + } + org_addr *= 10; + org_addr += sptr[i++] - '0'; + } + DBprintf(("Skipping space\n")); + if (i + 1 >= dlen || sptr[i] != ' ') { + DBprintf(("Overflow (%d >= %d) or bad character (%02x) terminating IP address\n", i + 1, dlen, sptr[i])); + goto lBAD_CTCP; + } + /* + * Skip any extra spaces (should not occur according to + * protocol, but DCC breaks CTCP protocol anyway, so we + * might as well play it safe + */ + while (sptr[i] == ' ') { + if (++i >= dlen) { + DBprintf(("Packet failure - space overflow.\n")); + goto lPACKET_DONE; + } + } + DBprintf(("Fetching port number\n")); + /* Fetch source port */ + org_port = 0; + while (i < dlen && isdigit(sptr[i])) { + if (org_port > 6554) { /* Terminate on overflow + * (65536/10 rounded up */ + DBprintf(("DCC: port number overflow\n")); + goto lBAD_CTCP; + } + org_port *= 10; + org_port += sptr[i++] - '0'; + } + /* Skip illegal addresses (or early termination) */ + if (i >= dlen || (sptr[i] != '\001' && sptr[i] != ' ')) { + DBprintf(("Bad port termination\n")); + goto lBAD_CTCP; + } + DBprintf(("Got IP %lu and port %u\n", org_addr, (unsigned)org_port)); + + /* We've got the address and port - now alias it */ + { + struct alias_link *dcc_lnk; + struct in_addr destaddr; + + + true_port = htons(org_port); + true_addr.s_addr = htonl(org_addr); + destaddr.s_addr = 0; + + /* 
Sanity/Security checking */ + if (!org_addr || !org_port || + pip->ip_src.s_addr != true_addr.s_addr || + org_port < IPPORT_RESERVED) + goto lBAD_CTCP; + + /* + * Steal the FTP_DATA_PORT - it doesn't really + * matter, and this would probably allow it through + * at least _some_ firewalls. + */ + dcc_lnk = FindUdpTcpOut(la, true_addr, destaddr, + true_port, 0, + IPPROTO_TCP, 1); + DBprintf(("Got a DCC link\n")); + if (dcc_lnk) { + struct in_addr alias_address; /* Address from aliasing */ + u_short alias_port; /* Port given by + * aliasing */ + int n; + +#ifndef NO_FW_PUNCH + /* Generate firewall hole as appropriate */ + PunchFWHole(dcc_lnk); +#endif + + alias_address = GetAliasAddress(lnk); + n = snprintf(&newpacket[iCopy], + PKTSIZE - iCopy, + "%lu ", (u_long) htonl(alias_address.s_addr)); + if (n < 0) { + DBprintf(("DCC packet construct failure.\n")); + goto lBAD_CTCP; + } + if ((iCopy += n) >= PKTSIZE) { /* Truncated/fit exactly + * - bad news */ + DBprintf(("DCC constructed packet overflow.\n")); + goto lBAD_CTCP; + } + alias_port = GetAliasPort(dcc_lnk); + n = snprintf(&newpacket[iCopy], + PKTSIZE - iCopy, + "%u", htons(alias_port)); + if (n < 0) { + DBprintf(("DCC packet construct failure.\n")); + goto lBAD_CTCP; + } + iCopy += n; + /* + * Done - truncated cases will be taken + * care of by lBAD_CTCP + */ + DBprintf(("Aliased IP %lu and port %u\n", alias_address.s_addr, (unsigned)alias_port)); + } + } + /* + * An uninteresting CTCP - state entered right after '\001' + * has been pushed. 
Also used to copy the rest of a DCC, + * after IP address and port has been handled + */ +lBAD_CTCP: + for (; i < dlen && iCopy < PKTSIZE; i++, iCopy++) { + newpacket[iCopy] = sptr[i]; /* Copy CTCP unchanged */ + if (sptr[i] == '\001') { + goto lNORMAL_TEXT; + } + } + goto lPACKET_DONE; + /* Normal text */ +lNORMAL_TEXT: + for (; i < dlen && iCopy < PKTSIZE; i++, iCopy++) { + newpacket[iCopy] = sptr[i]; /* Copy CTCP unchanged */ + if (sptr[i] == '\001') { + goto lCTCP_START; + } + } + /* Handle the end of a packet */ +lPACKET_DONE: + iCopy = iCopy > maxsize - copyat ? maxsize - copyat : iCopy; + memcpy(sptr + copyat, newpacket, iCopy); + +/* Save information regarding modified seq and ack numbers */ + { + int delta; + + SetAckModified(lnk); + delta = GetDeltaSeqOut(pip, lnk); + AddSeq(pip, lnk, delta + copyat + iCopy - dlen); + } + + /* Revise IP header */ + { + u_short new_len; + + new_len = htons(hlen + iCopy + copyat); + DifferentialChecksum(&pip->ip_sum, + &new_len, + &pip->ip_len, + 1); + pip->ip_len = new_len; + } + + /* Compute TCP checksum for revised packet */ + tc->th_sum = 0; +#ifdef _KERNEL + tc->th_x2 = 1; +#else + tc->th_sum = TcpChecksum(pip); +#endif + return; + } +} + +/* Notes: + [Note 1] + The initial search will most often fail; it could be replaced with a 32-bit specific search. + Such a search would be done for 32-bit unsigned value V: + V ^= 0x01010101; (Search is for null bytes) + if( ((V-0x01010101)^V) & 0x80808080 ) { + (found a null bytes which was a 01 byte) + } + To assert that the processor is 32-bits, do + extern int ircdccar[32]; (32 bits) + extern int ircdccar[CHAR_BIT*sizeof(unsigned int)]; + which will generate a type-error on all but 32-bit machines. + + [Note 2] This routine really ought to be replaced with one that + creates a transparent proxy on the aliasing host, to allow arbitary + changes in the TCP stream. This should not be too difficult given + this base; I (ee) will try to do this some time later. 
+ */ diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_local.h b/src/VBox/Devices/Network/slirp/libalias/alias_local.h new file mode 100644 index 00000000..9ee4d03a --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_local.h @@ -0,0 +1,370 @@ +/*- + * Copyright (c) 2001 Charles Mott <cm@linktel.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/netinet/libalias/alias_local.h,v 1.34.2.1.4.1 2009/04/15 03:14:26 kensmith Exp $ + */ + +/* + * Alias_local.h contains the function prototypes for alias.c, + * alias_db.c, alias_util.c and alias_ftp.c, alias_irc.c (as well + * as any future add-ons). 
It also includes macros, globals and + * struct definitions shared by more than one alias*.c file. + * + * This include file is intended to be used only within the aliasing + * software. Outside world interfaces are defined in alias.h + * + * This software is placed into the public domain with no restrictions + * on its distribution. + * + * Initial version: August, 1996 (cjm) + * + * <updated several times by original author and Eivind Eklund> + */ + +#ifndef _ALIAS_LOCAL_H_ +#define _ALIAS_LOCAL_H_ + +#ifndef VBOX +#include <sys/types.h> +#include <sys/sysctl.h> + +#ifdef _KERNEL +#include <sys/malloc.h> +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +/* XXX: LibAliasSetTarget() uses this constant. */ +#define INADDR_NONE 0xffffffff +#endif +#else /* VBOX */ +# include <slirp.h> +#endif /* VBOX */ + +/* Sizes of input and output link tables */ +#define LINK_TABLE_OUT_SIZE 4001 +#define LINK_TABLE_IN_SIZE 4001 + +struct proxy_entry; + +struct libalias { + LIST_ENTRY(libalias) instancelist; + + int packetAliasMode; /* Mode flags */ + /* - documented in alias.h */ + + struct in_addr aliasAddress; /* Address written onto source */ + /* field of IP packet. */ + + struct in_addr targetAddress; /* IP address incoming packets */ + /* are sent to if no aliasing */ + /* link already exists */ + + struct in_addr nullAddress; /* Used as a dummy parameter for */ + /* some function calls */ + + LIST_HEAD (RT_NOTHING, alias_link) linkTableOut[LINK_TABLE_OUT_SIZE]; + /* Lookup table of pointers to */ + /* chains of link records. Each */ + + LIST_HEAD (RT_NOTHING, alias_link) linkTableIn[LINK_TABLE_IN_SIZE]; + /* link record is doubly indexed */ + /* into input and output lookup */ + /* tables. 
*/ + + /* Link statistics */ + int icmpLinkCount; + int udpLinkCount; + int tcpLinkCount; + int pptpLinkCount; + int protoLinkCount; + int fragmentIdLinkCount; + int fragmentPtrLinkCount; + int sockCount; + + int cleanupIndex; /* Index to chain of link table */ + /* being inspected for old links */ +#ifndef VBOX + int timeStamp; /* System time in seconds for */ + /* current packet */ + + int lastCleanupTime; /* Last time + * IncrementalCleanup() */ +#else + unsigned int timeStamp; /* System time in seconds for */ + unsigned int lastCleanupTime; /* Last time */ +#endif + /* was called */ + + int deleteAllLinks; /* If equal to zero, DeleteLink() */ + /* will not remove permanent links */ + + /* log descriptor */ +#ifdef _KERNEL + char *logDesc; +#else + FILE *logDesc; +#endif + /* statistics monitoring */ + + int newDefaultLink; /* Indicates if a new aliasing */ + /* link has been created after a */ + /* call to PacketAliasIn/Out(). */ + +#ifndef NO_FW_PUNCH + int fireWallFD; /* File descriptor to be able to */ + /* control firewall. Opened by */ + /* PacketAliasSetMode on first */ + /* setting the PKT_ALIAS_PUNCH_FW */ + /* flag. */ + int fireWallBaseNum; /* The first firewall entry + * free for our use */ + int fireWallNumNums; /* How many entries can we + * use? */ + int fireWallActiveNum; /* Which entry did we last + * use? */ + char *fireWallField; /* bool array for entries */ +#endif + + unsigned int skinnyPort; /* TCP port used by the Skinny */ + /* protocol. */ + + struct proxy_entry *proxyList; + + struct in_addr true_addr; /* in network byte order. */ + u_short true_port; /* in host byte order. */ +#if defined(_KERNEL) && !defined(VBOX) + /* + * avoid races in libalias: every public function has to use it. 
+ */ + struct mtx mutex; +#endif +#ifdef VBOX + PNATState pData; +#endif +}; + +/* Macros */ + +#if defined(_KERNEL) && !defined(VBOX) +#define LIBALIAS_LOCK_INIT(l) \ + mtx_init(&l->mutex, "per-instance libalias mutex", NULL, MTX_DEF) +#define LIBALIAS_LOCK_ASSERT(l) mtx_assert(&l->mutex, MA_OWNED) +#define LIBALIAS_LOCK(l) mtx_lock(&l->mutex) +#define LIBALIAS_UNLOCK(l) mtx_unlock(&l->mutex) +#define LIBALIAS_LOCK_DESTROY(l) mtx_destroy(&l->mutex) +#else +#define LIBALIAS_LOCK_INIT(l) +#define LIBALIAS_LOCK_ASSERT(l) +#define LIBALIAS_LOCK(l) NOREF((l)); +#define LIBALIAS_UNLOCK(l) NOREF((l)) +#define LIBALIAS_LOCK_DESTROY(l) +#endif + +/* + * The following macro is used to update an + * internet checksum. "delta" is a 32-bit + * accumulation of all the changes to the + * checksum (adding in new 16-bit words and + * subtracting out old words), and "cksum" + * is the checksum value to be updated. + */ +#define ADJUST_CHECKSUM(acc, cksum) \ + do { \ + acc += cksum; \ + if (acc < 0) { \ + acc = -acc; \ + acc = (acc >> 16) + (acc & 0xffff); \ + acc += acc >> 16; \ + cksum = (u_short) ~acc; \ + } else { \ + acc = (acc >> 16) + (acc & 0xffff); \ + acc += acc >> 16; \ + cksum = (u_short) acc; \ + } \ + } while (0) + + +/* Prototypes */ + +/* + * We do not calculate TCP checksums when libalias is a kernel + * module, since it has no idea about checksum offloading. + * If TCP data has changed, then we just set checksum to zero, + * and caller must recalculate it himself. + * In case if libalias will edit UDP data, the same approach + * should be used. 
+ */ +#ifndef _KERNEL +u_short IpChecksum(struct ip *_pip); +u_short TcpChecksum(struct ip *_pip); +#endif +void +DifferentialChecksum(u_short * _cksum, void * _new, void * _old, int _n); + +/* Internal data access */ +struct alias_link * +FindIcmpIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, + u_short _id_alias, int _create); +struct alias_link * +FindIcmpOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr, + u_short _id, int _create); +struct alias_link * +FindFragmentIn1(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, + u_short _ip_id); +struct alias_link * +FindFragmentIn2(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, + u_short _ip_id); +struct alias_link * + AddFragmentPtrLink(struct libalias *la, struct in_addr _dst_addr, u_short _ip_id); +struct alias_link * + FindFragmentPtr(struct libalias *la, struct in_addr _dst_addr, u_short _ip_id); +struct alias_link * +FindProtoIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, + u_char _proto); +struct alias_link * +FindProtoOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr, + u_char _proto); +struct alias_link * +FindUdpTcpIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, + u_short _dst_port, u_short _alias_port, u_char _proto, int _create); +struct alias_link * +FindUdpTcpOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr, + u_short _src_port, u_short _dst_port, u_char _proto, int _create); +struct alias_link * +AddPptp(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr, + struct in_addr _alias_addr, u_int16_t _src_call_id); +struct alias_link * +FindPptpOutByCallId(struct libalias *la, struct in_addr _src_addr, + struct in_addr _dst_addr, u_int16_t _src_call_id); +struct alias_link * +FindPptpInByCallId(struct libalias *la, struct in_addr _dst_addr, + struct in_addr _alias_addr, 
u_int16_t _dst_call_id); +struct alias_link * +FindPptpOutByPeerCallId(struct libalias *la, struct in_addr _src_addr, + struct in_addr _dst_addr, u_int16_t _dst_call_id); +struct alias_link * +FindPptpInByPeerCallId(struct libalias *la, struct in_addr _dst_addr, + struct in_addr _alias_addr, u_int16_t _alias_call_id); +struct alias_link * +FindRtspOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr, + u_short _src_port, u_short _alias_port, u_char _proto); +struct in_addr + FindOriginalAddress(struct libalias *la, struct in_addr _alias_addr); +struct in_addr + FindAliasAddress(struct libalias *la, struct in_addr _original_addr); + +/* External data access/modification */ +int +FindNewPortGroup(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, + u_short _src_port, u_short _dst_port, u_short _port_count, + u_char _proto, u_char _align); +void GetFragmentAddr(struct alias_link *_lnk, struct in_addr *_src_addr); +void SetFragmentAddr(struct alias_link *_lnk, struct in_addr _src_addr); +void GetFragmentPtr(struct alias_link *_lnk, char **_fptr); +void SetFragmentPtr(struct alias_link *_lnk, char *fptr); +void SetStateIn(struct alias_link *_lnk, int _state); +void SetStateOut(struct alias_link *_lnk, int _state); +int GetStateIn (struct alias_link *_lnk); +int GetStateOut(struct alias_link *_lnk); +struct in_addr + GetOriginalAddress(struct alias_link *_lnk); +struct in_addr + GetDestAddress(struct alias_link *_lnk); +struct in_addr + GetAliasAddress(struct alias_link *_lnk); +struct in_addr + GetDefaultAliasAddress(struct libalias *la); +void SetDefaultAliasAddress(struct libalias *la, struct in_addr _alias_addr); +u_short GetOriginalPort(struct alias_link *_lnk); +u_short GetAliasPort(struct alias_link *_lnk); +struct in_addr + GetProxyAddress(struct alias_link *_lnk); +void SetProxyAddress(struct alias_link *_lnk, struct in_addr _addr); +u_short GetProxyPort(struct alias_link *_lnk); +void SetProxyPort(struct alias_link 
*_lnk, u_short _port); +void SetAckModified(struct alias_link *_lnk); +int GetAckModified(struct alias_link *_lnk); +int GetDeltaAckIn(struct ip *_pip, struct alias_link *_lnk); +int GetDeltaSeqOut(struct ip *_pip, struct alias_link *_lnk); +void AddSeq (struct ip *_pip, struct alias_link *_lnk, int _delta); +void SetExpire (struct alias_link *_lnk, int _expire); +void ClearCheckNewLink(struct libalias *la); +void SetProtocolFlags(struct alias_link *_lnk, int _pflags); +int GetProtocolFlags(struct alias_link *_lnk); +void SetDestCallId(struct alias_link *_lnk, u_int16_t _cid); + +#ifndef NO_FW_PUNCH +void PunchFWHole(struct alias_link *_lnk); + +#endif + +/* Housekeeping function */ +void HouseKeeping(struct libalias *); + +/* Tcp specfic routines */ +/* lint -save -library Suppress flexelint warnings */ + +/* Transparent proxy routines */ +int +ProxyCheck(struct libalias *la, struct ip *_pip, struct in_addr *_proxy_server_addr, + u_short * _proxy_server_port); +void +ProxyModify(struct libalias *la, struct alias_link *_lnk, struct ip *_pip, + int _maxpacketsize, int _proxy_type); + +enum alias_tcp_state { + ALIAS_TCP_STATE_NOT_CONNECTED, + ALIAS_TCP_STATE_CONNECTED, + ALIAS_TCP_STATE_DISCONNECTED +}; + +#if defined(_NETINET_IP_H_) +static __inline void * +ip_next(struct ip *iphdr) +{ + char *p = (char *)iphdr; + return (&p[iphdr->ip_hl * 4]); +} +#endif + +#if defined(_NETINET_TCP_H_) +static __inline void * +tcp_next(struct tcphdr *tcphdr) +{ + char *p = (char *)tcphdr; + return (&p[tcphdr->th_off * 4]); +} +#endif + +#if defined(_NETINET_UDP_H_) +static __inline void * +udp_next(struct udphdr *udphdr) +{ + return ((void *)(udphdr + 1)); +} +#endif + +#endif /* !_ALIAS_LOCAL_H_ */ diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_mod.c b/src/VBox/Devices/Network/slirp/libalias/alias_mod.c new file mode 100644 index 00000000..4423bc38 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_mod.c @@ -0,0 +1,347 @@ +/*- + * Copyright (c) 2005 
Paolo Pisati <piso@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_mod.c,v 1.3.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +#ifdef _KERNEL +#include <sys/libkern.h> +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#else +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <errno.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include "alias_local.h" +#include "alias_mod.h" +#endif + +/* Protocol and userland module handlers chains. */ +LIST_HEAD(handler_chain, proto_handler) handler_chain = LIST_HEAD_INITIALIZER(foo); +#else /* VBOX */ +# include <slirp.h> +# include "alias_local.h" +# include "alias_mod.h" +#endif /* VBOX */ +#ifdef _KERNEL +struct rwlock handler_rw; +#endif +SLIST_HEAD(dll_chain, dll) dll_chain = SLIST_HEAD_INITIALIZER(foo); + +#ifdef _KERNEL + +#define LIBALIAS_RWLOCK_INIT() \ + rw_init(&handler_rw, "Libalias_modules_rwlock") +#define LIBALIAS_RWLOCK_DESTROY() rw_destroy(&handler_rw) +#define LIBALIAS_WLOCK_ASSERT() \ + rw_assert(&handler_rw, RA_WLOCKED) + +static __inline void +LIBALIAS_RLOCK(void) +{ + rw_rlock(&handler_rw); +} + +static __inline void +LIBALIAS_RUNLOCK(void) +{ + rw_runlock(&handler_rw); +} + +static __inline void +LIBALIAS_WLOCK(void) +{ + rw_wlock(&handler_rw); +} + +static __inline void +LIBALIAS_WUNLOCK(void) +{ + rw_wunlock(&handler_rw); +} + +static void +_handler_chain_init(void) +{ + + if (!rw_initialized(&handler_rw)) + LIBALIAS_RWLOCK_INIT(); +} + +static void +_handler_chain_destroy(void) +{ + + if (rw_initialized(&handler_rw)) + LIBALIAS_RWLOCK_DESTROY(); +} + +#else /* VBOX */ +# define LIBALIAS_WLOCK_ASSERT() ; +# define LIBALIAS_RLOCK() \ + do { \ + int rc = RTCritSectRwEnterShared(&pData->CsRwHandlerChain); \ + AssertRC(rc); \ + } while (0) +# define LIBALIAS_RUNLOCK() \ + do { \ 
+ int rc = RTCritSectRwLeaveShared(&pData->CsRwHandlerChain); \ + AssertRC(rc); \ + } while (0) +# define LIBALIAS_WLOCK() \ + do { \ + int rc = RTCritSectRwEnterExcl(&pData->CsRwHandlerChain); \ + AssertRC(rc); \ + } while (0) +# define LIBALIAS_WUNLOCK() \ + do { \ + int rc = RTCritSectRwLeaveExcl(&pData->CsRwHandlerChain); \ + AssertRC(rc); \ + } while (0) +# define _handler_chain_init() ; +# define _handler_chain_destroy() ; +#endif + +void +handler_chain_init(void) +{ + _handler_chain_init(); +} + +void +handler_chain_destroy(void) +{ + _handler_chain_destroy(); +} + +static int +#ifdef VBOX +_attach_handler(PNATState pData, struct proto_handler *p) +#else +_attach_handler(struct proto_handler *p) +#endif +{ + struct proto_handler *b = NULL, *handler_chain_tail = NULL; + + LIBALIAS_WLOCK_ASSERT(); + LIST_FOREACH(b, &handler_chain, entries) { + if ((b->pri == p->pri) && + (b->dir == p->dir) && + (b->proto == p->proto)) + return (EEXIST); /* Priority conflict. */ + if (b->pri > p->pri) { + LIST_INSERT_BEFORE(b, p, entries); + return (0); + } + + /* If the conditions above do not work, we should keep the last + * element of the list in order to insert *p right after it. */ + handler_chain_tail = b; + } + /* End of list or found right position, inserts here. */ + if (handler_chain_tail) + LIST_INSERT_AFTER(handler_chain_tail, p, entries); + else + LIST_INSERT_HEAD(&handler_chain, p, entries); + return (0); +} + +static int +#ifdef VBOX +_detach_handler(PNATState pData, struct proto_handler *p) +#else +_detach_handler(struct proto_handler *p) +#endif +{ + struct proto_handler *b, *b_tmp;; + + LIBALIAS_WLOCK_ASSERT(); + LIST_FOREACH_SAFE(b, &handler_chain, entries, b_tmp) { + if (b == p) { + LIST_REMOVE(b, entries); + return (0); + } + } + return (ENOENT); /* Handler not found. 
*/ +} + +int +#ifdef VBOX +LibAliasAttachHandlers(PNATState pData, struct proto_handler *_p) +#else +LibAliasAttachHandlers(struct proto_handler *_p) +#endif +{ + int i, error = -1; + + LIBALIAS_WLOCK(); + for (i=0; 1; i++) { + if (*((int *)&_p[i]) == EOH) + break; +#ifdef VBOX + error = _attach_handler(pData, &_p[i]); +#else + error = _attach_handler(&_p[i]); +#endif + if (error != 0) + break; + } + LIBALIAS_WUNLOCK(); + return (error); +} + +int +#ifdef VBOX +LibAliasDetachHandlers(PNATState pData, struct proto_handler *_p) +#else +LibAliasDetachHandlers(struct proto_handler *_p) +#endif +{ + int i, error = -1; + + LIBALIAS_WLOCK(); + for (i=0; 1; i++) { + if (*((int *)&_p[i]) == EOH) + break; +#ifdef VBOX + error = _detach_handler(pData, &_p[i]); +#else + error = _detach_handler(&_p[i]); +#endif + if (error != 0) + break; + } + LIBALIAS_WUNLOCK(); + return (error); +} + +int +#ifdef VBOX +detach_handler(PNATState pData, struct proto_handler *_p) +#else +detach_handler(struct proto_handler *_p) +#endif +{ + int error = -1; + + LIBALIAS_WLOCK(); +#ifdef VBOX + error = _detach_handler(pData, _p); +#else + error = _detach_handler(_p); +#endif + LIBALIAS_WUNLOCK(); + return (error); +} + +int +find_handler(int8_t dir, int8_t proto, struct libalias *la, struct ip *pip, + struct alias_data *ad) +{ +#ifdef VBOX + PNATState pData = la->pData; +#endif + struct proto_handler *p; + int error = ENOENT; + + LIBALIAS_RLOCK(); + + LIST_FOREACH(p, &handler_chain, entries) { + if ((p->dir & dir) && (p->proto & proto)) + if (p->fingerprint(la, pip, ad) == 0) { + error = p->protohandler(la, pip, ad); + break; + } + } + LIBALIAS_RUNLOCK(); + return (error); +} + +struct proto_handler * +#ifdef VBOX +first_handler(PNATState pData) +#else +first_handler(void) +#endif +{ + + return (LIST_FIRST(&handler_chain)); +} + +/* Dll manipulation code - this code is not thread safe... 
*/ + +int +attach_dll(struct dll *p) +{ + struct dll *b; + + SLIST_FOREACH(b, &dll_chain, next) { + if (!strncmp(b->name, p->name, DLL_LEN)) + return (EEXIST); /* Dll name conflict. */ + } + SLIST_INSERT_HEAD(&dll_chain, p, next); + return (0); +} + +void * +detach_dll(char *p) +{ + struct dll *b = NULL, *b_tmp; + void *error = NULL; + + SLIST_FOREACH_SAFE(b, &dll_chain, next, b_tmp) + if (!strncmp(b->name, p, DLL_LEN)) { + SLIST_REMOVE(&dll_chain, b, dll, next); + error = b; + break; + } + return (error); +} + +struct dll * +walk_dll_chain(void) +{ + struct dll *t; + + t = SLIST_FIRST(&dll_chain); + if (t == NULL) + return (NULL); + SLIST_REMOVE_HEAD(&dll_chain, next); + return (t); +} diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_mod.h b/src/VBox/Devices/Network/slirp/libalias/alias_mod.h new file mode 100644 index 00000000..74a52137 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_mod.h @@ -0,0 +1,169 @@ +/*- + * Copyright (c) 2005 Paolo Pisati <piso@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
 IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/netinet/libalias/alias_mod.h,v 1.1.8.1 2009/04/15 03:14:26 kensmith Exp $
+ */
+
+/*
+ * Alias_mod.h defines the outside world interfaces for the packet aliasing
+ * modular framework
+ */
+
+#ifndef _ALIAS_MOD_H_
+#define _ALIAS_MOD_H_
+
+#if defined(_KERNEL) && !defined(VBOX)
+MALLOC_DECLARE(M_ALIAS);
+
+/* Use kernel allocator. */
+#if defined(_SYS_MALLOC_H_)
+#define malloc(x) malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
+#define calloc(x, n) malloc(x*n)
+#define free(x) free(x, M_ALIAS)
+#endif
+#else /* VBOX */
+# ifdef RT_OS_WINDOWS
+# undef IN
+# undef OUT
+# endif /* RT_OS_WINDOWS */
+#endif /* VBOX */
+
+/* Protocol handlers struct & function. */
+
+/* Packet flow direction (bit flags, may be OR-ed together). */
+#define IN 1
+#define OUT 2
+
+/* Working protocol (bit flags). */
+#define IP 1
+#define TCP 2
+#define UDP 4
+
+/*
+ * Data passed to a protocol handler module. It must be filled in
+ * right before calling find_handler(), which uses it to determine
+ * which module is eligible to be called.
+ */
+
+struct alias_data {
+    struct alias_link *lnk;
+    struct in_addr *oaddr; /* Original address. */
+    struct in_addr *aaddr; /* Alias address. */
+    uint16_t *aport; /* Alias port. */
+    uint16_t *sport, *dport; /* Source & destination port */
+    uint16_t maxpktsize; /* Max packet size. */
+};
+
+/*
+ * This structure contains all the information necessary to make
+ * a protocol handler work correctly.
+ */
+
+struct proto_handler {
+    u_int pri; /* Handler priority. */
+    int16_t dir; /* Flow direction. */
+    uint8_t proto; /* Working protocol. */
+    int (*fingerprint)(struct libalias *la, /* Fingerprint * function. */
+        struct ip *pip, struct alias_data *ah);
+    int (*protohandler)(struct libalias *la, /* Aliasing * function. */
+        struct ip *pip, struct alias_data *ah);
+    LIST_ENTRY(proto_handler) entries;
+};
+
+
+/*
+ * Used only in userland, where libalias needs to keep track of all
+ * loaded modules. In kernel land (kld mode) we don't need to care
+ * about libalias modules because kld does it for us.
+ */
+
+#define DLL_LEN 32
+struct dll {
+    char name[DLL_LEN]; /* Name of module. */
+    void *handle; /*
+                   * Ptr to shared obj obtained through
+                   * dlopen() - use this ptr to get access
+                   * to any symbols from a loaded module
+                   * via dlsym().
+                   */
+    SLIST_ENTRY(dll) next;
+};
+
+/* Functions used with protocol handlers. */
+
+void handler_chain_init(void);
+void handler_chain_destroy(void);
+#ifdef VBOX
+int LibAliasAttachHandlers(PNATState pData, struct proto_handler *);
+int LibAliasDetachHandlers(PNATState pData, struct proto_handler *);
+int detach_handler(PNATState pData, struct proto_handler *);
+struct proto_handler *first_handler(PNATState pData);
+#else
+int LibAliasAttachHandlers(struct proto_handler *);
+int LibAliasDetachHandlers(struct proto_handler *);
+int detach_handler(struct proto_handler *);
+struct proto_handler *first_handler(void);
+#endif
+int find_handler(int8_t, int8_t, struct libalias *,
+    struct ip *, struct alias_data *);
+
+/* Functions used with dll module. */
+
+void dll_chain_init(void);
+void dll_chain_destroy(void);
+int attach_dll(struct dll *);
+void *detach_dll(char *);
+struct dll *walk_dll_chain(void);
+
+/* End of handlers: sentinel stored in the pri field of the last array element. */
+#define EOH -1
+
+/*
+ * Some defines borrowed from sys/module.h used to compile a kld
+ * in userland as a shared lib.
+ */ + +#ifndef _KERNEL +typedef enum modeventtype { + MOD_LOAD, + MOD_UNLOAD, + MOD_SHUTDOWN, + MOD_QUIESCE +} modeventtype_t; + +typedef struct module *module_t; +typedef int (*modeventhand_t)(module_t, int /* modeventtype_t */, void *); + +/* + * Struct for registering modules statically via SYSINIT. + */ +typedef struct moduledata { + const char *name; /* module name */ + modeventhand_t evhand; /* event handler */ + void *priv; /* extra data */ +} moduledata_t; +#endif + +#endif /* !_ALIAS_MOD_H_ */ diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_nbt.c b/src/VBox/Devices/Network/slirp/libalias/alias_nbt.c new file mode 100644 index 00000000..886433ae --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_nbt.c @@ -0,0 +1,955 @@ +/*- + * Written by Atsushi Murai <amurai@spec.co.jp> + * Copyright (c) 1998, System Planning and Engineering Co. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * TODO: + * oClean up. + * oConsidering for word alignment for other platform. + */ + +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_nbt.c,v 1.20.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +/* + alias_nbt.c performs special processing for NetBios over TCP/IP + sessions by UDP. + + Initial version: May, 1998 (Atsushi Murai <amurai@spec.co.jp>) + + See HISTORY file for record of revisions. +*/ + +/* Includes */ +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#else +#include <errno.h> +#include <sys/types.h> +#include <stdio.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include "alias_local.h" +#include "alias_mod.h" +#endif +#else /* VBOX */ +# include <iprt/ctype.h> +# include <slirp.h> +# include "alias_local.h" +# include "alias_mod.h" +# define isprint RT_C_IS_PRINT +#endif /* VBOX */ + +#define NETBIOS_NS_PORT_NUMBER 137 +#define NETBIOS_DGM_PORT_NUMBER 138 + +static int +AliasHandleUdpNbt(struct libalias *, struct ip *, struct alias_link *, + struct in_addr *, u_short); + +static int +AliasHandleUdpNbtNS(struct libalias *, struct ip *, struct alias_link *, + struct in_addr *, u_short *, struct in_addr *, u_short *); +static int 
+fingerprint1(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + +#ifdef VBOX + NOREF(la); + NOREF(pip); +#endif + if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL || + ah->aaddr == NULL || ah->aport == NULL) + return (-1); + if (ntohs(*ah->dport) == NETBIOS_DGM_PORT_NUMBER + || ntohs(*ah->sport) == NETBIOS_DGM_PORT_NUMBER) + return (0); + return (-1); +} + +static int +protohandler1(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandleUdpNbt(la, pip, ah->lnk, ah->aaddr, *ah->aport); + return (0); +} + +static int +fingerprint2(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + +#ifdef VBOX + NOREF(la); + NOREF(pip); +#endif + if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL || + ah->aaddr == NULL || ah->aport == NULL) + return (-1); + if (ntohs(*ah->dport) == NETBIOS_NS_PORT_NUMBER + || ntohs(*ah->sport) == NETBIOS_NS_PORT_NUMBER) + return (0); + return (-1); +} + +static int +protohandler2in(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandleUdpNbtNS(la, pip, ah->lnk, ah->aaddr, ah->aport, + ah->oaddr, ah->dport); + return (0); +} + +static int +protohandler2out(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandleUdpNbtNS(la, pip, ah->lnk, &pip->ip_src, ah->sport, + ah->aaddr, ah->aport); + return (0); +} + +/* Kernel module definition. 
*/
+#ifndef VBOX
+struct proto_handler handlers[] = {
+    {
+        .pri = 130,
+        .dir = IN|OUT,
+        .proto = UDP,
+        .fingerprint = &fingerprint1,
+        .protohandler = &protohandler1
+    },
+    {
+        .pri = 140,
+        .dir = IN,
+        .proto = UDP,
+        .fingerprint = &fingerprint2,
+        .protohandler = &protohandler2in
+    },
+    {
+        .pri = 140,
+        .dir = OUT,
+        .proto = UDP,
+        .fingerprint = &fingerprint2,
+        .protohandler = &protohandler2out
+    },
+    { EOH }
+};
+#else /* VBOX */
+#define handlers pData->nbt_module
+#endif /* VBOX */
+
+#ifndef VBOX
+static int
+mod_handler(module_t mod, int type, void *data)
+#else /* VBOX */
+static int nbt_alias_handler(PNATState pData, int type);
+
+/* Attach the NetBIOS handlers to this NAT instance. */
+int
+nbt_alias_load(PNATState pData)
+{
+    return nbt_alias_handler(pData, MOD_LOAD);
+}
+
+/* Detach the NetBIOS handlers and release their table. */
+int
+nbt_alias_unload(PNATState pData)
+{
+    return nbt_alias_handler(pData, MOD_UNLOAD);
+}
+/*
+ * Module event handler.
+ *
+ * In the VBOX build the handler table lives in pData->nbt_module; it is
+ * allocated on first use and (re)filled on every call.
+ * Returns 0 for MOD_LOAD / MOD_UNLOAD, EINVAL for any other event, and
+ * ENOMEM when the table cannot be allocated.
+ */
+static int
+nbt_alias_handler(PNATState pData, int type)
+#endif /* VBOX */
+{
+    int error;
+#ifdef VBOX
+    if (handlers == NULL) {
+        handlers = RTMemAllocZ(4 * sizeof(struct proto_handler));
+        /* Allocation failed: bail out instead of writing through NULL below. */
+        if (handlers == NULL)
+            return (ENOMEM);
+    }
+    handlers[0].pri = 130;
+    handlers[0].dir = IN|OUT;
+    handlers[0].proto = UDP;
+    handlers[0].fingerprint = &fingerprint1;
+    handlers[0].protohandler = &protohandler1;
+
+
+    handlers[1].pri = 140;
+    handlers[1].dir = IN;
+    handlers[1].proto = UDP;
+    handlers[1].fingerprint = &fingerprint2;
+    handlers[1].protohandler = &protohandler2in;
+
+
+    handlers[2].pri = 140;
+    handlers[2].dir = OUT;
+    handlers[2].proto = UDP;
+    handlers[2].fingerprint = &fingerprint2;
+    handlers[2].protohandler = &protohandler2out;
+
+    /* End-of-array sentinel looked for by LibAlias{Attach,Detach}Handlers. */
+    handlers[3].pri = (u_int)EOH;
+#endif /* VBOX */
+
+    switch (type) {
+    case MOD_LOAD:
+        error = 0;
+#ifdef VBOX
+        LibAliasAttachHandlers(pData, handlers);
+#else
+        LibAliasAttachHandlers(handlers);
+#endif
+        break;
+    case MOD_UNLOAD:
+        error = 0;
+#ifdef VBOX
+        LibAliasDetachHandlers(pData, handlers);
+        RTMemFree(handlers);
+        handlers = NULL;
+#else
+        LibAliasDetachHandlers(handlers);
+#endif
+        break;
+    default:
+        error = EINVAL;
+    }
+    return (error);
+}
+
+#ifndef VBOX
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+    "alias_nbt", mod_handler, NULL
+};
+#endif /* !VBOX */
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_nbt, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_nbt, 1);
+MODULE_DEPEND(alias_nbt, libalias, 1, 1, 1);
+#endif
+
+/* Translation parameters shared by the name-service record walkers. */
+typedef struct {
+    struct in_addr oldaddr;
+    u_short oldport;
+    struct in_addr newaddr;
+    u_short newport;
+    u_short *uh_sum;
+} NBTArguments;
+
+/* Leading fields of a NetBIOS datagram, overlaid on the UDP payload. */
+typedef struct {
+    unsigned char type;
+    unsigned char flags;
+    u_short id;
+    struct in_addr source_ip;
+    u_short source_port;
+    u_short len;
+    u_short offset;
+} NbtDataHeader;
+
+#define OpQuery 0
+#define OpUnknown 4
+#define OpRegist 5
+#define OpRelease 6
+#define OpWACK 7
+#define OpRefresh 8
+/* Name-service header, overlaid on the UDP payload (asserted to be 12 bytes). */
+typedef struct {
+#ifndef VBOX
+    u_short nametrid;
+    u_short dir: 1, opcode:4, nmflags:7, rcode:4;
+#else
+    unsigned nametrid:16;
+    unsigned dir: 1, opcode:4, nmflags:7, rcode:4;
+#endif
+    u_short qdcount;
+    u_short ancount;
+    u_short nscount;
+    u_short arcount;
+} NbtNSHeader;
+AssertCompileSize(NbtNSHeader, 12);
+
+#define FMT_ERR 0x1
+#define SRV_ERR 0x2
+#define IMP_ERR 0x4
+#define RFS_ERR 0x5
+#define ACT_ERR 0x6
+#define CFT_ERR 0x7
+
+
+#ifdef LIBALIAS_DEBUG
+/*
+ * Debug helper: print the message for one name-service result code.
+ * Each case ends in a break so that only the matching message is printed.
+ */
+static void
+PrintRcode(u_char rcode)
+{
+
+    switch (rcode) {
+    case FMT_ERR:
+        printf("\nFormat Error.");
+        break;
+    case SRV_ERR:
+        printf("\nSever failure.");
+        break;
+    case IMP_ERR:
+        printf("\nUnsupported request error.\n");
+        break;
+    case RFS_ERR:
+        printf("\nRefused error.\n");
+        break;
+    case ACT_ERR:
+        printf("\nActive error.\n");
+        break;
+    case CFT_ERR:
+        printf("\nName in conflict error.\n");
+        break;
+    default:
+        printf("\n?%c?=%0x\n", '?', rcode);
+        break;
+    }
+}
+
+#endif
+
+
+/*
+ * Handling Name field.
+ *
+ * Skips one encoded name starting at p.  pmax points one past the last
+ * byte of the UDP datagram.  Returns the address following the name, or
+ * NULL when the name does not fit inside the datagram.
+ */
+static u_char *
+AliasHandleName(u_char * p, char *pmax)
+{
+
+    u_char *s;
+#ifdef LIBALIAS_DEBUG
+    u_char c;
+#endif
+    int compress;
+
+    /* Following length field */
+
+    if (p == NULL || (char *)p >= pmax)
+        return (NULL);
+
+    /* Either of the top two bits set: the name is a two-byte reference. */
+    if (*p & 0xc0) {
+        p = p + 2;
+        if ((char *)p > pmax)
+            return (NULL);
+        return ((u_char *) p);
+    }
+    while ((*p & 0x3f) != 0x00) {
+        s = p + 1;
+        if (*p == 0x20)
+            compress = 1;
+        else
+            compress = 0;
+
+        /* Get next length field */
+        p = (u_char *) (p + (*p & 0x3f) + 1);
+        /*
+         * The next length byte is read by the loop condition, so it must
+         * lie strictly before pmax; p == pmax is already out of bounds.
+         */
+        if ((char *)p >= pmax) {
+            p = NULL;
+            break;
+        }
+#ifdef LIBALIAS_DEBUG
+        printf(":");
+#endif
+        while (s < p) {
+            if (compress == 1) {
+#ifdef LIBALIAS_DEBUG
+                c = (u_char) (((((*s & 0x0f) << 4) | (*(s + 1) & 0x0f)) - 0x11));
+                if (isprint(c))
+                    printf("%c", c);
+                else
+                    printf("<0x%02x>", c);
+#endif
+                s += 2;
+            } else {
+#ifdef LIBALIAS_DEBUG
+                printf("%c", *s);
+#endif
+                s++;
+            }
+        }
+#ifdef LIBALIAS_DEBUG
+        printf(":");
+        fflush(stdout);
+#endif
+    }
+
+    /* Set up to out of Name field */
+    if (p == NULL || (char *)p >= pmax)
+        p = NULL;
+    else
+        p++;
+    return ((u_char *) p);
+}
+
+/*
+ * NetBios Datagram Handler (IP/UDP)
+ */
+#define DGM_DIRECT_UNIQ 0x10
+#define DGM_DIRECT_GROUP 0x11
+#define DGM_BROADCAST 0x12
+#define DGM_ERROR 0x13
+#define DGM_QUERY 0x14
+#define DGM_POSITIVE_RES 0x15
+#define DGM_NEGATIVE_RES 0x16
+
+/*
+ * Rewrite the source address/port stored in a NetBIOS datagram header to
+ * the alias address/port, adjusting the UDP checksum when one is present.
+ *
+ * Returns -1 when the UDP length disagrees with the IP length or the
+ * header does not fit, before anything is modified.  Also returns -1 when
+ * the datagram type is unknown or its names do not parse; in that case the
+ * header has nevertheless been rewritten.  Returns 0 otherwise.
+ */
+static int
+AliasHandleUdpNbt(
+    struct libalias *la,
+    struct ip *pip, /* IP packet to examine/patch */
+    struct alias_link *lnk,
+    struct in_addr *alias_address,
+    u_short alias_port
+)
+{
+    struct udphdr *uh;
+    NbtDataHeader *ndh;
+    u_char *p = NULL;
+    char *pmax;
+
+    (void)la;
+    (void)lnk;
+
+    /* Calculate data length of UDP packet */
+    uh = (struct udphdr *)ip_next(pip);
+    pmax = (char *)uh + ntohs(uh->uh_ulen);
+
+    /* IP header has been verified, cross-check uh_ulen */
+    if (RT_UNLIKELY(pmax != (char *)pip + ntohs(pip->ip_len)))
+        return (-1);
+
+    ndh = (NbtDataHeader *)udp_next(uh);
+    if ((char *)(ndh + 1) > pmax)
+        return (-1);
+#ifdef LIBALIAS_DEBUG
+    printf("\nType=%02x,", ndh->type);
+#endif
+    switch (ndh->type) {
+    case DGM_DIRECT_UNIQ:
+    case DGM_DIRECT_GROUP:
+    case DGM_BROADCAST:
+        p = (u_char *) ndh + 14;
+        p = AliasHandleName(p, pmax); /* Source Name */
+        p = AliasHandleName(p, pmax); /* Destination Name */
+        break;
+    case DGM_ERROR:
+        p = (u_char *) ndh + 11;
+        break;
+    case DGM_QUERY:
+    case DGM_POSITIVE_RES:
+    case DGM_NEGATIVE_RES:
+        p = (u_char *) ndh + 10;
+        p = AliasHandleName(p, pmax); /* Destination Name */
+        break;
+    }
+    if (p == NULL || (char *)p > pmax)
+        p = NULL;
+#ifdef LIBALIAS_DEBUG
+    printf("%s:%d-->", inet_ntoa(ndh->source_ip), ntohs(ndh->source_port));
+#endif
+    /* Doing an IP address and Port number Translation */
+    if (uh->uh_sum != 0) {
+        int acc;
+        u_short *sptr;
+
+        acc = ndh->source_port;
+        acc -= alias_port;
+        sptr = (u_short *) & (ndh->source_ip);
+        acc += *sptr++;
+        acc += *sptr;
+        sptr = (u_short *) alias_address;
+        acc -= *sptr++;
+        acc -= *sptr;
+        ADJUST_CHECKSUM(acc, uh->uh_sum);
+    }
+    ndh->source_ip = *alias_address;
+    ndh->source_port = alias_port;
+#ifdef LIBALIAS_DEBUG
+    printf("%s:%d\n", inet_ntoa(ndh->source_ip), ntohs(ndh->source_port));
+    fflush(stdout);
+#endif
+    return ((p == NULL) ? -1 : 0);
+}
+
+/* Question Section */
+#define QS_TYPE_NB 0x0020
+#define QS_TYPE_NBSTAT 0x0021
+#define QS_CLAS_IN 0x0001
+typedef struct {
+    u_short type; /* The type of Request */
+    u_short class; /* The class of Request */
+} NBTNsQuestion;
+
+/*
+ * Skip `count` question entries starting at q.
+ * Returns the address after the last entry, or NULL when an entry does not
+ * fit before pmax.
+ * NOTE(review): for an unknown question type q is left on the type/class
+ * fields, so the next iteration parses them as a name -- confirm intended.
+ */
+static u_char *
+AliasHandleQuestion(
+    u_short count,
+    NBTNsQuestion * q,
+    char *pmax,
+    NBTArguments * nbtarg)
+{
+
+    (void)nbtarg;
+
+    while (count != 0) {
+        /* Name Field */
+        q = (NBTNsQuestion *) AliasHandleName((u_char *) q, pmax);
+
+        if (q == NULL || (char *)(q + 1) > pmax) {
+            q = NULL;
+            break;
+        }
+        /* Type and Class field */
+        switch (ntohs(q->type)) {
+        case QS_TYPE_NB:
+        case QS_TYPE_NBSTAT:
+            q = q + 1;
+            break;
+        default:
+#ifdef LIBALIAS_DEBUG
+            printf("\nUnknown Type on Question %0x\n", ntohs(q->type));
+#endif
+            break;
+        }
+        count--;
+    }
+
+    /* Set up to out of Question Section */
+    return ((u_char *) q);
+}
+
+/* Resource Record */
+#define RR_TYPE_A 0x0001
+#define RR_TYPE_NS 0x0002
+#define RR_TYPE_NULL 0x000a
+#define RR_TYPE_NB 0x0020
+#define RR_TYPE_NBSTAT 0x0021
+#define RR_CLAS_IN
 0x0001
+#define SizeOfNsResource 8 /* offset used by AliasHandleResourceNB to reach the record data */
+typedef struct {
+    u_short type;
+    u_short class;
+    unsigned int ttl;
+    u_short rdlen;
+} NBTNsResource;
+
+#define SizeOfNsRNB 6 /* stride, in bytes, between NB entries */
+typedef struct {
+#ifndef VBOX
+    u_short g: 1 , ont:2, resv:13;
+#else
+    unsigned g: 1 , ont:2, resv:13;
+#endif
+    struct in_addr addr;
+} NBTNsRNB;
+AssertCompileSize(NBTNsRNB, 8); /* NOTE(review): struct is 8 bytes but entries are stepped by SizeOfNsRNB (6) -- confirm addr lines up with the wire layout */
+
+static u_char * /* NULL when an entry would run past pmax, else the address after the last entry */
+AliasHandleResourceNB(
+    NBTNsResource * q,
+    char *pmax,
+    NBTArguments * nbtarg)
+{
+    NBTNsRNB *nb;
+    u_short bcount;
+
+    if (q == NULL || (char *)(q + 1) > pmax)
+        return (NULL);
+    /* Check out a length: rdlen is the byte count of the entries that follow */
+    bcount = ntohs(q->rdlen);
+
+    /* Forward to Resource NB position */
+    nb = (NBTNsRNB *) ((u_char *) q + SizeOfNsResource);
+
+    /* Processing all in_addr array */
+#ifdef LIBALIAS_DEBUG
+    printf("NB rec[%s", inet_ntoa(nbtarg->oldaddr));
+    printf("->%s, %dbytes] ", inet_ntoa(nbtarg->newaddr), bcount);
+#endif
+    while (nb != NULL && bcount != 0) {
+        if ((char *)(nb + 1) > pmax) {
+            nb = NULL;
+            break;
+        }
+#ifdef LIBALIAS_DEBUG
+        printf("<%s>", inet_ntoa(nb->addr));
+#endif
+        if (!bcmp(&nbtarg->oldaddr, &nb->addr, sizeof(struct in_addr))) {
+            if (*nbtarg->uh_sum != 0) {
+                int acc;
+                u_short *sptr;
+
+                sptr = (u_short *) & (nb->addr);
+                acc = *sptr++;
+                acc += *sptr;
+                sptr = (u_short *) & (nbtarg->newaddr);
+                acc -= *sptr++;
+                acc -= *sptr;
+                ADJUST_CHECKSUM(acc, *nbtarg->uh_sum);
+            }
+            nb->addr = nbtarg->newaddr;
+#ifdef LIBALIAS_DEBUG
+            printf("O");
+#endif
+        }
+#ifdef LIBALIAS_DEBUG
+        else {
+            printf(".");
+        }
+#endif
+        nb = (NBTNsRNB *) ((u_char *) nb + SizeOfNsRNB);
+        bcount -= SizeOfNsRNB; /* u_short: wraps if rdlen is not a multiple of 6; the pmax check above then ends the loop */
+    }
+    if (nb == NULL || (char *)(nb + 1) > pmax) {
+        nb = NULL;
+    }
+    return ((u_char *) nb);
+}
+
+#define SizeOfResourceA 6
+typedef struct {
+    struct in_addr addr;
+} NBTNsResourceA;
+
+static u_char * /* NULL when an entry would run past pmax, else the address after the last entry */
+AliasHandleResourceA(
+    NBTNsResource * q,
+    char *pmax,
+    NBTArguments * nbtarg)
+{
+    NBTNsResourceA *a;
+    u_short bcount;
+
+    if (q == NULL || (char *)(q + 1) > pmax)
+        return (NULL);
+
+    /* Forward to Resource A position */
+    a = (NBTNsResourceA *) ((u_char *) q + sizeof(NBTNsResource));
+
+    /* Check out of length */
+    bcount = ntohs(q->rdlen);
+
+    /* Processing all in_addr array */
+#ifdef LIBALIAS_DEBUG
+    printf("Arec [%s", inet_ntoa(nbtarg->oldaddr));
+    printf("->%s]", inet_ntoa(nbtarg->newaddr));
+#endif
+    while (bcount != 0) {
+        if (a == NULL || (char *)(a + 1) > pmax)
+            return (NULL);
+#ifdef LIBALIAS_DEBUG
+        printf("..%s", inet_ntoa(a->addr));
+#endif
+        if (!bcmp(&nbtarg->oldaddr, &a->addr, sizeof(struct in_addr))) {
+            if (*nbtarg->uh_sum != 0) {
+                int acc;
+                u_short *sptr;
+
+                sptr = (u_short *) & (a->addr); /* Old */
+                acc = *sptr++;
+                acc += *sptr;
+                sptr = (u_short *) & nbtarg->newaddr; /* New */
+                acc -= *sptr++;
+                acc -= *sptr;
+                ADJUST_CHECKSUM(acc, *nbtarg->uh_sum);
+            }
+            a->addr = nbtarg->newaddr;
+        }
+        a++; /* XXXX - NOTE(review): advances by sizeof(NBTNsResourceA) while bcount drops by SizeOfResourceA (6) */
+        bcount -= SizeOfResourceA;
+    }
+    if (a == NULL || (char *)(a + 1) > pmax)
+        a = NULL;
+    return ((u_char *) a);
+}
+
+typedef struct {
+#ifndef VBOX
+    u_short opcode:4, flags:8, resv:4;
+#else
+    u_short hidden; /* obviously not needed */
+#endif
+} NBTNsResourceNULL;
+AssertCompileSize(NBTNsResourceNULL, 2);
+
+static u_char * /* NULL when the record would run past pmax, else the address after it */
+AliasHandleResourceNULL(
+    NBTNsResource * q,
+    char *pmax,
+    NBTArguments * nbtarg)
+{
+    NBTNsResourceNULL *n;
+    u_short bcount;
+
+    (void)nbtarg;
+
+    if (q == NULL || (char *)(q + 1) > pmax)
+        return (NULL);
+
+    /* Forward to Resource NULL position */
+    n = (NBTNsResourceNULL *) ((u_char *) q + sizeof(NBTNsResource));
+
+    /* Check out of length */
+    bcount = ntohs(q->rdlen);
+
+    /* Step over the record data two bytes at a time; nothing is rewritten */
+    while (bcount != 0) {
+        if ((char *)(n + 1) > pmax) {
+            n = NULL;
+            break;
+        }
+        n++;
+        bcount -= sizeof(NBTNsResourceNULL);
+    }
+    if ((char *)(n + 1) > pmax)
+        n = NULL;
+
+    return ((u_char *) n);
+}
+
+static u_char * /* NULL when the name fails to parse or rdlen runs past pmax, else data start + rdlen */
+AliasHandleResourceNS(
+    NBTNsResource * q,
+    char *pmax,
+    NBTArguments * nbtarg)
+{
+    NBTNsResourceNULL *n;
+    u_short bcount;
+
+    (void)nbtarg;
+
+    if (q == NULL || (char *)(q + 1) > pmax)
+        return (NULL);
+
+    /* Forward to Resource NULL position */
+    n = (NBTNsResourceNULL *) ((u_char *) q + sizeof(NBTNsResource));
+
+    /* Check out of length */
+    bcount = ntohs(q->rdlen);
+
+    /* Resource Record Name Field: parsed only to validate it, the result is used as a flag */
+    q = (NBTNsResource *) AliasHandleName((u_char *) n, pmax); /* XXX */
+
+    if (q == NULL || (char *)((u_char *) n + bcount) > pmax)
+        return (NULL);
+    else
+        return ((u_char *) n + bcount);
+}
+
+typedef struct {
+    u_short numnames;
+} NBTNsResourceNBSTAT;
+
+static u_char * /* NULL when rdlen runs past pmax, else data start + rdlen */
+AliasHandleResourceNBSTAT(
+    NBTNsResource * q,
+    char *pmax,
+    NBTArguments * nbtarg)
+{
+    NBTNsResourceNBSTAT *n;
+    u_short bcount;
+
+    (void)nbtarg;
+
+    if (q == NULL || (char *)(q + 1) > pmax)
+        return (NULL);
+
+    /* Forward to Resource NBSTAT position */
+    n = (NBTNsResourceNBSTAT *) ((u_char *) q + sizeof(NBTNsResource));
+
+    /* Check out of length */
+    bcount = ntohs(q->rdlen);
+
+    if ((char *)((u_char *) n + bcount) > pmax)
+        return (NULL);
+    else
+        return ((u_char *) n + bcount);
+}
+
+static u_char * /* walks `count` resource records, dispatching on the record type */
+AliasHandleResource(
+    u_short count,
+    NBTNsResource * q,
+    char *pmax,
+    NBTArguments
+    * nbtarg)
+{
+    while (count != 0) {
+        /* Resource Record Name Field */
+        q = (NBTNsResource *) AliasHandleName((u_char *) q, pmax);
+
+        /* NOTE(review): a truncated header leaves q non-NULL here, unlike AliasHandleQuestion -- confirm intended */
+        if (q == NULL || (char *)(q + 1) > pmax)
+            break;
+#ifdef LIBALIAS_DEBUG
+        printf("type=%02x, count=%d\n", ntohs(q->type), count);
+#endif
+
+        /* Type and Class field */
+        switch (ntohs(q->type)) {
+        case RR_TYPE_NB:
+            q = (NBTNsResource *) AliasHandleResourceNB(
+                q,
+                pmax,
+                nbtarg
+                );
+            break;
+        case RR_TYPE_A:
+            q = (NBTNsResource *) AliasHandleResourceA(
+                q,
+                pmax,
+                nbtarg
+                );
+            break;
+        case RR_TYPE_NS:
+            q = (NBTNsResource *) AliasHandleResourceNS(
+                q,
+                pmax,
+                nbtarg
+                );
+            break;
+        case RR_TYPE_NULL:
+            q = (NBTNsResource *) AliasHandleResourceNULL(
+                q,
+                pmax,
+                nbtarg
+                );
+            break;
+        case RR_TYPE_NBSTAT:
+            q = (NBTNsResource *) AliasHandleResourceNBSTAT(
+                q,
+                pmax,
+                nbtarg
+                );
+            break;
+        default:
+#ifdef LIBALIAS_DEBUG
+            printf(
+                "\nUnknown Type of Resource %0x\n",
+                ntohs(q->type)
+                );
+            fflush(stdout);
+#endif
+            break;
+        }
+        count--;
+    }
+    return ((u_char *) q);
+}
+
+static int /* 0 when every section parsed, -1 on a length mismatch or parse failure */
+AliasHandleUdpNbtNS(
+    struct libalias *la,
+    struct ip *pip, /* IP packet to examine/patch */
+    struct alias_link *lnk,
+    struct in_addr *alias_address,
+    u_short * alias_port,
+    struct in_addr *original_address,
+    u_short * original_port)
+{
+    struct udphdr *uh;
+    NbtNSHeader *nsh;
+    u_char *p;
+    char *pmax;
+    NBTArguments nbtarg;
+
+    (void)la;
+    (void)lnk;
+
+    /* Set up Common Parameter: records holding oldaddr are rewritten to newaddr */
+    nbtarg.oldaddr = *alias_address;
+    nbtarg.oldport = *alias_port;
+    nbtarg.newaddr = *original_address;
+    nbtarg.newport = *original_port;
+
+    /* Calculate data length of UDP packet */
+    uh = (struct udphdr *)ip_next(pip);
+    nbtarg.uh_sum = &(uh->uh_sum);
+    nsh = (NbtNSHeader *)udp_next(uh);
+    p = (u_char *) (nsh + 1);
+    pmax = (char *)uh + ntohs(uh->uh_ulen);
+
+    /* IP header has been verified, cross-check uh_ulen */
+    if (RT_UNLIKELY(pmax != (char *)pip + ntohs(pip->ip_len)))
+        return (-1);
+
+    if ((char *)(nsh + 1) > pmax)
+        return (-1);
+
+#ifdef LIBALIAS_DEBUG
+    printf(" [%s] ID=%02x, op=%01x, flag=%02x, rcode=%01x, qd=%04x"
+        ", an=%04x, ns=%04x, ar=%04x, [%d]-->",
+        nsh->dir ? "Response" : "Request",
+        nsh->nametrid,
+        nsh->opcode,
+        nsh->nmflags,
+        nsh->rcode,
+        ntohs(nsh->qdcount),
+        ntohs(nsh->ancount),
+        ntohs(nsh->nscount),
+        ntohs(nsh->arcount),
+        (u_char *) p - (u_char *) nsh
+        );
+#endif
+
+    /* Question Entries */
+    if (ntohs(nsh->qdcount) != 0) {
+        p = AliasHandleQuestion(
+            ntohs(nsh->qdcount),
+            (NBTNsQuestion *) p,
+            pmax,
+            &nbtarg
+            );
+    }
+    /* Answer Resource Records */
+    if (ntohs(nsh->ancount) != 0) {
+        p = AliasHandleResource(
+            ntohs(nsh->ancount),
+            (NBTNsResource *) p,
+            pmax,
+            &nbtarg
+            );
+    }
+    /* Authority Resource Records */
+    if (ntohs(nsh->nscount) != 0) {
+        p = AliasHandleResource(
+            ntohs(nsh->nscount),
+            (NBTNsResource *) p,
+            pmax,
+            &nbtarg
+            );
+    }
+    /* Additional Resource Records */
+    if (ntohs(nsh->arcount) != 0) {
+        p = AliasHandleResource(
+            ntohs(nsh->arcount),
+            (NBTNsResource *) p,
+            pmax,
+            &nbtarg
+            );
+    }
+#ifdef LIBALIAS_DEBUG
+    PrintRcode(nsh->rcode);
+#endif
+    return ((p == NULL) ? -1 : 0);
+}
diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_old.c b/src/VBox/Devices/Network/slirp/libalias/alias_old.c
new file mode 100644
index 00000000..7a85be1b
--- /dev/null
+++ b/src/VBox/Devices/Network/slirp/libalias/alias_old.c
@@ -0,0 +1,216 @@
+/*-
+ * Copyright (c) 2004 Poul-Henning Kamp <phk@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_old.c,v 1.8.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/proc.h> +#else +#include <sys/types.h> +#include <stdlib.h> +#endif + +#include <netinet/in.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#else +#include "alias.h" +#endif + +/* + * These functions are for backwards compatibility and because apps may + * be linked against shlib versions, they have to be actual functions, + * we cannot inline them. 
+ */ + +static struct libalias *la; + +void +PacketAliasInit(void) +{ + + la = LibAliasInit(la); +} + +void +PacketAliasSetAddress(struct in_addr _addr) +{ + + LibAliasSetAddress(la, _addr); +} + +void +PacketAliasSetFWBase(unsigned int _base, unsigned int _num) +{ + + LibAliasSetFWBase(la, _base, _num); +} + +void +PacketAliasSetSkinnyPort(unsigned int _port) +{ + + LibAliasSetSkinnyPort(la, _port); +} + +unsigned int +PacketAliasSetMode(unsigned int _flags, unsigned int _mask) +{ + + return LibAliasSetMode(la, _flags, _mask); +} + +void +PacketAliasUninit(void) +{ + + LibAliasUninit(la); + la = NULL; +} + +int +PacketAliasIn(char *_ptr, int _maxpacketsize) +{ + return LibAliasIn(la, _ptr, _maxpacketsize); +} + +int +PacketAliasOut(char *_ptr, int _maxpacketsize) +{ + + return LibAliasOut(la, _ptr, _maxpacketsize); +} + +int +PacketUnaliasOut(char *_ptr, int _maxpacketsize) +{ + + return LibAliasUnaliasOut(la, _ptr, _maxpacketsize); +} + +int +PacketAliasAddServer(struct alias_link *_lnk, + struct in_addr _addr, unsigned short _port) +{ + + return LibAliasAddServer(la, _lnk, _addr, _port); +} + +struct alias_link * +PacketAliasRedirectAddr(struct in_addr _src_addr, + struct in_addr _alias_addr) +{ + + return LibAliasRedirectAddr(la, _src_addr, _alias_addr); +} + + +int +PacketAliasRedirectDynamic(struct alias_link *_lnk) +{ + + return LibAliasRedirectDynamic(la, _lnk); +} + +void +PacketAliasRedirectDelete(struct alias_link *_lnk) +{ + + LibAliasRedirectDelete(la, _lnk); +} + +struct alias_link * +PacketAliasRedirectPort(struct in_addr _src_addr, + unsigned short _src_port, struct in_addr _dst_addr, + unsigned short _dst_port, struct in_addr _alias_addr, + unsigned short _alias_port, unsigned char _proto) +{ + + return LibAliasRedirectPort(la, _src_addr, _src_port, _dst_addr, + _dst_port, _alias_addr, _alias_port, _proto); +} + +struct alias_link * +PacketAliasRedirectProto(struct in_addr _src_addr, + struct in_addr _dst_addr, struct in_addr _alias_addr, + 
unsigned char _proto) +{ + + return LibAliasRedirectProto(la, _src_addr, _dst_addr, _alias_addr, + _proto); +} + +void +PacketAliasFragmentIn(char *_ptr, char *_ptr_fragment) +{ + + LibAliasFragmentIn(la, _ptr, _ptr_fragment); +} + +char * +PacketAliasGetFragment(char *_ptr) +{ + + return LibAliasGetFragment(la, _ptr); +} + +int +PacketAliasSaveFragment(char *_ptr) +{ + return LibAliasSaveFragment(la, _ptr); +} + +int +PacketAliasCheckNewLink(void) +{ + + return LibAliasCheckNewLink(la); +} + +unsigned short +PacketAliasInternetChecksum(unsigned short *_ptr, int _nbytes) +{ + + return LibAliasInternetChecksum(la, _ptr, _nbytes); +} + +void +PacketAliasSetTarget(struct in_addr _target_addr) +{ + + LibAliasSetTarget(la, _target_addr); +} + +/* Transparent proxying routines. */ +int +PacketAliasProxyRule(const char *_cmd) +{ + + return LibAliasProxyRule(la, _cmd); +} diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_pptp.c b/src/VBox/Devices/Network/slirp/libalias/alias_pptp.c new file mode 100644 index 00000000..601e87a2 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_pptp.c @@ -0,0 +1,523 @@ +/* + * alias_pptp.c + * + * Copyright (c) 2000 Whistle Communications, Inc. + * All rights reserved. + * + * Subject to the following obligations and disclaimer of warranty, use and + * redistribution of this software, in source or object code forms, with or + * without modifications are expressly permitted by Whistle Communications; + * provided, however, that: + * 1. Any and all reproductions of the source or object code must include the + * copyright notice above and the following disclaimer of warranties; and + * 2. No rights are granted, in any manner or form, to use Whistle + * Communications, Inc. trademarks, including the mark "WHISTLE + * COMMUNICATIONS" on advertising, endorsements, or otherwise except as + * such appears in the above copyright notice or in the software. 
+ * + * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND + * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO + * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, + * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. + * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY + * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS + * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. + * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES + * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING + * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY + * OF SUCH DAMAGE. 
+ * + * Author: Erik Salander <erik@whistle.com> + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_pptp.c,v 1.15.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +/* Includes */ +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/limits.h> +#include <sys/kernel.h> +#include <sys/module.h> +#else +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <stdio.h> +#endif + +#include <netinet/tcp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include "alias.h" +#include "alias_local.h" +#include "alias_mod.h" +#endif + +#define PPTP_CONTROL_PORT_NUMBER 1723 + +static void +AliasHandlePptpOut(struct libalias *, struct ip *, struct alias_link *); + +static void +AliasHandlePptpIn(struct libalias *, struct ip *, struct alias_link *); + +static int +AliasHandlePptpGreOut(struct libalias *, struct ip *); + +static int +AliasHandlePptpGreIn(struct libalias *, struct ip *); + +static int +fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL) + return (-1); + if (ntohs(*ah->dport) == PPTP_CONTROL_PORT_NUMBER + || ntohs(*ah->sport) == PPTP_CONTROL_PORT_NUMBER) + return (0); + return (-1); +} + +static int +fingerprintgre(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + return (0); +} + +static int +protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandlePptpIn(la, pip, ah->lnk); + return (0); +} + +static int +protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandlePptpOut(la, pip, ah->lnk); + return (0); +} + +static int +protohandlergrein(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY || + AliasHandlePptpGreIn(la, pip) == 0) + return (0); + return (-1); +} + +static int 
+protohandlergreout(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + if (AliasHandlePptpGreOut(la, pip) == 0) + return (0); + return (-1); +} + +/* Kernel module definition. */ +struct proto_handler handlers[] = { + { + .pri = 200, + .dir = IN, + .proto = TCP, + .fingerprint = &fingerprint, + .protohandler = &protohandlerin + }, + { + .pri = 210, + .dir = OUT, + .proto = TCP, + .fingerprint = &fingerprint, + .protohandler = &protohandlerout + }, +/* + * WATCH OUT!!! these 2 handlers NEED a priority of INT_MAX (highest possible) + * cause they will ALWAYS process packets, so they must be the last one + * in chain: look fingerprintgre() above. + */ + { + .pri = INT_MAX, + .dir = IN, + .proto = IP, + .fingerprint = &fingerprintgre, + .protohandler = &protohandlergrein + }, + { + .pri = INT_MAX, + .dir = OUT, + .proto = IP, + .fingerprint = &fingerprintgre, + .protohandler = &protohandlergreout + }, + { EOH } +}; +static int +mod_handler(module_t mod, int type, void *data) +{ + int error; + + switch (type) { + case MOD_LOAD: + error = 0; + LibAliasAttachHandlers(handlers); + break; + case MOD_UNLOAD: + error = 0; + LibAliasDetachHandlers(handlers); + break; + default: + error = EINVAL; + } + return (error); +} + +#ifdef _KERNEL +static +#endif +moduledata_t alias_mod = { + "alias_pptp", mod_handler, NULL +}; + +#ifdef _KERNEL +DECLARE_MODULE(alias_pptp, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); +MODULE_VERSION(alias_pptp, 1); +MODULE_DEPEND(alias_pptp, libalias, 1, 1, 1); +#endif + +/* + Alias_pptp.c performs special processing for PPTP sessions under TCP. + Specifically, watch PPTP control messages and alias the Call ID or the + Peer's Call ID in the appropriate messages. Note, PPTP requires + "de-aliasing" of incoming packets, this is different than any other + TCP applications that are currently (ie. FTP, IRC and RTSP) aliased. + + For Call IDs encountered for the first time, a PPTP alias link is created. 
   The PPTP alias link uses the Call ID in place of the original port number.
   An alias Call ID is created.

   For this routine to work, the PPTP control messages must fit entirely
   into a single TCP packet.  This is typically the case, but is not
   required by the spec.

   Unlike some of the other TCP applications that are aliased (i.e. FTP,
   IRC and RTSP), the PPTP control messages that need to be aliased are
   guaranteed to remain the same length.  The aliased Call ID is a fixed
   length field.

   Reference: RFC 2637

   Initial version:  May, 2000 (eds)

*/

/*
 * PPTP definitions
 */

/*
 * Enhanced GRE header as carried by PPTP data packets.  The GRE handlers
 * in this file overlay this structure directly on the bytes that follow
 * the IP header, so the fields hold the values exactly as found in the
 * packet.
 */
struct grehdr {			/* Enhanced GRE header. */
	u_int16_t	gh_flags;	/* Flags. */
	u_int16_t	gh_protocol;	/* Protocol type. */
	u_int16_t	gh_length;	/* Payload length. */
	u_int16_t	gh_call_id;	/* Call ID. */
	u_int32_t	gh_seq_no;	/* Sequence number (optional). */
	u_int32_t	gh_ack_no;	/* Acknowledgment number
					 * (optional). */
};
typedef struct grehdr GreHdr;

/* The PPTP protocol ID used in the GRE 'proto' field. */
#define PPTP_GRE_PROTO          0x880b

/* Bits that must be set a certain way in all PPTP/GRE packets.
*/ +#define PPTP_INIT_VALUE ((0x2001 << 16) | PPTP_GRE_PROTO) +#define PPTP_INIT_MASK 0xef7fffff + +#define PPTP_MAGIC 0x1a2b3c4d +#define PPTP_CTRL_MSG_TYPE 1 + +enum { + PPTP_StartCtrlConnRequest = 1, + PPTP_StartCtrlConnReply = 2, + PPTP_StopCtrlConnRequest = 3, + PPTP_StopCtrlConnReply = 4, + PPTP_EchoRequest = 5, + PPTP_EchoReply = 6, + PPTP_OutCallRequest = 7, + PPTP_OutCallReply = 8, + PPTP_InCallRequest = 9, + PPTP_InCallReply = 10, + PPTP_InCallConn = 11, + PPTP_CallClearRequest = 12, + PPTP_CallDiscNotify = 13, + PPTP_WanErrorNotify = 14, + PPTP_SetLinkInfo = 15 +}; + + /* Message structures */ +struct pptpMsgHead { + u_int16_t length; /* total length */ + u_int16_t msgType;/* PPTP message type */ + u_int32_t magic; /* magic cookie */ + u_int16_t type; /* control message type */ + u_int16_t resv0; /* reserved */ +}; +typedef struct pptpMsgHead *PptpMsgHead; + +struct pptpCodes { + u_int8_t resCode;/* Result Code */ + u_int8_t errCode;/* Error Code */ +}; +typedef struct pptpCodes *PptpCode; + +struct pptpCallIds { + u_int16_t cid1; /* Call ID field #1 */ + u_int16_t cid2; /* Call ID field #2 */ +}; +typedef struct pptpCallIds *PptpCallId; + +static PptpCallId AliasVerifyPptp(struct ip *, u_int16_t *); + + +static void +AliasHandlePptpOut(struct libalias *la, + struct ip *pip, /* IP packet to examine/patch */ + struct alias_link *lnk) +{ /* The PPTP control link */ + struct alias_link *pptp_lnk; + PptpCallId cptr; + PptpCode codes; + u_int16_t ctl_type; /* control message type */ + struct tcphdr *tc; + + /* Verify valid PPTP control message */ + if ((cptr = AliasVerifyPptp(pip, &ctl_type)) == NULL) + return; + + /* Modify certain PPTP messages */ + switch (ctl_type) { + case PPTP_OutCallRequest: + case PPTP_OutCallReply: + case PPTP_InCallRequest: + case PPTP_InCallReply: + /* + * Establish PPTP link for address and Call ID found in + * control message. 
+ */ + pptp_lnk = AddPptp(la, GetOriginalAddress(lnk), GetDestAddress(lnk), + GetAliasAddress(lnk), cptr->cid1); + break; + case PPTP_CallClearRequest: + case PPTP_CallDiscNotify: + /* + * Find PPTP link for address and Call ID found in control + * message. + */ + pptp_lnk = FindPptpOutByCallId(la, GetOriginalAddress(lnk), + GetDestAddress(lnk), + cptr->cid1); + break; + default: + return; + } + + if (pptp_lnk != NULL) { + int accumulate = cptr->cid1; + + /* alias the Call Id */ + cptr->cid1 = GetAliasPort(pptp_lnk); + + /* Compute TCP checksum for revised packet */ + tc = (struct tcphdr *)ip_next(pip); + accumulate -= cptr->cid1; + ADJUST_CHECKSUM(accumulate, tc->th_sum); + + switch (ctl_type) { + case PPTP_OutCallReply: + case PPTP_InCallReply: + codes = (PptpCode) (cptr + 1); + if (codes->resCode == 1) /* Connection + * established, */ + SetDestCallId(pptp_lnk, /* note the Peer's Call + * ID. */ + cptr->cid2); + else + SetExpire(pptp_lnk, 0); /* Connection refused. */ + break; + case PPTP_CallDiscNotify: /* Connection closed. */ + SetExpire(pptp_lnk, 0); + break; + } + } +} + +static void +AliasHandlePptpIn(struct libalias *la, + struct ip *pip, /* IP packet to examine/patch */ + struct alias_link *lnk) +{ /* The PPTP control link */ + struct alias_link *pptp_lnk; + PptpCallId cptr; + u_int16_t *pcall_id; + u_int16_t ctl_type; /* control message type */ + struct tcphdr *tc; + + /* Verify valid PPTP control message */ + if ((cptr = AliasVerifyPptp(pip, &ctl_type)) == NULL) + return; + + /* Modify certain PPTP messages */ + switch (ctl_type) { + case PPTP_InCallConn: + case PPTP_WanErrorNotify: + case PPTP_SetLinkInfo: + pcall_id = &cptr->cid1; + break; + case PPTP_OutCallReply: + case PPTP_InCallReply: + pcall_id = &cptr->cid2; + break; + case PPTP_CallDiscNotify: /* Connection closed. 
*/ + pptp_lnk = FindPptpInByCallId(la, GetDestAddress(lnk), + GetAliasAddress(lnk), + cptr->cid1); + if (pptp_lnk != NULL) + SetExpire(pptp_lnk, 0); + return; + default: + return; + } + + /* Find PPTP link for address and Call ID found in PPTP Control Msg */ + pptp_lnk = FindPptpInByPeerCallId(la, GetDestAddress(lnk), + GetAliasAddress(lnk), + *pcall_id); + + if (pptp_lnk != NULL) { + int accumulate = *pcall_id; + + /* De-alias the Peer's Call Id. */ + *pcall_id = GetOriginalPort(pptp_lnk); + + /* Compute TCP checksum for modified packet */ + tc = (struct tcphdr *)ip_next(pip); + accumulate -= *pcall_id; + ADJUST_CHECKSUM(accumulate, tc->th_sum); + + if (ctl_type == PPTP_OutCallReply || ctl_type == PPTP_InCallReply) { + PptpCode codes = (PptpCode) (cptr + 1); + + if (codes->resCode == 1) /* Connection + * established, */ + SetDestCallId(pptp_lnk, /* note the Call ID. */ + cptr->cid1); + else + SetExpire(pptp_lnk, 0); /* Connection refused. */ + } + } +} + +static PptpCallId +AliasVerifyPptp(struct ip *pip, u_int16_t * ptype) +{ /* IP packet to examine/patch */ + int hlen, tlen, dlen; + PptpMsgHead hptr; + struct tcphdr *tc; + + /* Calculate some lengths */ + tc = (struct tcphdr *)ip_next(pip); + hlen = (pip->ip_hl + tc->th_off) << 2; + tlen = ntohs(pip->ip_len); + dlen = tlen - hlen; + + /* Verify data length */ + if (dlen < (int)(sizeof(struct pptpMsgHead) + sizeof(struct pptpCallIds))) + return (NULL); + + /* Move up to PPTP message header */ + hptr = (PptpMsgHead) tcp_next(tc); + + /* Return the control message type */ + *ptype = ntohs(hptr->type); + + /* Verify PPTP Control Message */ + if ((ntohs(hptr->msgType) != PPTP_CTRL_MSG_TYPE) || + (ntohl(hptr->magic) != PPTP_MAGIC)) + return (NULL); + + /* Verify data length. 
*/ + if ((*ptype == PPTP_OutCallReply || *ptype == PPTP_InCallReply) && + (dlen < (int)(sizeof(struct pptpMsgHead) + sizeof(struct pptpCallIds) + + sizeof(struct pptpCodes)))) + return (NULL); + else + return (PptpCallId) (hptr + 1); +} + +static int +AliasHandlePptpGreOut(struct libalias *la, struct ip *pip) +{ + GreHdr *gr; + struct alias_link *lnk; + + gr = (GreHdr *) ip_next(pip); + + /* Check GRE header bits. */ + if ((ntohl(*((u_int32_t *) gr)) & PPTP_INIT_MASK) != PPTP_INIT_VALUE) + return (-1); + + lnk = FindPptpOutByPeerCallId(la, pip->ip_src, pip->ip_dst, gr->gh_call_id); + if (lnk != NULL) { + struct in_addr alias_addr = GetAliasAddress(lnk); + + /* Change source IP address. */ + DifferentialChecksum(&pip->ip_sum, + &alias_addr, &pip->ip_src, 2); + pip->ip_src = alias_addr; + } + return (0); +} + +static int +AliasHandlePptpGreIn(struct libalias *la, struct ip *pip) +{ + GreHdr *gr; + struct alias_link *lnk; + + gr = (GreHdr *) ip_next(pip); + + /* Check GRE header bits. */ + if ((ntohl(*((u_int32_t *) gr)) & PPTP_INIT_MASK) != PPTP_INIT_VALUE) + return (-1); + + lnk = FindPptpInByPeerCallId(la, pip->ip_src, pip->ip_dst, gr->gh_call_id); + if (lnk != NULL) { + struct in_addr src_addr = GetOriginalAddress(lnk); + + /* De-alias the Peer's Call Id. */ + gr->gh_call_id = GetOriginalPort(lnk); + + /* Restore original IP address. */ + DifferentialChecksum(&pip->ip_sum, + &src_addr, &pip->ip_dst, 2); + pip->ip_dst = src_addr; + } + return (0); +} diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_proxy.c b/src/VBox/Devices/Network/slirp/libalias/alias_proxy.c new file mode 100644 index 00000000..b49b726c --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_proxy.c @@ -0,0 +1,999 @@ +/*- + * Copyright (c) 2001 Charles Mott <cm@linktel.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_proxy.c,v 1.31.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +/* file: alias_proxy.c + + This file encapsulates special operations related to transparent + proxy redirection. This is where packets with a particular destination, + usually tcp port 80, are redirected to a proxy server. + + When packets are proxied, the destination address and port are + modified. In certain cases, it is necessary to somehow encode + the original address/port info into the packet. Two methods are + presently supported: addition of a [DEST addr port] string at the + beginning of a tcp stream, or inclusion of an optional field + in the IP header. + + There is one public API function: + + PacketAliasProxyRule() -- Adds and deletes proxy + rules. 
+ + Rules are stored in a linear linked list, so lookup efficiency + won't be too good for large lists. + + + Initial development: April, 1998 (cjm) +*/ + + +/* System includes */ +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/ctype.h> +#include <sys/libkern.h> +#include <sys/limits.h> +#else +#include <sys/types.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <netdb.h> +#include <string.h> +#endif + +#include <netinet/tcp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include <arpa/inet.h> +#include "alias.h" /* Public API functions for libalias */ +#include "alias_local.h" /* Functions used by alias*.c */ +#endif +#else /* VBOX */ +# include <iprt/ctype.h> +# include <iprt/string.h> +# include <slirp.h> +# include "alias.h" /* Public API functions for libalias */ +# include "alias_local.h" /* Functions used by alias*.c */ +# define tolower(ch) RT_C_TO_LOWER(ch) +#endif /* VBOX */ + +/* + Data structures + */ + +/* + * A linked list of arbitrary length, based on struct proxy_entry is + * used to store proxy rules. + */ +struct proxy_entry { + struct libalias *la; +#define PROXY_TYPE_ENCODE_NONE 1 +#define PROXY_TYPE_ENCODE_TCPSTREAM 2 +#define PROXY_TYPE_ENCODE_IPHDR 3 + int rule_index; + int proxy_type; + u_char proto; + u_short proxy_port; + u_short server_port; + + struct in_addr server_addr; + + struct in_addr src_addr; + struct in_addr src_mask; + + struct in_addr dst_addr; + struct in_addr dst_mask; + + struct proxy_entry *next; + struct proxy_entry *last; +}; + + + +/* + File scope variables +*/ + + + +/* Local (static) functions: + + IpMask() -- Utility function for creating IP + masks from integer (1-32) specification. + IpAddr() -- Utility function for converting string + to IP address + IpPort() -- Utility function for converting string + to port number + RuleAdd() -- Adds an element to the rule list. 
+ RuleDelete() -- Removes an element from the rule list. + RuleNumberDelete() -- Removes all elements from the rule list + having a certain rule number. + ProxyEncodeTcpStream() -- Adds [DEST x.x.x.x xxxx] to the beginning + of a TCP stream. + ProxyEncodeIpHeader() -- Adds an IP option indicating the true + destination of a proxied IP packet +*/ + +#ifdef _KERNEL /* XXX: can it be moved to libkern? */ +static int inet_aton(const char *cp, struct in_addr *addr); +#endif +static int IpMask(int, struct in_addr *); +static int IpAddr(char *, struct in_addr *); +static int IpPort(char *, int, int *); +static void RuleAdd(struct libalias *la, struct proxy_entry *); +static void RuleDelete(struct proxy_entry *); +static int RuleNumberDelete(struct libalias *la, int); +static void ProxyEncodeTcpStream(struct alias_link *, struct ip *, int); +static void ProxyEncodeIpHeader(struct ip *, int); + +#ifdef _KERNEL +static int +inet_aton(cp, addr) + const char *cp; + struct in_addr *addr; +{ + u_long parts[4]; + in_addr_t val; + const char *c; + char *endptr; + int gotend, n; + + c = (const char *)cp; + n = 0; + /* + * Run through the string, grabbing numbers until + * the end of the string, or some error + */ + gotend = 0; + while (!gotend) { + unsigned long l; + + l = strtoul(c, &endptr, 0); + + if (l == ULONG_MAX || (l == 0 && endptr == c)) + return (0); + + val = (in_addr_t)l; + /* + * If the whole string is invalid, endptr will equal + * c.. this way we can make sure someone hasn't + * gone '.12' or something which would get past + * the next check. + */ + if (endptr == c) + return (0); + parts[n] = val; + c = endptr; + + /* Check the next character past the previous number's end */ + switch (*c) { + case '.' : + /* Make sure we only do 3 dots .. */ + if (n == 3) /* Whoops. Quit. 
*/ + return (0); + n++; + c++; + break; + + case '\0': + gotend = 1; + break; + + default: + if (isspace((unsigned char)*c)) { + gotend = 1; + break; + } else + return (0); /* Invalid character, so fail */ + } + + } + + /* + * Concoct the address according to + * the number of parts specified. + */ + + switch (n) { + case 0: /* a -- 32 bits */ + /* + * Nothing is necessary here. Overflow checking was + * already done in strtoul(). + */ + break; + case 1: /* a.b -- 8.24 bits */ + if (val > 0xffffff || parts[0] > 0xff) + return (0); + val |= parts[0] << 24; + break; + + case 2: /* a.b.c -- 8.8.16 bits */ + if (val > 0xffff || parts[0] > 0xff || parts[1] > 0xff) + return (0); + val |= (parts[0] << 24) | (parts[1] << 16); + break; + + case 3: /* a.b.c.d -- 8.8.8.8 bits */ + if (val > 0xff || parts[0] > 0xff || parts[1] > 0xff || + parts[2] > 0xff) + return (0); + val |= (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8); + break; + } + + if (addr != NULL) + addr->s_addr = htonl(val); + return (1); +} +#endif + +static int +IpMask(int nbits, struct in_addr *mask) +{ + int i; + u_int imask; + + if (nbits < 0 || nbits > 32) + return (-1); + + imask = 0; + for (i = 0; i < nbits; i++) + imask = (imask >> 1) + 0x80000000; + mask->s_addr = htonl(imask); + + return (0); +} + +static int +IpAddr(char *s, struct in_addr *addr) +{ + if (inet_aton(s, addr) == 0) + return (-1); + else + return (0); +} + +static int +IpPort(char *s, int proto, int *port) +{ + int n; + + n = sscanf(s, "%d", port); + if (n != 1) +#ifndef _KERNEL /* XXX: we accept only numeric ports in kernel */ + { + struct servent *se; + + if (proto == IPPROTO_TCP) + se = getservbyname(s, "tcp"); + else if (proto == IPPROTO_UDP) + se = getservbyname(s, "udp"); + else + return (-1); + + if (se == NULL) + return (-1); + + *port = (u_int) ntohs(se->s_port); + } +#else + return (-1); +#endif + return (0); +} + +void +RuleAdd(struct libalias *la, struct proxy_entry *entry) +{ + int rule_index; + struct proxy_entry 
*ptr; + struct proxy_entry *ptr_last; + + LIBALIAS_LOCK_ASSERT(la); + + if (la->proxyList == NULL) { + la->proxyList = entry; + entry->last = NULL; + entry->next = NULL; + return; + } + entry->la = la; + + rule_index = entry->rule_index; + ptr = la->proxyList; + ptr_last = NULL; + while (ptr != NULL) { + if (ptr->rule_index >= rule_index) { + if (ptr_last == NULL) { + entry->next = la->proxyList; + entry->last = NULL; + la->proxyList->last = entry; + la->proxyList = entry; + return; + } + ptr_last->next = entry; + ptr->last = entry; + entry->last = ptr->last; + entry->next = ptr; + return; + } + ptr_last = ptr; + ptr = ptr->next; + } + + ptr_last->next = entry; + entry->last = ptr_last; + entry->next = NULL; +} + +static void +RuleDelete(struct proxy_entry *entry) +{ + struct libalias *la; + + la = entry->la; + LIBALIAS_LOCK_ASSERT(la); + if (entry->last != NULL) + entry->last->next = entry->next; + else + la->proxyList = entry->next; + + if (entry->next != NULL) + entry->next->last = entry->last; + + free(entry); +} + +static int +RuleNumberDelete(struct libalias *la, int rule_index) +{ + int err; + struct proxy_entry *ptr; + + LIBALIAS_LOCK_ASSERT(la); + err = -1; + ptr = la->proxyList; + while (ptr != NULL) { + struct proxy_entry *ptr_next; + + ptr_next = ptr->next; + if (ptr->rule_index == rule_index) { + err = 0; + RuleDelete(ptr); + } + ptr = ptr_next; + } + + return (err); +} + +static void +ProxyEncodeTcpStream(struct alias_link *lnk, + struct ip *pip, + int maxpacketsize) +{ + int slen; + char buffer[40]; + struct tcphdr *tc; + +/* Compute pointer to tcp header */ + tc = (struct tcphdr *)ip_next(pip); + +/* Don't modify if once already modified */ + + if (GetAckModified(lnk)) + return; + +/* Translate destination address and port to string form */ +#ifndef VBOX + snprintf(buffer, sizeof(buffer) - 2, "[DEST %s %d]", + inet_ntoa(GetProxyAddress(lnk)), (u_int) ntohs(GetProxyPort(lnk))); +#else + RTStrPrintf(buffer, sizeof(buffer) - 2, "[DEST %s %d]", + 
inet_ntoa(GetProxyAddress(lnk)), (u_int) ntohs(GetProxyPort(lnk))); +#endif + +/* Pad string out to a multiple of two in length */ + slen = (int)strlen(buffer); + switch (slen % 2) { + case 0: + strcat(buffer, " \n"); + slen += 2; + break; + case 1: + strcat(buffer, "\n"); + slen += 1; + } + +/* Check for packet overflow */ + if ((int)(ntohs(pip->ip_len) + strlen(buffer)) > maxpacketsize) + return; + +/* Shift existing TCP data and insert destination string */ + { + int dlen; + int hlen; + char *p; + + hlen = (pip->ip_hl + tc->th_off) << 2; + dlen = ntohs(pip->ip_len) - hlen; + +/* Modify first packet that has data in it */ + + if (dlen == 0) + return; + + p = (char *)pip; + p += hlen; + + bcopy(p, p + slen, dlen); + memcpy(p, buffer, slen); + } + +/* Save information about modfied sequence number */ + { + int delta; + + SetAckModified(lnk); + delta = GetDeltaSeqOut(pip, lnk); + AddSeq(pip, lnk, delta + slen); + } + +/* Update IP header packet length and checksum */ + { + int accumulate; + + accumulate = pip->ip_len; + pip->ip_len = htons(ntohs(pip->ip_len) + slen); + accumulate -= pip->ip_len; + + ADJUST_CHECKSUM(accumulate, pip->ip_sum); + } + +/* Update TCP checksum, Use TcpChecksum since so many things have + already changed. 
*/ + + tc->th_sum = 0; +#ifdef _KERNEL + tc->th_x2 = 1; +#else + tc->th_sum = TcpChecksum(pip); +#endif +} + +static void +ProxyEncodeIpHeader(struct ip *pip, + int maxpacketsize) +{ +#define OPTION_LEN_BYTES 8 +#define OPTION_LEN_INT16 4 +#define OPTION_LEN_INT32 2 + u_char option[OPTION_LEN_BYTES]; + +#ifdef LIBALIAS_DEBUG + fprintf(stdout, " ip cksum 1 = %x\n", (u_int) IpChecksum(pip)); + fprintf(stdout, "tcp cksum 1 = %x\n", (u_int) TcpChecksum(pip)); +#endif + + (void)maxpacketsize; + +/* Check to see that there is room to add an IP option */ + if (pip->ip_hl > (0x0f - OPTION_LEN_INT32)) + return; + +/* Build option and copy into packet */ + { + u_char *ptr; + struct tcphdr *tc; + + ptr = (u_char *) pip; + ptr += 20; + memcpy(ptr + OPTION_LEN_BYTES, ptr, ntohs(pip->ip_len) - 20); + + option[0] = 0x64; /* class: 3 (reserved), option 4 */ + option[1] = OPTION_LEN_BYTES; + + memcpy(&option[2], (u_char *) & pip->ip_dst, 4); + + tc = (struct tcphdr *)ip_next(pip); + memcpy(&option[6], (u_char *) & tc->th_sport, 2); + + memcpy(ptr, option, 8); + } + +/* Update checksum, header length and packet length */ + { + int i; + int accumulate; + u_short *sptr; + + sptr = (u_short *) option; + accumulate = 0; + for (i = 0; i < OPTION_LEN_INT16; i++) + accumulate -= *(sptr++); + + sptr = (u_short *) pip; + accumulate += *sptr; + pip->ip_hl += OPTION_LEN_INT32; + accumulate -= *sptr; + + accumulate += pip->ip_len; + pip->ip_len = htons(ntohs(pip->ip_len) + OPTION_LEN_BYTES); + accumulate -= pip->ip_len; + + ADJUST_CHECKSUM(accumulate, pip->ip_sum); + } +#undef OPTION_LEN_BYTES +#undef OPTION_LEN_INT16 +#undef OPTION_LEN_INT32 +#ifdef LIBALIAS_DEBUG + fprintf(stdout, " ip cksum 2 = %x\n", (u_int) IpChecksum(pip)); + fprintf(stdout, "tcp cksum 2 = %x\n", (u_int) TcpChecksum(pip)); +#endif +} + + +/* Functions by other packet alias source files + + ProxyCheck() -- Checks whether an outgoing packet should + be proxied. 
+ ProxyModify() -- Encodes the original destination address/port + for a packet which is to be redirected to + a proxy server. +*/ + +int +ProxyCheck(struct libalias *la, struct ip *pip, + struct in_addr *proxy_server_addr, + u_short * proxy_server_port) +{ + u_short dst_port; + struct in_addr src_addr; + struct in_addr dst_addr; + struct proxy_entry *ptr; + + LIBALIAS_LOCK_ASSERT(la); + src_addr = pip->ip_src; + dst_addr = pip->ip_dst; + dst_port = ((struct tcphdr *)ip_next(pip)) + ->th_dport; + + ptr = la->proxyList; + while (ptr != NULL) { + u_short proxy_port; + + proxy_port = ptr->proxy_port; + if ((dst_port == proxy_port || proxy_port == 0) + && pip->ip_p == ptr->proto + && src_addr.s_addr != ptr->server_addr.s_addr) { + struct in_addr src_addr_masked; + struct in_addr dst_addr_masked; + + src_addr_masked.s_addr = src_addr.s_addr & ptr->src_mask.s_addr; + dst_addr_masked.s_addr = dst_addr.s_addr & ptr->dst_mask.s_addr; + + if ((src_addr_masked.s_addr == ptr->src_addr.s_addr) + && (dst_addr_masked.s_addr == ptr->dst_addr.s_addr)) { + if ((*proxy_server_port = ptr->server_port) == 0) + *proxy_server_port = dst_port; + *proxy_server_addr = ptr->server_addr; + return (ptr->proxy_type); + } + } + ptr = ptr->next; + } + + return (0); +} + +void +ProxyModify(struct libalias *la, struct alias_link *lnk, + struct ip *pip, + int maxpacketsize, + int proxy_type) +{ + + LIBALIAS_LOCK_ASSERT(la); + (void)la; + + switch (proxy_type) { + case PROXY_TYPE_ENCODE_IPHDR: + ProxyEncodeIpHeader(pip, maxpacketsize); + break; + + case PROXY_TYPE_ENCODE_TCPSTREAM: + ProxyEncodeTcpStream(lnk, pip, maxpacketsize); + break; + } +} + + +/* + Public API functions +*/ + +int +LibAliasProxyRule(struct libalias *la, const char *cmd) +{ +/* + * This function takes command strings of the form: + * + * server <addr>[:<port>] + * [port <port>] + * [rule n] + * [proto tcp|udp] + * [src <addr>[/n]] + * [dst <addr>[/n]] + * [type encode_tcp_stream|encode_ip_hdr|no_encode] + * + * delete <rule 
number> + * + * Subfields can be in arbitrary order. Port numbers and addresses + * must be in either numeric or symbolic form. An optional rule number + * is used to control the order in which rules are searched. If two + * rules have the same number, then search order cannot be guaranteed, + * and the rules should be disjoint. If no rule number is specified, + * then 0 is used, and group 0 rules are always checked before any + * others. + */ + int i, n, len, ret; + int cmd_len; + int token_count; + int state; + char *token; + char buffer[256]; + char str_port[sizeof(buffer)]; + char str_server_port[sizeof(buffer)]; + char *res = buffer; + + int rule_index; + int proto; + int proxy_type; + int proxy_port; + int server_port; + struct in_addr server_addr; + struct in_addr src_addr, src_mask; + struct in_addr dst_addr, dst_mask; + struct proxy_entry *proxy_entry; + + LIBALIAS_LOCK(la); + ret = 0; +/* Copy command line into a buffer */ + cmd += strspn(cmd, " \t"); + cmd_len = (int)strlen(cmd); + if (cmd_len > (int)(sizeof(buffer) - 1)) { + ret = -1; + goto getout; + } + strcpy(buffer, cmd); + +/* Convert to lower case */ + len = (int)strlen(buffer); + for (i = 0; i < len; i++) + buffer[i] = tolower((unsigned char)buffer[i]); + +/* Set default proxy type */ + +/* Set up default values */ + rule_index = 0; + proxy_type = PROXY_TYPE_ENCODE_NONE; + proto = IPPROTO_TCP; + proxy_port = 0; + server_addr.s_addr = 0; + server_port = 0; + src_addr.s_addr = 0; + IpMask(0, &src_mask); + dst_addr.s_addr = 0; + IpMask(0, &dst_mask); + + str_port[0] = 0; + str_server_port[0] = 0; + +/* Parse command string with state machine */ +#define STATE_READ_KEYWORD 0 +#define STATE_READ_TYPE 1 +#define STATE_READ_PORT 2 +#define STATE_READ_SERVER 3 +#define STATE_READ_RULE 4 +#define STATE_READ_DELETE 5 +#define STATE_READ_PROTO 6 +#define STATE_READ_SRC 7 +#define STATE_READ_DST 8 + state = STATE_READ_KEYWORD; +#ifndef VBOX + token = strsep(&res, " \t"); +#else + token = RTStrStr(res, " 
\t"); +#endif + token_count = 0; + while (token != NULL) { + token_count++; + switch (state) { + case STATE_READ_KEYWORD: + if (strcmp(token, "type") == 0) + state = STATE_READ_TYPE; + else if (strcmp(token, "port") == 0) + state = STATE_READ_PORT; + else if (strcmp(token, "server") == 0) + state = STATE_READ_SERVER; + else if (strcmp(token, "rule") == 0) + state = STATE_READ_RULE; + else if (strcmp(token, "delete") == 0) + state = STATE_READ_DELETE; + else if (strcmp(token, "proto") == 0) + state = STATE_READ_PROTO; + else if (strcmp(token, "src") == 0) + state = STATE_READ_SRC; + else if (strcmp(token, "dst") == 0) + state = STATE_READ_DST; + else { + ret = -1; + goto getout; + } + break; + + case STATE_READ_TYPE: + if (strcmp(token, "encode_ip_hdr") == 0) + proxy_type = PROXY_TYPE_ENCODE_IPHDR; + else if (strcmp(token, "encode_tcp_stream") == 0) + proxy_type = PROXY_TYPE_ENCODE_TCPSTREAM; + else if (strcmp(token, "no_encode") == 0) + proxy_type = PROXY_TYPE_ENCODE_NONE; + else { + ret = -1; + goto getout; + } + state = STATE_READ_KEYWORD; + break; + + case STATE_READ_PORT: + strcpy(str_port, token); + state = STATE_READ_KEYWORD; + break; + + case STATE_READ_SERVER: + { + int err; + char *p; + char s[sizeof(buffer)]; + + p = token; + while (*p != ':' && *p != 0) + p++; + + if (*p != ':') { + err = IpAddr(token, &server_addr); + if (err) { + ret = -1; + goto getout; + } + } else { + *p = ' '; + + n = sscanf(token, "%255s %255s", s, str_server_port); + if (n != 2) { + ret = -1; + goto getout; + } + + err = IpAddr(s, &server_addr); + if (err) { + ret = -1; + goto getout; + } + } + } + state = STATE_READ_KEYWORD; + break; + + case STATE_READ_RULE: + n = sscanf(token, "%d", &rule_index); + if (n != 1 || rule_index < 0) { + ret = -1; + goto getout; + } + state = STATE_READ_KEYWORD; + break; + + case STATE_READ_DELETE: + { + int err; + int rule_to_delete; + + if (token_count != 2) { + ret = -1; + goto getout; + } + + n = sscanf(token, "%d", &rule_to_delete); + if (n != 
1) { + ret = -1; + goto getout; + } + /* Report a failed delete to the caller instead of overwriting it with success */ + err = RuleNumberDelete(la, rule_to_delete); + if (err) + ret = -1; + else + ret = 0; + goto getout; + } + + case STATE_READ_PROTO: + if (strcmp(token, "tcp") == 0) + proto = IPPROTO_TCP; + else if (strcmp(token, "udp") == 0) + proto = IPPROTO_UDP; + else { + ret = -1; + goto getout; + } + state = STATE_READ_KEYWORD; + break; + + case STATE_READ_SRC: + case STATE_READ_DST: + { + int err; + char *p; + struct in_addr mask; + struct in_addr addr; + + p = token; + while (*p != '/' && *p != 0) + p++; + + if (*p != '/') { + IpMask(32, &mask); + err = IpAddr(token, &addr); + if (err) { + ret = -1; + goto getout; + } + } else { + int nbits; + char s[sizeof(buffer)]; + + *p = ' '; + n = sscanf(token, "%255s %d", s, &nbits); + if (n != 2) { + ret = -1; + goto getout; + } + + err = IpAddr(s, &addr); + if (err) { + ret = -1; + goto getout; + } + + err = IpMask(nbits, &mask); + if (err) { + ret = -1; + goto getout; + } + } + + if (state == STATE_READ_SRC) { + src_addr = addr; + src_mask = mask; + } else { + dst_addr = addr; + dst_mask = mask; + } + } + state = STATE_READ_KEYWORD; + break; + + default: + ret = -1; + goto getout; + break; + } + + /* + * Fetch the next non-empty token. + * NOTE(review): RTStrStr appears to be a substring search rather than a + * strsep-style tokenizer, and res is not advanced on that path -- confirm + * that the VBOX build actually tokenizes the command. + */ + do { +#ifndef VBOX + token = strsep(&res, " \t"); +#else + token = RTStrStr(res, " \t"); +#endif + } while (token != NULL && !*token); + } +#undef STATE_READ_KEYWORD +#undef STATE_READ_TYPE +#undef STATE_READ_PORT +#undef STATE_READ_SERVER +#undef STATE_READ_RULE +#undef STATE_READ_DELETE +#undef STATE_READ_PROTO +#undef STATE_READ_SRC +#undef STATE_READ_DST + +/* Convert port strings to numbers. 
This needs to be done after + the string is parsed, because the prototype might not be designated + before the ports (which might be symbolic entries in /etc/services) */ + + if (strlen(str_port) != 0) { + int err; + + err = IpPort(str_port, proto, &proxy_port); + if (err) { + ret = -1; + goto getout; + } + } else { + proxy_port = 0; + } + + if (strlen(str_server_port) != 0) { + int err; + + err = IpPort(str_server_port, proto, &server_port); + if (err) { + ret = -1; + goto getout; + } + } else { + server_port = 0; + } + +/* Check that at least the server address has been defined */ + if (server_addr.s_addr == 0) { + ret = -1; + goto getout; + } + +/* Add to linked list */ + proxy_entry = malloc(sizeof(struct proxy_entry)); + if (proxy_entry == NULL) { + ret = -1; + goto getout; + } + + proxy_entry->proxy_type = proxy_type; + proxy_entry->rule_index = rule_index; + proxy_entry->proto = proto; + proxy_entry->proxy_port = htons(proxy_port); + proxy_entry->server_port = htons(server_port); + proxy_entry->server_addr = server_addr; + proxy_entry->src_addr.s_addr = src_addr.s_addr & src_mask.s_addr; + proxy_entry->dst_addr.s_addr = dst_addr.s_addr & dst_mask.s_addr; + proxy_entry->src_mask = src_mask; + proxy_entry->dst_mask = dst_mask; + + RuleAdd(la, proxy_entry); + +getout: + LIBALIAS_UNLOCK(la); + return (ret); +} diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_skinny.c b/src/VBox/Devices/Network/slirp/libalias/alias_skinny.c new file mode 100644 index 00000000..4b9bab26 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_skinny.c @@ -0,0 +1,447 @@ +/*- + * alias_skinny.c + * + * Copyright (c) 2002, 2003 MarcusCom, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Joe Marcus Clarke <marcus@FreeBSD.org> + * + * $FreeBSD: src/sys/netinet/libalias/alias_skinny.c,v 1.14.8.1 2009/04/15 03:14:26 kensmith Exp $ + */ + +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/module.h> +#else +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include "alias_local.h" +#include "alias_mod.h" +#endif + +static void +AliasHandleSkinny(struct libalias *, struct ip *, struct alias_link *); + +static int +fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL) + return (-1); + if (la->skinnyPort != 0 && (ntohs(*ah->sport) == la->skinnyPort || + ntohs(*ah->dport) == la->skinnyPort)) + return (0); + return 
(-1); +} + +static int +protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandleSkinny(la, pip, ah->lnk); + return (0); +} + +struct proto_handler handlers[] = { + { + .pri = 110, + .dir = IN|OUT, + .proto = TCP, + .fingerprint = &fingerprint, + .protohandler = &protohandler + }, + { EOH } +}; + +static int +mod_handler(module_t mod, int type, void *data) +{ + int error; + + switch (type) { + case MOD_LOAD: + error = 0; + LibAliasAttachHandlers(handlers); + break; + case MOD_UNLOAD: + error = 0; + LibAliasDetachHandlers(handlers); + break; + default: + error = EINVAL; + } + return (error); +} + +#ifdef _KERNEL +static +#endif +moduledata_t alias_mod = { + "alias_skinny", mod_handler, NULL +}; + +#ifdef _KERNEL +DECLARE_MODULE(alias_skinny, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); +MODULE_VERSION(alias_skinny, 1); +MODULE_DEPEND(alias_skinny, libalias, 1, 1, 1); +#endif + +/* + * alias_skinny.c handles the translation for the Cisco Skinny Station + * protocol. Skinny typically uses TCP port 2000 to set up calls between + * a Cisco Call Manager and a Cisco IP phone. When a phone comes on line, + * it first needs to register with the Call Manager. To do this it sends + * a registration message. This message contains the IP address of the + * IP phone. This message must then be translated to reflect our global + * IP address. Along with the registration message (and usually in the + * same packet), the phone sends an IP port message. This message indicates + * the TCP port over which it will communicate. + * + * When a call is placed from the phone, the Call Manager will send an + * Open Receive Channel message to the phone to let the caller know someone + * has answered. The phone then sends back an Open Receive Channel + * Acknowledgement. In this packet, the phone sends its IP address again, + * and the UDP port over which the voice traffic should flow. These values + * need translation. 
Right after the Open Receive Channel Acknowledgement, + * the Call Manager sends a Start Media Transmission message indicating the + * call is connected. This message contains the IP address and UDP port + * number of the remote (called) party. Once this message is translated, the + * call can commence. The called part sends the first UDP packet to the + * calling phone at the pre-arranged UDP port in the Open Receive Channel + * Acknowledgement. + * + * Skinny is a Cisco-proprietary protocol and is a trademark of Cisco Systems, + * Inc. All rights reserved. +*/ + +/* #define LIBALIAS_DEBUG 1 */ + +/* Message types that need translating */ +#define REG_MSG 0x00000001 +#define IP_PORT_MSG 0x00000002 +#define OPNRCVCH_ACK 0x00000022 +#define START_MEDIATX 0x0000008a + +struct skinny_header { + u_int32_t len; + u_int32_t reserved; + u_int32_t msgId; +}; + +struct RegisterMessage { + u_int32_t msgId; + char devName [16]; + u_int32_t uid; + u_int32_t instance; + u_int32_t ipAddr; + u_char devType; + u_int32_t maxStreams; +}; + +struct IpPortMessage { + u_int32_t msgId; + u_int32_t stationIpPort; /* Note: Skinny uses 32-bit port + * numbers */ +}; + +struct OpenReceiveChannelAck { + u_int32_t msgId; + u_int32_t status; + u_int32_t ipAddr; + u_int32_t port; + u_int32_t passThruPartyID; +}; + +struct StartMediaTransmission { + u_int32_t msgId; + u_int32_t conferenceID; + u_int32_t passThruPartyID; + u_int32_t remoteIpAddr; + u_int32_t remotePort; + u_int32_t MSPacket; + u_int32_t payloadCap; + u_int32_t precedence; + u_int32_t silenceSuppression; + u_short maxFramesPerPacket; + u_int32_t G723BitRate; +}; + +typedef enum { + ClientToServer = 0, + ServerToClient = 1 +} ConvDirection; + + +static int +alias_skinny_reg_msg(struct RegisterMessage *reg_msg, struct ip *pip, + struct tcphdr *tc, struct alias_link *lnk, + ConvDirection direction) +{ + (void)direction; + + reg_msg->ipAddr = (u_int32_t) GetAliasAddress(lnk).s_addr; + + tc->th_sum = 0; +#ifdef _KERNEL + tc->th_x2 = 1; 
+#else + tc->th_sum = TcpChecksum(pip); +#endif + + return (0); +} + +static int +alias_skinny_startmedia(struct StartMediaTransmission *start_media, + struct ip *pip, struct tcphdr *tc, + struct alias_link *lnk, u_int32_t localIpAddr, + ConvDirection direction) +{ + struct in_addr dst, src; + + (void)pip; + (void)tc; + (void)lnk; + (void)direction; + + dst.s_addr = start_media->remoteIpAddr; + src.s_addr = localIpAddr; + + /* + * XXX I should probably handle in bound global translations as + * well. + */ + + return (0); +} + +static int +alias_skinny_port_msg(struct IpPortMessage *port_msg, struct ip *pip, + struct tcphdr *tc, struct alias_link *lnk, + ConvDirection direction) +{ + (void)direction; + + port_msg->stationIpPort = (u_int32_t) ntohs(GetAliasPort(lnk)); + + tc->th_sum = 0; +#ifdef _KERNEL + tc->th_x2 = 1; +#else + tc->th_sum = TcpChecksum(pip); +#endif + return (0); +} + +/* + * Rewrite the address and port carried in an Open Receive Channel Ack with + * the alias address/port of a UDP link looked up (or created) for the + * sender. The pre-translation address is handed back via localIpAddr. + * Returns 0 on success, -1 when no link could be obtained (the message is + * then left untouched). + */ +static int +alias_skinny_opnrcvch_ack(struct libalias *la, struct OpenReceiveChannelAck *opnrcvch_ack, + struct ip *pip, struct tcphdr *tc, + struct alias_link *lnk, u_int32_t * localIpAddr, + ConvDirection direction) +{ + struct in_addr null_addr; + struct alias_link *opnrcv_lnk; + u_int32_t localPort; + + (void)lnk; + (void)direction; + + *localIpAddr = (u_int32_t) opnrcvch_ack->ipAddr; + localPort = opnrcvch_ack->port; + + null_addr.s_addr = INADDR_ANY; + opnrcv_lnk = FindUdpTcpOut(la, pip->ip_src, null_addr, + htons((u_short) opnrcvch_ack->port), 0, + IPPROTO_UDP, 1); + /* Other callers of FindUdpTcpOut guard against NULL; do the same here */ + if (opnrcv_lnk == NULL) + return (-1); + opnrcvch_ack->ipAddr = (u_int32_t) GetAliasAddress(opnrcv_lnk).s_addr; + opnrcvch_ack->port = (u_int32_t) ntohs(GetAliasPort(opnrcv_lnk)); + + tc->th_sum = 0; +#ifdef _KERNEL + tc->th_x2 = 1; +#else + tc->th_sum = TcpChecksum(pip); +#endif + return (0); +} + +static void +AliasHandleSkinny(struct libalias *la, struct ip *pip, struct alias_link *lnk) +{ + size_t hlen, tlen, dlen; + struct tcphdr *tc; + u_int32_t msgId, t, len, lip; + struct skinny_header *sd; + size_t orig_len, skinny_hdr_len = sizeof(struct 
skinny_header); + ConvDirection direction; + + lip = -1; + tc = (struct tcphdr *)ip_next(pip); + hlen = (pip->ip_hl + tc->th_off) << 2; + tlen = ntohs(pip->ip_len); + dlen = tlen - hlen; + + sd = (struct skinny_header *)tcp_next(tc); + + /* + * XXX This direction is reserved for future use. I still need to + * handle the scenario where the call manager is on the inside, and + * the calling phone is on the global outside. + */ + if (ntohs(tc->th_dport) == la->skinnyPort) { + direction = ClientToServer; + } else if (ntohs(tc->th_sport) == la->skinnyPort) { + direction = ServerToClient; + } else { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Invalid port number, not a Skinny packet\n"); +#endif + return; + } + + orig_len = dlen; + /* + * Skinny packets can contain many messages. We need to loop + * through the packet using len to determine message boundaries. + * This comes into play big time with port messages being in the + * same packet as register messages. Also, open receive channel + * acks are usually buried in a pakcet some 400 bytes long. 
+ */ + while (dlen >= skinny_hdr_len) { + len = (sd->len); + msgId = (sd->msgId); + t = len; + + /* + * len counts from msgId onward, so a whole message occupies + * len + (skinny_hdr_len - sizeof(msgId)) bytes. Require all of + * it to be present; otherwise the size_t dlen would wrap when + * the loop advances and parsing would run past the packet. + */ + if (t > orig_len || t > dlen || + dlen - t < skinny_hdr_len - sizeof(msgId)) { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Not a skinny packet, invalid length \n"); +#endif + return; + } + switch (msgId) { + case REG_MSG: { + struct RegisterMessage *reg_mesg; + + if (len < (int)sizeof(struct RegisterMessage)) { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Not a skinny packet, bad registration message\n"); +#endif + return; + } + reg_mesg = (struct RegisterMessage *)&sd->msgId; +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Received a register message"); +#endif + alias_skinny_reg_msg(reg_mesg, pip, tc, lnk, direction); + break; + } + case IP_PORT_MSG: { + struct IpPortMessage *port_mesg; + + if (len < (int)sizeof(struct IpPortMessage)) { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Not a skinny packet, port message\n"); +#endif + return; + } +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Received ipport message\n"); +#endif + port_mesg = (struct IpPortMessage *)&sd->msgId; + alias_skinny_port_msg(port_mesg, pip, tc, lnk, direction); + break; + } + case OPNRCVCH_ACK: { + struct OpenReceiveChannelAck *opnrcvchn_ack; + + if (len < (int)sizeof(struct OpenReceiveChannelAck)) { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Not a skinny packet, packet,OpnRcvChnAckMsg\n"); +#endif + return; + } +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Received open rcv channel msg\n"); +#endif + opnrcvchn_ack = (struct OpenReceiveChannelAck *)&sd->msgId; + alias_skinny_opnrcvch_ack(la, opnrcvchn_ack, pip, tc, lnk, &lip, direction); + break; + } + case START_MEDIATX: { + struct StartMediaTransmission *startmedia_tx; + + if (len < (int)sizeof(struct StartMediaTransmission)) { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Not a skinny packet,StartMediaTx Message\n"); +#endif + return; + } + if 
(lip == -1) { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: received a" + " packet,StartMediaTx Message before" + " packet,OpnRcvChnAckMsg\n"); +#endif + return; + } + +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/Skinny: Received start media trans msg\n"); +#endif + startmedia_tx = (struct StartMediaTransmission *)&sd->msgId; + alias_skinny_startmedia(startmedia_tx, pip, tc, lnk, lip, direction); + break; + } + default: + break; + } + /* Place the pointer at the next message in the packet. */ + dlen -= len + (skinny_hdr_len - sizeof(msgId)); + sd = (struct skinny_header *)(((char *)&sd->msgId) + len); + } +} diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_smedia.c b/src/VBox/Devices/Network/slirp/libalias/alias_smedia.c new file mode 100644 index 00000000..75969567 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_smedia.c @@ -0,0 +1,547 @@ +/* + * alias_smedia.c + * + * Copyright (c) 2000 Whistle Communications, Inc. + * All rights reserved. + * + * Subject to the following obligations and disclaimer of warranty, use and + * redistribution of this software, in source or object code forms, with or + * without modifications are expressly permitted by Whistle Communications; + * provided, however, that: + * 1. Any and all reproductions of the source or object code must include the + * copyright notice above and the following disclaimer of warranties; and + * 2. No rights are granted, in any manner or form, to use Whistle + * Communications, Inc. trademarks, including the mark "WHISTLE + * COMMUNICATIONS" on advertising, endorsements, or otherwise except as + * such appears in the above copyright notice or in the software. 
+ * + * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND + * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO + * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, + * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. + * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY + * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS + * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. + * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES + * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING + * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY + * OF SUCH DAMAGE. + * + * Copyright (c) 2000 Junichi SATOH <junichi@astec.co.jp> + * <junichi@junichi.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Authors: Erik Salander <erik@whistle.com> + * Junichi SATOH <junichi@astec.co.jp> + * <junichi@junichi.org> + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_smedia.c,v 1.17.8.1 2009/04/15 03:14:26 kensmith Exp $"); + +/* + Alias_smedia.c is meant to contain the aliasing code for streaming media + protocols. It performs special processing for RSTP sessions under TCP. + Specifically, when a SETUP request is sent by a client, or a 200 reply + is sent by a server, it is intercepted and modified. The address is + changed to the gateway machine and an aliasing port is used. + + More specifically, the "client_port" configuration parameter is + parsed for SETUP requests. The "server_port" configuration parameter is + parsed for 200 replies eminating from a server. This is intended to handle + the unicast case. + + RTSP also allows a redirection of a stream to another client by using the + "destination" configuration parameter. The destination config parm would + indicate a different IP address. This function is NOT supported by the + RTSP translation code below. + + The RTSP multicast functions without any address translation intervention. 
+ + For this routine to work, the SETUP/200 must fit entirely + into a single TCP packet. This is typically the case, but exceptions + can easily be envisioned under the actual specifications. + + Probably the most troubling aspect of the approach taken here is + that the new SETUP/200 will typically be a different length, and + this causes a certain amount of bookkeeping to keep track of the + changes of sequence and acknowledgment numbers, since the client + machine is totally unaware of the modification to the TCP stream. + + Initial version: May, 2000 (eds) +*/ + +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#else +#include <errno.h> +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> +#include <netinet/libalias/alias_mod.h> +#else +#include "alias_local.h" +#include "alias_mod.h" +#endif + +#define RTSP_CONTROL_PORT_NUMBER_1 554 +#define RTSP_CONTROL_PORT_NUMBER_2 7070 +#define TFTP_PORT_NUMBER 69 + +static void +AliasHandleRtspOut(struct libalias *, struct ip *, struct alias_link *, + int maxpacketsize); +static int +fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + if (ah->dport != NULL && ah->aport != NULL && ah->sport != NULL && + ntohs(*ah->dport) == TFTP_PORT_NUMBER) + return (0); + if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL || + ah->maxpktsize == 0) + return (-1); + if (ntohs(*ah->dport) == RTSP_CONTROL_PORT_NUMBER_1 + || ntohs(*ah->sport) == RTSP_CONTROL_PORT_NUMBER_1 + || ntohs(*ah->dport) == RTSP_CONTROL_PORT_NUMBER_2 + || ntohs(*ah->sport) == RTSP_CONTROL_PORT_NUMBER_2) + return (0); + return (-1); +} + +static int +protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + if 
(ntohs(*ah->dport) == TFTP_PORT_NUMBER) + FindRtspOut(la, pip->ip_src, pip->ip_dst, + *ah->sport, *ah->aport, IPPROTO_UDP); + else AliasHandleRtspOut(la, pip, ah->lnk, ah->maxpktsize); + return (0); +} + +struct proto_handler handlers[] = { + { + .pri = 100, + .dir = OUT, + .proto = TCP|UDP, + .fingerprint = &fingerprint, + .protohandler = &protohandler + }, + { EOH } +}; + +static int +mod_handler(module_t mod, int type, void *data) +{ + int error; + + switch (type) { + case MOD_LOAD: + error = 0; + LibAliasAttachHandlers(handlers); + break; + case MOD_UNLOAD: + error = 0; + LibAliasDetachHandlers(handlers); + break; + default: + error = EINVAL; + } + return (error); +} + +#ifdef _KERNEL +static +#endif +moduledata_t alias_mod = { + "alias_smedia", mod_handler, NULL +}; + +#ifdef _KERNEL +DECLARE_MODULE(alias_smedia, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); +MODULE_VERSION(alias_smedia, 1); +MODULE_DEPEND(alias_smedia, libalias, 1, 1, 1); +#endif + +#define RTSP_CONTROL_PORT_NUMBER_1 554 +#define RTSP_CONTROL_PORT_NUMBER_2 7070 +#define RTSP_PORT_GROUP 2 + +#define ISDIGIT(a) (((a) >= '0') && ((a) <= '9')) + +static int +search_string(char *data, int dlen, const char *search_str) +{ + int i, j, k; + int search_str_len; + + search_str_len = strlen(search_str); + for (i = 0; i < dlen - search_str_len; i++) { + for (j = i, k = 0; j < dlen - search_str_len; j++, k++) { + if (data[j] != search_str[k] && + data[j] != search_str[k] - ('a' - 'A')) { + break; + } + if (k == search_str_len - 1) { + return (j + 1); + } + } + } + return (-1); +} + +static int +alias_rtsp_out(struct libalias *la, struct ip *pip, + struct alias_link *lnk, + char *data, + const char *port_str) +{ + int hlen, tlen, dlen; + struct tcphdr *tc; + int i, j, pos, state, port_dlen, new_dlen, delta; + u_short p[2], new_len; + u_short sport, eport, base_port; + u_short salias = 0, ealias = 0, base_alias = 0; + const char *transport_str = "transport:"; + char newdata[2048], *port_data, *port_newdata, 
stemp[80]; + int links_created = 0, pkt_updated = 0; + struct alias_link *rtsp_lnk = NULL; + struct in_addr null_addr; + + /* Calculate data length of TCP packet */ + tc = (struct tcphdr *)ip_next(pip); + hlen = (pip->ip_hl + tc->th_off) << 2; + tlen = ntohs(pip->ip_len); + dlen = tlen - hlen; + + /* Find keyword, "Transport: " */ + pos = search_string(data, dlen, transport_str); + if (pos < 0) { + return (-1); + } + port_data = data + pos; + port_dlen = dlen - pos; + + memcpy(newdata, data, pos); + port_newdata = newdata + pos; + + while (port_dlen > (int)strlen(port_str)) { + /* Find keyword, appropriate port string */ + pos = search_string(port_data, port_dlen, port_str); + if (pos < 0) { + break; + } + memcpy(port_newdata, port_data, pos + 1); + port_newdata += (pos + 1); + + p[0] = p[1] = 0; + sport = eport = 0; + state = 0; + for (i = pos; i < port_dlen; i++) { + switch (state) { + case 0: + if (port_data[i] == '=') { + state++; + } + break; + case 1: + if (ISDIGIT(port_data[i])) { + p[0] = p[0] * 10 + port_data[i] - '0'; + } else { + if (port_data[i] == ';') { + state = 3; + } + if (port_data[i] == '-') { + state++; + } + } + break; + case 2: + if (ISDIGIT(port_data[i])) { + p[1] = p[1] * 10 + port_data[i] - '0'; + } else { + state++; + } + break; + case 3: + base_port = p[0]; + sport = htons(p[0]); + eport = htons(p[1]); + + if (!links_created) { + + links_created = 1; + /* + * Find an even numbered port + * number base that satisfies the + * contiguous number of ports we + * need + */ + null_addr.s_addr = 0; + if (0 == (salias = FindNewPortGroup(la, null_addr, + FindAliasAddress(la, pip->ip_src), + sport, 0, + RTSP_PORT_GROUP, + IPPROTO_UDP, 1))) { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/RTSP: Cannot find contiguous RTSP data ports\n"); +#endif + } else { + + base_alias = ntohs(salias); + for (j = 0; j < RTSP_PORT_GROUP; j++) { + /* + * Establish link + * to port found in + * RTSP packet + */ + rtsp_lnk = FindRtspOut(la, 
GetOriginalAddress(lnk), null_addr, + htons(base_port + j), htons(base_alias + j), + IPPROTO_UDP); + if (rtsp_lnk != NULL) { +#ifndef NO_FW_PUNCH + /* + * Punch + * hole in + * firewall + */ + PunchFWHole(rtsp_lnk); +#endif + } else { +#ifdef LIBALIAS_DEBUG + fprintf(stderr, + "PacketAlias/RTSP: Cannot allocate RTSP data ports\n"); +#endif + break; + } + } + } + ealias = htons(base_alias + (RTSP_PORT_GROUP - 1)); + } + if (salias && rtsp_lnk) { + + pkt_updated = 1; + + /* Copy into IP packet */ + sprintf(stemp, "%d", ntohs(salias)); + memcpy(port_newdata, stemp, strlen(stemp)); + port_newdata += strlen(stemp); + + if (eport != 0) { + *port_newdata = '-'; + port_newdata++; + + /* Copy into IP packet */ + sprintf(stemp, "%d", ntohs(ealias)); + memcpy(port_newdata, stemp, strlen(stemp)); + port_newdata += strlen(stemp); + } + *port_newdata = ';'; + port_newdata++; + } + state++; + break; + } + if (state > 3) { + break; + } + } + port_data += i; + port_dlen -= i; + } + + if (!pkt_updated) + return (-1); + + memcpy(port_newdata, port_data, port_dlen); + port_newdata += port_dlen; + *port_newdata = '\0'; + + /* Create new packet */ + new_dlen = port_newdata - newdata; + memcpy(data, newdata, new_dlen); + + SetAckModified(lnk); + delta = GetDeltaSeqOut(pip, lnk); + AddSeq(pip, lnk, delta + new_dlen - dlen); + + new_len = htons(hlen + new_dlen); + DifferentialChecksum(&pip->ip_sum, + &new_len, + &pip->ip_len, + 1); + pip->ip_len = new_len; + + tc->th_sum = 0; +#ifdef _KERNEL + tc->th_x2 = 1; +#else + tc->th_sum = TcpChecksum(pip); +#endif + return (0); +} + +/* Support the protocol used by early versions of RealPlayer */ + +static int +alias_pna_out(struct libalias *la, struct ip *pip, + struct alias_link *lnk, + char *data, + int dlen) +{ + struct alias_link *pna_links; + u_short msg_id, msg_len; + char *work; + u_short alias_port, port; + struct tcphdr *tc; + + work = data; + work += 5; + while (work + 4 < data + dlen) { + memcpy(&msg_id, work, 2); + work += 2; + 
memcpy(&msg_len, work, 2); + work += 2; + if (ntohs(msg_id) == 0) { + /* end of options */ + return (0); + } + if ((ntohs(msg_id) == 1) || (ntohs(msg_id) == 7)) { + memcpy(&port, work, 2); + pna_links = FindUdpTcpOut(la, pip->ip_src, GetDestAddress(lnk), + port, 0, IPPROTO_UDP, 1); + if (pna_links != NULL) { +#ifndef NO_FW_PUNCH + /* Punch hole in firewall */ + PunchFWHole(pna_links); +#endif + tc = (struct tcphdr *)ip_next(pip); + alias_port = GetAliasPort(pna_links); + memcpy(work, &alias_port, 2); + + /* Compute TCP checksum for revised packet */ + tc->th_sum = 0; +#ifdef _KERNEL + tc->th_x2 = 1; +#else + tc->th_sum = TcpChecksum(pip); +#endif + } + } + work += ntohs(msg_len); + } + + return (0); +} + +static void +AliasHandleRtspOut(struct libalias *la, struct ip *pip, struct alias_link *lnk, int maxpacketsize) +{ + int hlen, tlen, dlen; + struct tcphdr *tc; + char *data; + const char *setup = "SETUP", *pna = "PNA", *str200 = "200"; + const char *okstr = "OK", *client_port_str = "client_port"; + const char *server_port_str = "server_port"; + int i, parseOk; + + (void)maxpacketsize; + + tc = (struct tcphdr *)ip_next(pip); + hlen = (pip->ip_hl + tc->th_off) << 2; + tlen = ntohs(pip->ip_len); + dlen = tlen - hlen; + + data = (char *)pip; + data += hlen; + + /* When aliasing a client, check for the SETUP request */ + if ((ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_1) || + (ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_2)) { + + if (dlen >= (int)strlen(setup)) { + if (memcmp(data, setup, strlen(setup)) == 0) { + alias_rtsp_out(la, pip, lnk, data, client_port_str); + return; + } + } + if (dlen >= (int)strlen(pna)) { + if (memcmp(data, pna, strlen(pna)) == 0) { + alias_pna_out(la, pip, lnk, data, dlen); + } + } + } else { + + /* + * When aliasing a server, check for the 200 reply + * Accomodate varying number of blanks between 200 & OK + */ + + if (dlen >= (int)strlen(str200)) { + + for (parseOk = 0, i = 0; + i <= dlen - (int)strlen(str200); + i++) { + if 
(memcmp(&data[i], str200, strlen(str200)) == 0) { + parseOk = 1; + break; + } + } + if (parseOk) { + + i += strlen(str200); /* skip string found */ + while (data[i] == ' ') /* skip blank(s) */ + i++; + + if ((dlen - i) >= (int)strlen(okstr)) { + + if (memcmp(&data[i], okstr, strlen(okstr)) == 0) + alias_rtsp_out(la, pip, lnk, data, server_port_str); + + } + } + } + } +} diff --git a/src/VBox/Devices/Network/slirp/libalias/alias_util.c b/src/VBox/Devices/Network/slirp/libalias/alias_util.c new file mode 100644 index 00000000..9a0310d3 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/alias_util.c @@ -0,0 +1,182 @@ +/*- + * Copyright (c) 2001 Charles Mott <cm@linktel.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef VBOX +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_util.c,v 1.20.8.1 2009/04/15 03:14:26 kensmith Exp $"); + + +/* + Alias_util.c contains general utilities used by other functions + in the packet aliasing module. At the moment, there are functions + for computing IP header and TCP packet checksums. + + The checksum routines are based upon example code in a Unix networking + text written by Stevens (sorry, I can't remember the title -- but + at least this is a good author). + + Initial Version: August, 1996 (cjm) + + Version 1.7: January 9, 1997 + Added differential checksum update function. +*/ + +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/proc.h> +#else +#include <sys/types.h> +#include <stdio.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#ifdef _KERNEL +#include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> +#else +#include "alias.h" +#include "alias_local.h" +#endif +#else /* VBOX */ +# include <slirp.h> +# include "alias.h" +# include "alias_local.h" +#endif /* VBOX */ + +/* + * Note: the checksum routines assume that the actual checksum word has + * been zeroed out. 
If the checksum word is filled with the proper value, + * then these routines will give a result of zero (useful for testing + * purposes); + */ +u_short +LibAliasInternetChecksum(struct libalias *la __unused, u_short * ptr, + int nbytes) +{ + int sum, oddbyte; + + LIBALIAS_LOCK(la); + sum = 0; + while (nbytes > 1) { + sum += *ptr++; + nbytes -= 2; + } + if (nbytes == 1) { + oddbyte = 0; + ((u_char *) & oddbyte)[0] = *(u_char *) ptr; + ((u_char *) & oddbyte)[1] = 0; + sum += oddbyte; + } + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); + LIBALIAS_UNLOCK(la); + return (~sum); +} + +#ifndef _KERNEL +u_short +IpChecksum(struct ip *pip) +{ + return (LibAliasInternetChecksum(NULL, (u_short *) pip, + (pip->ip_hl << 2))); + +} + +u_short +TcpChecksum(struct ip *pip) +{ + u_short *ptr; + struct tcphdr *tc; + int nhdr, ntcp, nbytes; + int sum, oddbyte; + + nhdr = pip->ip_hl << 2; + ntcp = ntohs(pip->ip_len) - nhdr; + + tc = (struct tcphdr *)ip_next(pip); + ptr = (u_short *) tc; + +/* Add up TCP header and data */ + nbytes = ntcp; + sum = 0; + while (nbytes > 1) { + sum += *ptr++; + nbytes -= 2; + } + if (nbytes == 1) { + oddbyte = 0; + ((u_char *) & oddbyte)[0] = *(u_char *) ptr; + ((u_char *) & oddbyte)[1] = 0; + sum += oddbyte; + } +/* "Pseudo-header" data */ + ptr = (u_short *) & (pip->ip_dst); + sum += *ptr++; + sum += *ptr; + ptr = (u_short *) & (pip->ip_src); + sum += *ptr++; + sum += *ptr; + sum += htons((u_short) ntcp); + sum += htons((u_short) pip->ip_p); + +/* Roll over carry bits */ + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); + +/* Return checksum */ + return ((u_short) ~ sum); +} +#endif /* not _KERNEL */ + +void +DifferentialChecksum(u_short * cksum, void *newp, void *oldp, int n) +{ + int i; + int accumulate; + u_short *new = newp; + u_short *old = oldp; + + accumulate = *cksum; + for (i = 0; i < n; i++) { + accumulate -= *new++; + accumulate += *old++; + } + + if (accumulate < 0) { + accumulate = -accumulate; + accumulate = 
(accumulate >> 16) + (accumulate & 0xffff); + accumulate += accumulate >> 16; + *cksum = (u_short) ~ accumulate; + } else { + accumulate = (accumulate >> 16) + (accumulate & 0xffff); + accumulate += accumulate >> 16; + *cksum = (u_short) accumulate; + } +} diff --git a/src/VBox/Devices/Network/slirp/libalias/libalias.3 b/src/VBox/Devices/Network/slirp/libalias/libalias.3 new file mode 100644 index 00000000..367e37df --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libalias/libalias.3 @@ -0,0 +1,1458 @@ +.\"- +.\" Copyright (c) 2001 Charles Mott <cm@linktel.net> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD: src/sys/netinet/libalias/libalias.3,v 1.58.8.1 2009/04/15 03:14:26 kensmith Exp $ +.\" +.Dd October 1, 2006 +.Dt LIBALIAS 3 +.Os +.Sh NAME +.Nm libalias +.Nd packet aliasing library for masquerading and network address translation +.Sh SYNOPSIS +.In sys/types.h +.In netinet/in.h +.In alias.h +.Pp +Function prototypes are given in the main body of the text. +.Sh DESCRIPTION +The +.Nm +library is a collection of functions for aliasing and de-aliasing of IP +packets, intended for masquerading and network address translation (NAT). +.Sh INTRODUCTION +This library is a moderately portable set of functions designed to assist +in the process of IP masquerading and network address translation. +Outgoing packets from a local network with unregistered IP addresses can +be aliased to appear as if they came from an accessible IP address. +Incoming packets are then de-aliased so that they are sent to the correct +machine on the local network. +.Pp +A certain amount of flexibility is built into the packet aliasing engine. +In the simplest mode of operation, a many-to-one address mapping takes +place between local network and the packet aliasing host. +This is known as IP masquerading. +In addition, one-to-one mappings between local and public addresses can +also be implemented, which is known as static NAT. +In between these extremes, different groups of private addresses can be +linked to different public addresses, comprising several distinct +many-to-one mappings. +Also, a given public address and port can be statically redirected to a +private address/port. +.Pp +The packet aliasing engine was designed to operate in user space outside +of the kernel, without any access to private kernel data structure, but +the source code can also be ported to a kernel environment. 
+.Sh INITIALIZATION AND CONTROL +One special function, +.Fn LibAliasInit , +must always be called before any packet handling may be performed and +the returned instance pointer passed to all the other functions. +Normally, the +.Fn LibAliasSetAddress +function is called afterwards, to set the default aliasing address. +In addition, the operating mode of the packet aliasing engine can be +customized by calling +.Fn LibAliasSetMode . +.Pp +.Ft "struct libalias *" +.Fn LibAliasInit "struct libalias *" +.Bd -ragged -offset indent +This function is used to initialize +internal data structures. +When called the first time, a +.Dv NULL +pointer should be passed as an argument. +The following mode bits are always set after calling +.Fn LibAliasInit . +See the description of +.Fn LibAliasSetMode +below for the meaning of these mode bits. +.Pp +.Bl -item -offset indent -compact +.It +.Dv PKT_ALIAS_SAME_PORTS +.It +.Dv PKT_ALIAS_USE_SOCKETS +.It +.Dv PKT_ALIAS_RESET_ON_ADDR_CHANGE +.El +.Pp +This function will always return the packet aliasing engine to the same +initial state. +The +.Fn LibAliasSetAddress +function is normally called afterwards, and any desired changes from the +default mode bits listed above require a call to +.Fn LibAliasSetMode . +.Pp +It is mandatory that this function be called at the beginning of a program +prior to any packet handling. +.Ed +.Pp +.Ft void +.Fn LibAliasUninit "struct libalias *" +.Bd -ragged -offset indent +This function has no return value and is used to clear any +resources attached to internal data structures. +.Pp +This function should be called when a program stops using the aliasing +engine; it does, amongst other things, clear out any firewall holes. +To provide backwards compatibility and extra security, it is added to +the +.Xr atexit 3 +chain by +.Fn LibAliasInit .
+.Ed +.Pp +.Ft void +.Fn LibAliasSetAddress "struct libalias *" "struct in_addr addr" +.Bd -ragged -offset indent +This function sets the source address to which outgoing packets from the +local area network are aliased. +All outgoing packets are re-mapped to this address unless overridden by a +static address mapping established by +.Fn LibAliasRedirectAddr . +If this function is not called, and no static rules match, an outgoing +packet retains its source address. +.Pp +If the +.Dv PKT_ALIAS_RESET_ON_ADDR_CHANGE +mode bit is set (the default mode of operation), then the internal aliasing +link tables will be reset any time the aliasing address changes. +This is useful for interfaces such as +.Xr ppp 8 , +where the IP +address may or may not change on successive dial-up attempts. +.Pp +If the +.Dv PKT_ALIAS_RESET_ON_ADDR_CHANGE +mode bit is set to zero, this function can also be used to dynamically change +the aliasing address on a packet to packet basis (it is a low overhead call). +.Pp +It is mandatory that this function be called prior to any packet handling. +.Ed +.Pp +.Ft unsigned int +.Fn LibAliasSetMode "struct libalias *" "unsigned int flags" "unsigned int mask" +.Bd -ragged -offset indent +This function sets or clears mode bits +according to the value of +.Fa flags . +Only bits marked in +.Fa mask +are affected. +The following mode bits are defined in +.In alias.h : +.Bl -tag -width indent +.It Dv PKT_ALIAS_LOG +Enables logging into +.Pa /var/log/alias.log . +Each time an aliasing link is created or deleted, the log file is appended +with the current number of ICMP, TCP and UDP links. +Mainly useful for debugging when the log file is viewed continuously with +.Xr tail 1 . +.It Dv PKT_ALIAS_DENY_INCOMING +If this mode bit is set, all incoming packets associated with new TCP +connections or new UDP transactions will be marked for being ignored +.Fn ( LibAliasIn +returns +.Dv PKT_ALIAS_IGNORED +code) +by the calling program. 
+Response packets to connections or transactions initiated from the packet +aliasing host or local network will be unaffected. +This mode bit is useful for implementing a one-way firewall. +.It Dv PKT_ALIAS_SAME_PORTS +If this mode bit is set, the packet aliasing engine will attempt to leave +the alias port numbers unchanged from the actual local port numbers. +This can be done as long as the quintuple (proto, alias addr, alias port, +remote addr, remote port) is unique. +If a conflict exists, a new aliasing port number is chosen even if this +mode bit is set. +.It Dv PKT_ALIAS_USE_SOCKETS +This bit should be set when the packet aliasing host originates network +traffic as well as forwards it. +When the packet aliasing host is waiting for a connection from an unknown +host address or unknown port number (e.g.\& an FTP data connection), this +mode bit specifies that a socket be allocated as a place holder to prevent +port conflicts. +Once a connection is established, usually within a minute or so, the socket +is closed. +.It Dv PKT_ALIAS_UNREGISTERED_ONLY +If this mode bit is set, traffic on the local network which does not +originate from unregistered address spaces will be ignored. +Standard Class A, B and C unregistered addresses are: +.Bd -literal -offset indent +10.0.0.0 -> 10.255.255.255 (Class A subnet) +172.16.0.0 -> 172.31.255.255 (Class B subnets) +192.168.0.0 -> 192.168.255.255 (Class C subnets) +.Ed +.Pp +This option is useful in the case that the packet aliasing host has both +registered and unregistered subnets on different interfaces. +The registered subnet is fully accessible to the outside world, so traffic +from it does not need to be passed through the packet aliasing engine. +.It Dv PKT_ALIAS_RESET_ON_ADDR_CHANGE +When this mode bit is set and +.Fn LibAliasSetAddress +is called to change the aliasing address, the internal link table of the +packet aliasing engine will be cleared.
+This operating mode is useful for +.Xr ppp 8 +links where the interface address can sometimes change or remain the same +between dial-up attempts. +If this mode bit is not set, the link table will never be reset in the event +of an address change. +.It Dv PKT_ALIAS_PUNCH_FW +This option makes +.Nm +`punch holes' in an +.Xr ipfirewall 4 +based firewall for FTP/IRC DCC connections. +The holes punched are bound by from/to IP address and port; it will not be +possible to use a hole for another connection. +A hole is removed when the connection that uses it dies. +To cater to unexpected death of a program using +.Nm +(e.g.\& kill -9), +changing the state of the flag will clear the entire firewall range +allocated for holes. +This will also happen on the initial call to +.Fn LibAliasSetFWBase . +This call must happen prior to setting this flag. +.It Dv PKT_ALIAS_REVERSE +This option makes +.Nm +reverse the way it handles incoming and outgoing packets, allowing it +to be fed with data that passes through the internal interface rather +than the external one. +.It Dv PKT_ALIAS_PROXY_ONLY +This option tells +.Nm +to obey transparent proxy rules only. +Normal packet aliasing is not performed. +See +.Fn LibAliasProxyRule +below for details. +.El +.Ed +.Pp +.Ft void +.Fn LibAliasSetFWBase "struct libalias *" "unsigned int base" "unsigned int num" +.Bd -ragged -offset indent +Set firewall range allocated for punching firewall holes (with the +.Dv PKT_ALIAS_PUNCH_FW +flag). +The range will be cleared for all rules on initialization. +.Ed +.Pp +.Ft void +.Fn LibAliasSkinnyPort "struct libalias *" "unsigned int port" +.Bd -ragged -offset indent +Set the TCP port used by the Skinny Station protocol. +Skinny is used by Cisco IP phones to communicate with +Cisco Call Managers to set up voice over IP calls. +If this is not set, Skinny aliasing will not be done. +The typical port used by Skinny is 2000. 
+.Ed +.Sh PACKET HANDLING +The packet handling functions are used to modify incoming (remote to local) +and outgoing (local to remote) packets. +The calling program is responsible for receiving and sending packets via +network interfaces. +.Pp +Along with +.Fn LibAliasInit +and +.Fn LibAliasSetAddress , +the two packet handling functions, +.Fn LibAliasIn +and +.Fn LibAliasOut , +comprise the minimal set of functions needed for a basic IP masquerading +implementation. +.Pp +.Ft int +.Fn LibAliasIn "struct libalias *" "char *buffer" "int maxpacketsize" +.Bd -ragged -offset indent +An incoming packet coming from a remote machine to the local network is +de-aliased by this function. +The IP packet is pointed to by +.Fa buffer , +and +.Fa maxpacketsize +indicates the size of the data structure containing the packet and should +be at least as large as the actual packet size. +.Pp +Return codes: +.Bl -tag -width indent +.It Dv PKT_ALIAS_OK +The packet aliasing process was successful. +.It Dv PKT_ALIAS_IGNORED +The packet was ignored and not de-aliased. +This can happen if the protocol is unrecognized, possibly an ICMP message +type is not handled or if incoming packets for new connections are being +ignored (if the +.Dv PKT_ALIAS_DENY_INCOMING +mode bit was set by +.Fn LibAliasSetMode ) . +.It Dv PKT_ALIAS_UNRESOLVED_FRAGMENT +This is returned when a fragment cannot be resolved because the header +fragment has not been sent yet. +In this situation, fragments must be saved with +.Fn LibAliasSaveFragment +until a header fragment is found. +.It Dv PKT_ALIAS_FOUND_HEADER_FRAGMENT +The packet aliasing process was successful, and a header fragment was found. +This is a signal to retrieve any unresolved fragments with +.Fn LibAliasGetFragment +and de-alias them with +.Fn LibAliasFragmentIn . +.It Dv PKT_ALIAS_ERROR +An internal error within the packet aliasing engine occurred.
+.El +.Ed +.Pp +.Ft int +.Fn LibAliasOut "struct libalias *" "char *buffer" "int maxpacketsize" +.Bd -ragged -offset indent +An outgoing packet coming from the local network to a remote machine is +aliased by this function. +The IP packet is pointed to by +.Fa buffer , +and +.Fa maxpacketsize +indicates the maximum packet size permissible should the packet length be +changed. +IP encoding protocols place address and port information in the encapsulated +data stream which has to be modified and can account for changes in packet +length. +Well known examples of such protocols are FTP and IRC DCC. +.Pp +Return codes: +.Bl -tag -width indent +.It Dv PKT_ALIAS_OK +The packet aliasing process was successful. +.It Dv PKT_ALIAS_IGNORED +The packet was ignored and not aliased. +This can happen if the protocol is unrecognized, or possibly an ICMP message +type is not handled. +.It Dv PKT_ALIAS_ERROR +An internal error within the packet aliasing engine occurred. +.El +.Ed +.Sh PORT AND ADDRESS REDIRECTION +The functions described in this section allow machines on the local network +to be accessible in some degree to new incoming connections from the external +network. +Individual ports can be re-mapped or static network address translations can +be designated. +.Pp +.Ft struct alias_link * +.Fo LibAliasRedirectPort +.Fa "struct libalias *" +.Fa "struct in_addr local_addr" +.Fa "u_short local_port" +.Fa "struct in_addr remote_addr" +.Fa "u_short remote_port" +.Fa "struct in_addr alias_addr" +.Fa "u_short alias_port" +.Fa "u_char proto" +.Fc +.Bd -ragged -offset indent +This function specifies that traffic from a given remote address/port to +an alias address/port be redirected to a specified local address/port. +The parameter +.Fa proto +can be either +.Dv IPPROTO_TCP +or +.Dv IPPROTO_UDP , +as defined in +.In netinet/in.h . 
+.Pp +If +.Fa local_addr +or +.Fa alias_addr +is zero, this indicates that the packet aliasing address as established +by +.Fn LibAliasSetAddress +is to be used. +Even if +.Fn LibAliasSetAddress +is called to change the address after +.Fn LibAliasRedirectPort +is called, a zero reference will track this change. +.Pp +If the link is further set up to operate for a load sharing, then +.Fa local_addr +and +.Fa local_port +are ignored, and are selected dynamically from the server pool, as described in +.Fn LibAliasAddServer +below. +.Pp +If +.Fa remote_addr +is zero, this indicates to redirect packets from any remote address. +Likewise, if +.Fa remote_port +is zero, this indicates to redirect packets originating from any remote +port number. +Almost always, the remote port specification will be zero, but non-zero +remote addresses can sometimes be useful for firewalling. +If two calls to +.Fn LibAliasRedirectPort +overlap in their address/port specifications, then the most recent call +will have precedence. +.Pp +This function returns a pointer which can subsequently be used by +.Fn LibAliasRedirectDelete . +If +.Dv NULL +is returned, then the function call did not complete successfully. +.Pp +All port numbers should be in network address byte order, so it is necessary +to use +.Xr htons 3 +to convert these parameters from internally readable numbers to network byte +order. +Addresses are also in network byte order, which is implicit in the use of the +.Fa struct in_addr +data type. +.Ed +.Pp +.Ft struct alias_link * +.Fo LibAliasRedirectAddr +.Fa "struct libalias *" +.Fa "struct in_addr local_addr" +.Fa "struct in_addr alias_addr" +.Fc +.Bd -ragged -offset indent +This function designates that all incoming traffic to +.Fa alias_addr +be redirected to +.Fa local_addr . +Similarly, all outgoing traffic from +.Fa local_addr +is aliased to +.Fa alias_addr . 
+.Pp +If +.Fa local_addr +or +.Fa alias_addr +is zero, this indicates that the packet aliasing address as established by +.Fn LibAliasSetAddress +is to be used. +Even if +.Fn LibAliasSetAddress +is called to change the address after +.Fn LibAliasRedirectAddr +is called, a zero reference will track this change. +.Pp +If the link is further set up to operate for a load sharing, then +.Fa local_addr +is ignored, and is selected dynamically from the server pool, as described in +.Fn LibAliasAddServer +below. +.Pp +If subsequent calls to +.Fn LibAliasRedirectAddr +use the same aliasing address, all new incoming traffic to this aliasing +address will be redirected to the local address made in the last function +call. +New traffic generated by any of the local machines, designated in the +several function calls, will be aliased to the same address. +Consider the following example: +.Bd -literal -offset indent +LibAliasRedirectAddr(la, inet_aton("192.168.0.2"), + inet_aton("141.221.254.101")); +LibAliasRedirectAddr(la, inet_aton("192.168.0.3"), + inet_aton("141.221.254.101")); +LibAliasRedirectAddr(la, inet_aton("192.168.0.4"), + inet_aton("141.221.254.101")); +.Ed +.Pp +Any outgoing connections such as +.Xr telnet 1 +or +.Xr ftp 1 +from 192.168.0.2, 192.168.0.3 and 192.168.0.4 will appear to come from +141.221.254.101. +Any incoming connections to 141.221.254.101 will be directed to 192.168.0.4. +.Pp +Any calls to +.Fn LibAliasRedirectPort +will have precedence over address mappings designated by +.Fn LibAliasRedirectAddr . +.Pp +This function returns a pointer which can subsequently be used by +.Fn LibAliasRedirectDelete . +If +.Dv NULL +is returned, then the function call did not complete successfully. 
+.Ed +.Pp +.Ft int +.Fo LibAliasAddServer +.Fa "struct libalias *" +.Fa "struct alias_link *link" +.Fa "struct in_addr addr" +.Fa "u_short port" +.Fc +.Bd -ragged -offset indent +This function sets the +.Fa link +up for Load Sharing using IP Network Address Translation (RFC 2391, LSNAT). +LSNAT operates as follows. +A client attempts to access a server by using the server virtual address. +The LSNAT router transparently redirects the request to one of the hosts +in server pool, selected using a real-time load sharing algorithm. +Multiple sessions may be initiated from the same client, and each session +could be directed to a different host based on load balance across server +pool hosts at the time. +If load share is desired for just a few specific services, the configuration +on LSNAT could be defined to restrict load share for just the services +desired. +.Pp +Currently, only the simplest selection algorithm is implemented, where a +host is selected on a round-robin basis only, without regard to load on +the host. +.Pp +First, the +.Fa link +is created by either +.Fn LibAliasRedirectPort +or +.Fn LibAliasRedirectAddr . +Then, +.Fn LibAliasAddServer +is called multiple times to add entries to the +.Fa link Ns 's +server pool. +.Pp +For links created with +.Fn LibAliasRedirectAddr , +the +.Fa port +argument is ignored and could have any value, e.g.\& htons(~0). +.Pp +This function returns 0 on success, \-1 otherwise. +.Ed +.Pp +.Ft int +.Fn LibAliasRedirectDynamic "struct libalias *" "struct alias_link *link" +.Bd -ragged -offset indent +This function marks the specified static redirect rule entered by +.Fn LibAliasRedirectPort +as dynamic. +This can be used to e.g.\& dynamically redirect a single TCP connection, +after which the rule is removed. +Only fully specified links can be made dynamic. 
+(See the +.Sx STATIC AND DYNAMIC LINKS +and +.Sx PARTIALLY SPECIFIED ALIASING LINKS +sections below for a definition of static vs.\& dynamic, +and partially vs.\& fully specified links.) +.Pp +This function returns 0 on success, \-1 otherwise. +.Ed +.Pp +.Ft void +.Fn LibAliasRedirectDelete "struct libalias *" "struct alias_link *link" +.Bd -ragged -offset indent +This function will delete a specific static redirect rule entered by +.Fn LibAliasRedirectPort +or +.Fn LibAliasRedirectAddr . +The parameter +.Fa link +is the pointer returned by either of the redirection functions. +If an invalid pointer is passed to +.Fn LibAliasRedirectDelete , +then a program crash or unpredictable operation could result, so it is +necessary to be careful using this function. +.Ed +.Pp +.Ft int +.Fn LibAliasProxyRule "struct libalias *" "const char *cmd" +.Bd -ragged -offset indent +The passed +.Fa cmd +string consists of one or more pairs of words. +The first word in each pair is a token and the second is the value that +should be applied for that token. +Tokens and their argument types are as follows: +.Bl -tag -width indent +.It Cm type encode_ip_hdr | encode_tcp_stream | no_encode +In order to support transparent proxying, it is necessary to somehow +pass the original address and port information into the new destination +server. +If +.Cm encode_ip_hdr +is specified, the original destination address and port are passed +as an extra IP option. +If +.Cm encode_tcp_stream +is specified, the original destination address and port are passed +as the first piece of data in the TCP stream in the format +.Dq Li DEST Ar IP port . +.It Cm port Ar portnum +Only packets with the destination port +.Ar portnum +are proxied. +.It Cm server Ar host Ns Op : Ns Ar portnum +This specifies the +.Ar host +and +.Ar portnum +that the data is to be redirected to. +.Ar host +must be an IP address rather than a DNS host name. +If +.Ar portnum +is not specified, the destination port number is not changed. 
+.Pp +The +.Ar server +specification is mandatory unless the +.Cm delete +command is being used. +.It Cm rule Ar index +Normally, each call to +.Fn LibAliasProxyRule +inserts the next rule at the start of a linear list of rules. +If an +.Ar index +is specified, the new rule will be checked after all rules with lower +indices. +Calls to +.Fn LibAliasProxyRule +that do not specify a rule are assigned rule 0. +.It Cm delete Ar index +This token and its argument MUST NOT be used with any other tokens. +When used, all existing rules with the given +.Ar index +are deleted. +.It Cm proto tcp | udp +If specified, only packets of the given protocol type are matched. +.It Cm src Ar IP Ns Op / Ns Ar bits +If specified, only packets with a source address matching the given +.Ar IP +are matched. +If +.Ar bits +is also specified, then the first +.Ar bits +bits of +.Ar IP +are taken as a network specification, and all IP addresses from that +network will be matched. +.It Cm dst Ar IP Ns Op / Ns Ar bits +If specified, only packets with a destination address matching the given +.Ar IP +are matched. +If +.Ar bits +is also specified, then the first +.Ar bits +bits of +.Ar IP +are taken as a network specification, and all IP addresses from that +network will be matched. +.El +.Pp +This function is usually used to redirect outgoing connections for +internal machines that are not permitted certain types of internet +access, or to restrict access to certain external machines. +.Ed +.Pp +.Ft struct alias_link * +.Fo LibAliasRedirectProto +.Fa "struct libalias *" +.Fa "struct in_addr local_addr" +.Fa "struct in_addr remote_addr" +.Fa "struct in_addr alias_addr" +.Fa "u_char proto" +.Fc +.Bd -ragged -offset indent +This function specifies that any IP packet with protocol number of +.Fa proto +from a given remote address to an alias address be +redirected to a specified local address. 
+.Pp +If +.Fa local_addr +or +.Fa alias_addr +is zero, this indicates that the packet aliasing address as established +by +.Fn LibAliasSetAddress +is to be used. +Even if +.Fn LibAliasSetAddress +is called to change the address after +.Fn LibAliasRedirectProto +is called, a zero reference will track this change. +.Pp +If +.Fa remote_addr +is zero, this indicates to redirect packets from any remote address. +Non-zero remote addresses can sometimes be useful for firewalling. +.Pp +If two calls to +.Fn LibAliasRedirectProto +overlap in their address specifications, then the most recent call +will have precedence. +.Pp +This function returns a pointer which can subsequently be used by +.Fn LibAliasRedirectDelete . +If +.Dv NULL +is returned, then the function call did not complete successfully. +.Ed +.Sh FRAGMENT HANDLING +The functions in this section are used to deal with incoming fragments. +.Pp +Outgoing fragments are handled within +.Fn LibAliasOut +by changing the address according to any applicable mapping set by +.Fn LibAliasRedirectAddr , +or the default aliasing address set by +.Fn LibAliasSetAddress . +.Pp +Incoming fragments are handled in one of two ways. +If the header of a fragmented IP packet has already been seen, then all +subsequent fragments will be re-mapped in the same manner the header +fragment was. +Fragments which arrive before the header are saved and then retrieved +once the header fragment has been resolved. +.Pp +.Ft int +.Fn LibAliasSaveFragment "struct libalias *" "char *ptr" +.Bd -ragged -offset indent +When +.Fn LibAliasIn +returns +.Dv PKT_ALIAS_UNRESOLVED_FRAGMENT , +this function can be used to save the pointer to the unresolved fragment. +.Pp +It is implicitly assumed that +.Fa ptr +points to a block of memory allocated by +.Xr malloc 3 . +If the fragment is never resolved, the packet aliasing engine will +automatically free the memory after a timeout period. 
+[Eventually this function should be modified so that a callback function +for freeing memory is passed as an argument.] +.Pp +This function returns +.Dv PKT_ALIAS_OK +if it was successful and +.Dv PKT_ALIAS_ERROR +if there was an error. +.Ed +.Pp +.Ft char * +.Fn LibAliasGetFragment "struct libalias *" "char *buffer" +.Bd -ragged -offset indent +This function can be used to retrieve fragment pointers saved by +.Fn LibAliasSaveFragment . +The IP header fragment pointed to by +.Fa buffer +is the header fragment indicated when +.Fn LibAliasIn +returns +.Dv PKT_ALIAS_FOUND_HEADER_FRAGMENT . +Once a fragment pointer is retrieved, it becomes the calling program's +responsibility to free the dynamically allocated memory for the fragment. +.Pp +The +.Fn LibAliasGetFragment +function can be called sequentially until there are no more fragments +available, at which time it returns +.Dv NULL . +.Ed +.Pp +.Ft void +.Fn LibAliasFragmentIn "struct libalias *" "char *header" "char *fragment" +.Bd -ragged -offset indent +When a fragment is retrieved with +.Fn LibAliasGetFragment , +it can then be de-aliased with a call to +.Fn LibAliasFragmentIn . +The +.Fa header +argument is the pointer to a header fragment used as a template, and +.Fa fragment +is the pointer to the packet to be de-aliased. +.Ed +.Sh MISCELLANEOUS FUNCTIONS +.Ft void +.Fn LibAliasSetTarget "struct libalias *" "struct in_addr addr" +.Bd -ragged -offset indent +When an incoming packet not associated with any pre-existing aliasing link +arrives at the host machine, it will be sent to the address indicated by a +call to +.Fn LibAliasSetTarget . +.Pp +If this function is called with an +.Dv INADDR_NONE +address argument, then all new incoming packets go to the address set by +.Fn LibAliasSetAddress . +.Pp +If this function is not called, or is called with an +.Dv INADDR_ANY +address argument, then all new incoming packets go to the address specified +in the packet. 
+This allows external machines to talk directly to internal machines if they +can route packets to the machine in question. +.Ed +.Pp +.Ft int +.Fn LibAliasCheckNewLink "struct libalias *" +.Bd -ragged -offset indent +This function returns a non-zero value when a new aliasing link is created. +In circumstances where incoming traffic is being sequentially sent to +different local servers, this function can be used to trigger when +.Fn LibAliasSetTarget +is called to change the default target address. +.Ed +.Pp +.Ft u_short +.Fn LibAliasInternetChecksum "struct libalias *" "u_short *buffer" "int nbytes" +.Bd -ragged -offset indent +This is a utility function that does not seem to be available elsewhere and +is included as a convenience. +It computes the internet checksum, which is used in both IP and +protocol-specific headers (TCP, UDP, ICMP). +.Pp +The +.Fa buffer +argument points to the data block to be checksummed, and +.Fa nbytes +is the number of bytes. +The 16-bit checksum field should be zeroed before computing the checksum. +.Pp +Checksums can also be verified by operating on a block of data including +its checksum. +If the checksum is valid, +.Fn LibAliasInternetChecksum +will return zero. +.Ed +.Pp +.Ft int +.Fn LibAliasUnaliasOut "struct libalias *" "char *buffer" "int maxpacketsize" +.Bd -ragged -offset indent +An outgoing packet, which has already been aliased, +has its private address/port information restored by this function. +The IP packet is pointed to by +.Fa buffer , +and +.Fa maxpacketsize +is provided for error checking purposes. +This function can be used if an already-aliased packet needs to have its +original IP header restored for further processing (e.g.\& logging). +.Ed +.Sh AUTHORS +.An Charles Mott Aq cm@linktel.net , +versions 1.0 - 1.8, 2.0 - 2.4. +.An Eivind Eklund Aq eivind@FreeBSD.org , +versions 1.8b, 1.9 and 2.5. 
+Added IRC DCC support as well as contributing a number of architectural +improvements; added the firewall bypass for FTP/IRC DCC. +.An Erik Salander Aq erik@whistle.com +added support for PPTP and RTSP. +.An Junichi Satoh Aq junichi@junichi.org +added support for RTSP/PNA. +.An Ruslan Ermilov Aq ru@FreeBSD.org +added support for PPTP and LSNAT as well as general hacking. +.An Paolo Pisati Aq piso@FreeBSD.org +made the library modular, moving support for all +protocols (except for IP, TCP and UDP) to external modules. +.Sh ACKNOWLEDGMENTS +Listed below, in approximate chronological order, are individuals who +have provided valuable comments and/or debugging assistance. +.Pp +.Bd -ragged -offset indent +.An -split +.An Gary Roberts +.An Tom Torrance +.An Reto Burkhalter +.An Martin Renters +.An Brian Somers +.An Paul Traina +.An Ari Suutari +.An Dave Remien +.An J. Fortes +.An Andrzej Bialecki +.An Gordon Burditt +.Ed +.Sh CONCEPTUAL BACKGROUND +This section is intended for those who are planning to modify the source +code or want to create somewhat esoteric applications using the packet +aliasing functions. +.Pp +The conceptual framework under which the packet aliasing engine operates +is described here. +Central to the discussion is the idea of an +.Em aliasing link +which describes the relationship for a given packet transaction between +the local machine, aliased identity and remote machine. +It is discussed how such links come into existence and are destroyed. +.Ss ALIASING LINKS +There is a notion of an +.Em aliasing link , +which is a 7-tuple describing a specific translation: +.Bd -literal -offset indent +(local addr, local port, alias addr, alias port, + remote addr, remote port, protocol) +.Ed +.Pp +Outgoing packets have the local address and port number replaced with the +alias address and port number. +Incoming packets undergo the reverse process. 
+The packet aliasing engine attempts to match packets against an internal +table of aliasing links to determine how to modify a given IP packet. +Both the IP header and protocol dependent headers are modified as necessary. +Aliasing links are created and deleted as necessary according to network +traffic. +.Pp +Protocols can be TCP, UDP or even ICMP in certain circumstances. +(Some types of ICMP packets can be aliased according to sequence or ID +number which acts as an equivalent port number for identifying how +individual packets should be handled.) +.Pp +Each aliasing link must have a unique combination of the following five +quantities: alias address/port, remote address/port and protocol. +This ensures that several machines on a local network can share the +same aliasing IP address. +In cases where conflicts might arise, the aliasing port is chosen so that +uniqueness is maintained. +.Ss STATIC AND DYNAMIC LINKS +Aliasing links can either be static or dynamic. +Static links persist indefinitely and represent fixed rules for translating +IP packets. +Dynamic links come into existence for a specific TCP connection or UDP +transaction or ICMP ECHO sequence. +For the case of TCP, the connection can be monitored to see when the +associated aliasing link should be deleted. +Aliasing links for UDP transactions (and ICMP ECHO and TIMESTAMP requests) +work on a simple timeout rule. +When no activity is observed on a dynamic link for a certain amount of time +it is automatically deleted. +Timeout rules also apply to TCP connections which do not open or close +properly. +.Ss PARTIALLY SPECIFIED ALIASING LINKS +Aliasing links can be partially specified, meaning that the remote address +and/or remote port are unknown. +In this case, when a packet matching the incomplete specification is found, +a fully specified dynamic link is created. 
+If the original partially specified link is dynamic, it will be deleted +after the fully specified link is created, otherwise it will persist. +.Pp +For instance, a partially specified link might be +.Bd -literal -offset indent +(192.168.0.4, 23, 204.228.203.215, 8066, 0, 0, tcp) +.Ed +.Pp +The zeros denote unspecified components for the remote address and port. +If this link were static it would have the effect of redirecting all +incoming traffic from port 8066 of 204.228.203.215 to port 23 (telnet) +of machine 192.168.0.4 on the local network. +Each individual telnet connection would initiate the creation of a distinct +dynamic link. +.Ss DYNAMIC LINK CREATION +In addition to aliasing links, there are also address mappings that can be +stored within the internal data table of the packet aliasing mechanism. +.Bd -literal -offset indent +(local addr, alias addr) +.Ed +.Pp +Address mappings are searched when creating new dynamic links. +.Pp +All outgoing packets from the local network automatically create a dynamic +link if they do not match an already existing fully specified link. +If an address mapping exists for the outgoing packet, this determines +the alias address to be used. +If no mapping exists, then a default address, usually the address of the +packet aliasing host, is used. +If necessary, this default address can be changed as often as each individual +packet arrives. +.Pp +The aliasing port number is determined such that the new dynamic link does +not conflict with any existing links. +In the default operating mode, the packet aliasing engine attempts to set +the aliasing port equal to the local port number. +If this results in a conflict, then port numbers are randomly chosen until +a unique aliasing link can be established. +In an alternate operating mode, the first choice of an aliasing port is also +random and unrelated to the local port number. 
+.Sh MODULAR ARCHITECTURE (AND Xr ipfw 4 Sh SUPPORT) +One of the latest improvements to +.Nm +was to make its support +for new protocols independent from the rest of the library, giving it +the ability to load/unload support for new protocols at run-time. +To achieve this feature, all the code for protocol handling was moved +to a series of modules outside of the main library. +These modules are compiled from the same sources but work in +different ways, depending on whether they are compiled to work inside a kernel +or as part of the userland library. +.Ss LIBALIAS MODULES IN KERNEL LAND +When compiled for the kernel, +.Nm +modules are plain KLDs recognizable with the +.Pa alias_ +prefix. +.Pp +To add support for a new protocol, load the corresponding module. +For example: +.Pp +.Dl "kldload alias_ftp" +.Pp +When support for a protocol is no longer needed, its module can be unloaded: +.Pp +.Dl "kldunload alias_ftp" +.Ss LIBALIAS MODULES IN USERLAND +Due to the differences between kernel and userland (no KLD mechanism, +many different address spaces, etc.), we had to change a bit how to +handle module loading/tracking/unloading in userland. +.Pp +While compiled for a userland +.Nm , +all the modules are plain libraries, residing in +.Pa /usr/lib , +and recognizable with the +.Pa libalias_ +prefix. +.Pp +There is a configuration file, +.Pa /etc/libalias.conf , +with the following contents (by default): +.Bd -literal -offset indent +/usr/lib/libalias_cuseeme.so +/usr/lib/libalias_ftp.so +/usr/lib/libalias_irc.so +/usr/lib/libalias_nbt.so +/usr/lib/libalias_pptp.so +/usr/lib/libalias_skinny.so +/usr/lib/libalias_smedia.so +.Ed +.Pp +This file contains the paths to the modules that +.Nm +will load. +To load/unload a new module, just add its path to +.Pa libalias.conf +and call +.Fn LibAliasRefreshModules +from the program. 
+In case the application provides a +.Dv SIGHUP +signal handler, add a call to +.Fn LibAliasRefreshModules +inside the handler, and every time you want to refresh the loaded modules, +send it the +.Dv SIGHUP +signal: +.Pp +.Dl "kill -HUP <process_pid>" +.Ss MODULAR ARCHITECTURE: HOW IT WORKS +The modular architecture of +.Nm +works similarly whether it is running inside the +kernel or in userland. +From +.Pa alias_mod.c : +.Bd -literal +/* Protocol and userland module handlers chains. */ +LIST_HEAD(handler_chain, proto_handler) handler_chain ... +\&... +SLIST_HEAD(dll_chain, dll) dll_chain ... +.Ed +.Pp +.Va handler_chain +keeps track of all the protocol handlers loaded, while +.Va dll_chain +takes care of userland modules loaded. +.Pp +.Va handler_chain +is composed of +.Vt "struct proto_handler" +entries: +.Bd -literal +struct proto_handler { + u_int pri; + int16_t dir; + uint8_t proto; + int (*fingerprint)(struct libalias *la, + struct ip *pip, struct alias_data *ah); + int (*protohandler)(struct libalias *la, + struct ip *pip, struct alias_data *ah); + LIST_ENTRY(proto_handler) entries; +}; +.Ed +.Pp +where: +.Bl -inset +.It Va pri +is the priority assigned to a protocol handler, lower +is better. +.It Va dir +is the direction of packets: incoming or outgoing. +.It Va proto +says to which protocol this packet belongs: IP, TCP or UDP. +.It Va fingerprint +points to the fingerprint function while protohandler points +to the protocol handler function. +.El +.Pp +The +.Va fingerprint +function has the dual purpose of checking whether the +incoming packet is found and whether it belongs to any category that this +module can handle. +.Pp +The +.Va protohandler +function actually manipulates +the packet to make +.Nm +correctly NAT it.
+.Pp +When a packet enters +.Nm , +if it meets a module hook, +.Va handler_chain +is searched to see if there is a handler that matches +this type of packet (it checks protocol and direction of packet), then if +more than one handler is found, it starts with the module with +the lowest priority number: it calls the +.Va fingerprint +function and interprets the result. +.Pp +If the result value is equal to 0 then it calls the protocol handler +of this handler and returns. +Otherwise, it proceeds to the next eligible module until the +.Va handler_chain +is exhausted. +.Pp +Inside +.Nm , +the module hook looks like this: +.Bd -literal -offset indent +struct alias_data ad = { + lnk, + &original_address, + &alias_address, + &alias_port, + &ud->uh_sport, /* original source port */ + &ud->uh_dport, /* original dest port */ + 256 /* maxpacketsize */ +}; + +\&... + +/* walk out chain */ +err = find_handler(IN, UDP, la, pip, &ad); +.Ed +.Pp +All data useful to a module are gathered together in an +.Vt alias_data +structure, then +.Fn find_handler +is called. +The +.Fn find_handler +function is responsible for walking the handler +chain; it receives the following input parameters: +.Bl -tag -width indent +.It Fa IN +direction +.It Fa UDP +working protocol +.It Fa la +pointer to this instance of libalias +.It Fa pip +pointer to a +.Vt "struct ip" +.It Fa ad +pointer to +.Vt "struct alias_data" +(see above) +.El +.Pp +In this case, +.Fn find_handler +will search only for modules registered for +supporting INcoming UDP packets. +.Pp +As was mentioned earlier, +.Nm +in userland is a bit different, because +care has to be taken of module handling too (avoiding duplicate loads of a +module, avoiding modules with the same name, etc.) so +.Va dll_chain +was introduced. +.Pp +.Va dll_chain +contains a list of all userland +.Nm +modules loaded.
+.Pp +When an application calls +.Fn LibAliasRefreshModules , +.Nm +first unloads all the loaded modules, then reloads all the modules listed in +.Pa /etc/libalias.conf : +for every module loaded, a new entry to +.Va dll_chain +is added. +.Pp +.Va dll_chain +is composed of +.Vt "struct dll" +entries: +.Bd -literal +struct dll { + /* name of module */ + char name[DLL_LEN]; + /* + * ptr to shared obj obtained through + * dlopen() - use this ptr to get access + * to any symbols from a loaded module + * via dlsym() + */ + void *handle; + struct dll *next; +}; +.Ed +.Bl -inset +.It Va name +is the name of the module +.It Va handle +is a pointer to the module obtained through +.Xr dlopen 3 +.El +Whenever a module is loaded in userland, an entry is added to +.Va dll_chain , +then every protocol handler present in that module +is resolved and registered in +.Va handler_chain . +.Ss HOW TO WRITE A MODULE FOR LIBALIAS +There is a module (called +.Pa alias_dummy.[ch] ) +in +.Nm +that can be used as a skeleton for future work; here we analyse some parts of that +module. +From +.Pa alias_dummy.c : +.Bd -literal +struct proto_handler handlers [] = {{666, IN|OUT, UDP|TCP, + &fingerprint, &protohandler}}; +.Ed +.Pp +The variable +.Va handlers +is the +.Dq "most important thing" +in a module +because it describes the handlers present and lets the outside world use +it in an opaque way. +.Pp +It must ALWAYS be present in every module, and it MUST retain +the name +.Va handlers , +otherwise attempting to load a module in userland will fail and +complain about missing symbols: for more information about module +load/unload, please refer to +.Fn LibAliasRefreshModules , +.Fn LibAliasLoadModule +and +.Fn LibAliasUnloadModule +in +.Pa alias.c . +.Pp +.Va handlers +contains all the +.Vt proto_handler +structures present in a module.
+.Bd -literal +static int +mod_handler(module_t mod, int type, void *data) +{ + int error; + + switch (type) { + case MOD_LOAD: + error = 0; + attach_handlers(handlers); + break; + case MOD_UNLOAD: + error = 0; + detach_handlers(handlers); + break; + default: + error = EINVAL; + } + return (error); +} +.Ed +When running as KLD, +.Fn mod_handler +registers/deregisters the module using +.Fn attach_handlers +and +.Fn detach_handlers , +respectively. +.Pp +Every module must contain at least 2 functions: one fingerprint +function and a protocol handler function. +.Bd -literal +#ifdef _KERNEL +static +#endif +int +fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + +\&... +} + +#ifdef _KERNEL +static +#endif +int +protohandler(struct libalias *la, struct ip *pip, + struct alias_data *ah) +{ + +\&... +} +.Ed +and they must accept exactly these input parameters. +.Ss PATCHING AN APPLICATION FOR USERLAND LIBALIAS MODULES +To add module support into an application that uses +.Nm , +the following simple steps can be followed. +.Bl -enum +.It +Find the main file of an application +(let us call it +.Pa main.c ) . +.It +Add this to the header section of +.Pa main.c , +if not already present: +.Pp +.Dl "#include <signal.h>" +.Pp +and this just after the header section: +.Pp +.Dl "static void signal_handler(int);" +.It +Add the following line to the init function of an application or, +if it does not have any init function, put it in +.Fn main : +.Pp +.Dl "signal(SIGHUP, signal_handler);" +.Pp +and place the +.Fn signal_handler +function somewhere in +.Pa main.c : +.Bd -literal -offset indent +static void +signal_handler(int sig) +{ + + LibAliasRefreshModules(); +} +.Ed +.Pp +Otherwise, if an application already traps the +.Dv SIGHUP +signal, just add a call to +.Fn LibAliasRefreshModules +in the signal handler function.
+.El +For example, to patch +.Xr natd 8 +to use +.Nm +modules, just add the following line to +.Fn RefreshAddr "int sig __unused" : +.Pp +.Dl "LibAliasRefreshModules()" +.Pp +recompile and you are done. +.Ss LOGGING SUPPORT IN KERNEL LAND +When working as KLD, +.Nm +now has log support that +happens on a buffer allocated inside +.Vt "struct libalias" +(from +.Pa alias_local.h ) : +.Bd -literal +struct libalias { + ... + + /* log descriptor */ +#ifdef KERNEL_LOG + char *logDesc; /* + * ptr to an auto-malloced + * memory buffer when libalias + * works as kld + */ +#else + FILE *logDesc; /* + * ptr to /var/log/alias.log + * when libalias runs as a + * userland lib + */ +#endif + + ... +} +.Ed +so all applications using +.Nm +will be able to handle their +own logs, if they want, accessing +.Va logDesc . +Moreover, every change to a log buffer is automatically added to +.Xr syslog 3 +with the +.Dv LOG_SECURITY +facility and the +.Dv LOG_INFO +level. diff --git a/src/VBox/Devices/Network/slirp/libslirp.h b/src/VBox/Devices/Network/slirp/libslirp.h new file mode 100644 index 00000000..8e70d002 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/libslirp.h @@ -0,0 +1,200 @@ +/* $Id: libslirp.h $ */ +/** @file + * NAT - slirp interface. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef _LIBSLIRP_H +#define _LIBSLIRP_H + +#ifdef RT_OS_WINDOWS +# include <iprt/win/winsock2.h> +# ifdef __cplusplus +extern "C" { +# endif +int inet_aton(const char *cp, struct in_addr *ia); +# ifdef __cplusplus +} +# endif +#else +# ifdef RT_OS_OS2 /* temporary workaround, see ticket #127 */ +# include <sys/time.h> +# endif +# include <sys/select.h> +# include <poll.h> +# include <arpa/inet.h> +#endif + +#include <VBox/types.h> +#include <iprt/req.h> + +typedef struct NATState *PNATState; +struct mbuf; + +#ifdef __cplusplus +extern "C" { +#endif + +int slirp_init(PNATState *, uint32_t, uint32_t, bool, bool, int, int, bool, void *); +void slirp_register_statistics(PNATState pData, PPDMDRVINS pDrvIns); +void slirp_deregister_statistics(PNATState pData, PPDMDRVINS pDrvIns); +void slirp_term(PNATState); +void slirp_link_up(PNATState); +void slirp_link_down(PNATState); + +#if defined(RT_OS_WINDOWS) +void slirp_select_fill(PNATState pData, int *pndfs); + +void slirp_select_poll(PNATState pData, int fTimeout); +#else /* RT_OS_WINDOWS */ +void slirp_select_fill(PNATState pData, int *pnfds, struct pollfd *polls); +void slirp_select_poll(PNATState pData, struct pollfd *polls, int ndfs); +#endif /* !RT_OS_WINDOWS */ + +void slirp_input(PNATState pData, struct mbuf *m, size_t cbBuf); + +/* you must provide the following functions: */ +void slirp_arm_fast_timer(void *pvUser); +int slirp_can_output(void * pvUser); +void slirp_output(void * pvUser, struct mbuf *m, const uint8_t *pkt, int pkt_len); +void slirp_output_pending(void * pvUser); +void slirp_urg_output(void *pvUser, struct mbuf *, const uint8_t *pu8Buf, int cb); +void slirp_post_sent(PNATState pData, void *pvArg); + +int slirp_call(void *pvUser, PRTREQ *ppReq, RTMSINTERVAL cMillies, + unsigned fFlags, PFNRT 
+ pfnFunction, unsigned cArgs, ...); + +int slirp_call_hostres(void *pvUser, PRTREQ *ppReq, RTMSINTERVAL cMillies, + unsigned fFlags, PFNRT pfnFunction, unsigned cArgs, ...); + + +void slirp_update_guest_addr_guess(PNATState pData, uint32_t guess, const char *msg); + +int slirp_add_redirect(PNATState pData, int is_udp, struct in_addr host_addr, + int host_port, struct in_addr guest_addr, + int guest_port); +int slirp_remove_redirect(PNATState pData, int is_udp, struct in_addr host_addr, + int host_port, struct in_addr guest_addr, + int guest_port); +int slirp_add_exec(PNATState pData, int do_pty, const char *args, int addr_low_byte, + int guest_port); + +void slirp_set_dhcp_TFTP_prefix(PNATState pData, const char *tftpPrefix); +void slirp_set_dhcp_TFTP_bootfile(PNATState pData, const char *bootFile); +void slirp_set_dhcp_next_server(PNATState pData, const char *nextServer); +void slirp_set_dhcp_dns_proxy(PNATState pData, bool fDNSProxy); +void slirp_set_rcvbuf(PNATState pData, int kilobytes); +void slirp_set_sndbuf(PNATState pData, int kilobytes); +void slirp_set_tcp_rcvspace(PNATState pData, int kilobytes); +void slirp_set_tcp_sndspace(PNATState pData, int kilobytes); + +int slirp_set_binding_address(PNATState, char *addr); +void slirp_set_mtu(PNATState, int); +void slirp_info(PNATState pData, const void *pvArg, const char *pszArgs); +void slirp_set_somaxconn(PNATState pData, int iSoMaxConn); + +/** + * This macro is a shortcut for checking for hosts where Slirp + * receives notifications from the host. For now these are Darwin and + * Windows (see the #if that follows). Note that the + * Main API has had primitives for listening to DNS change events since 4.3.
+ */ +#if defined(RT_OS_DARWIN) || defined(RT_OS_WINDOWS) +# define HAVE_NOTIFICATION_FOR_DNS_UPDATE 1 +#else +# define HAVE_NOTIFICATION_FOR_DNS_UPDATE 0 +#endif + + +/** + * This method helps DrvNAT select a strategy for VMRESUMEREASON_HOST_RESUME: + * - proceed with link termination (we let the guest track host DNS settings) + * VBOX_NAT_DNS_EXTERNAL + * - enforce internal DNS update (we are using dnsproxy and track but don't export DNS host settings) + * VBOX_NAT_DNS_DNSPROXY + * - ignore (NAT configured to use hostresolver - we don't track any host DNS changes) + * VBOX_NAT_DNS_HOSTRESOLVER + * @note: It's safe to call this method from any thread, because the settings we're checking + * are immutable at runtime. + */ +#define VBOX_NAT_DNS_EXTERNAL 0 +#define VBOX_NAT_DNS_DNSPROXY 1 +#define VBOX_NAT_DNS_HOSTRESOLVER 2 +int slirp_host_network_configuration_change_strategy_selector(const PNATState); +#if defined(RT_OS_WINDOWS) + + +/* + * ICMP handle state change + */ +# define VBOX_ICMP_EVENT_INDEX 0 + +/** + * This event is for + * - slirp_input + * - slirp_link_up + * - slirp_link_down + * - wakeup + */ +# define VBOX_WAKEUP_EVENT_INDEX 1 + +/* + * UDP/TCP socket state change (socket ready to receive, to send, ...) + */ +# define VBOX_SOCKET_EVENT_INDEX 2 + +/* + * The number of events for WSAWaitForMultipleEvents(). + */ +# define VBOX_EVENT_COUNT 3 + +HANDLE *slirp_get_events(PNATState pData); +void slirp_register_external_event(PNATState pData, HANDLE hEvent, int index); +#endif /* RT_OS_WINDOWS */ + +struct mbuf *slirp_ext_m_get(PNATState pData, size_t cbMin, void **ppvBuf, size_t *pcbBuf); +void slirp_ext_m_free(PNATState pData, struct mbuf *, uint8_t *pu8Buf); + +/* + * Returns the timeout. + */ +unsigned int slirp_get_timeout_ms(PNATState pData); + +# ifndef RT_OS_WINDOWS +/* + * Returns the number of sockets.
+ */ +int slirp_get_nsock(PNATState pData); +# endif + +#ifdef VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER +void slirp_add_host_resolver_mapping(PNATState pData, + const char *pszHostName, bool fPattern, + uint32_t u32HostIP); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/VBox/Devices/Network/slirp/main.h b/src/VBox/Devices/Network/slirp/main.h new file mode 100644 index 00000000..be9014e2 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/main.h @@ -0,0 +1,41 @@ +/* $Id: main.h $ */ +/** @file + * NAT - main. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif +#define ETH_ENCAP_URG 1 +void if_encap(PNATState pData, uint16_t eth_proto, struct mbuf *m, int flags); diff --git a/src/VBox/Devices/Network/slirp/misc.c b/src/VBox/Devices/Network/slirp/misc.c new file mode 100644 index 00000000..cfb82eea --- /dev/null +++ b/src/VBox/Devices/Network/slirp/misc.c @@ -0,0 +1,560 @@ +/* $Id: misc.c $ */ +/** @file + * NAT - helpers. 
+ */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#ifndef VBOX_NAT_TST_QUEUE +#include <slirp.h> +#include "zone.h" + +# ifndef HAVE_INET_ATON +int +inet_aton(const char *cp, struct in_addr *ia) +{ + u_int32_t addr = inet_addr(cp); + if (addr == 0xffffffff) + return 0; + ia->s_addr = addr; + return 1; +} +# endif + +/* + * Get our IP address and put it in our_addr + */ +void +getouraddr(PNATState pData) +{ + our_addr.s_addr = loopback_addr.s_addr; +} +#else /* VBOX_NAT_TST_QUEUE */ +# include <iprt/cdefs.h> +# include <iprt/types.h> +# include "misc.h" +#endif +struct quehead +{ + struct quehead *qh_link; + struct quehead *qh_rlink; +}; + +void +insque(PNATState pData, void *a, void *b) +{ + register struct quehead *element = (struct quehead *) a; + register struct quehead *head = (struct quehead *) b; + NOREF(pData); + element->qh_link = head->qh_link; + head->qh_link = (struct quehead *)element; + element->qh_rlink = (struct quehead *)head; + ((struct quehead *)(element->qh_link))->qh_rlink = (struct quehead *)element; +} + +void +remque(PNATState pData, void *a) +{ + register struct quehead *element = (struct quehead *) a; + NOREF(pData); + ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; + ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; + element->qh_rlink = NULL; + /* element->qh_link = NULL; TCP FIN1 crashes if you do this. Why ? 
/**
 * Switches a descriptor to non-blocking mode.
 *
 * When FIONBIO is available the mode is set with a single ioctlsocket()
 * call; otherwise O_NONBLOCK is added to the descriptor's current file
 * status flags via fcntl().
 *
 * @param   fd  The descriptor to modify.
 *
 * @note    Failures are not reported to the caller (the function returns
 *          void); the descriptor is simply left as it was.
 */
void
fd_nonblock(int fd)
{
# ifdef FIONBIO
#  ifdef RT_OS_WINDOWS
    u_long opt = 1;
#  else
    int opt = 1;
#  endif
    ioctlsocket(fd, FIONBIO, &opt);
# else /* !FIONBIO */
    int opt;

    opt = fcntl(fd, F_GETFL, 0);
    /* F_GETFL yields -1 on error; OR-ing O_NONBLOCK into -1 and writing it
     * back would ask F_SETFL to turn on every status flag, so bail out. */
    if (opt == -1)
        return;
    opt |= O_NONBLOCK;
    fcntl(fd, F_SETFL, opt);
# endif
}
/**
 * Default zone allocation callback: hands out one item from @a zone.
 *
 * Takes the zone's critical section for the duration of the call.  If the
 * free list is non-empty, the first free item is (optionally) run through the
 * zone's pfInit callback, moved to the used list and returned.  If the free
 * list is empty and this is a master zone (no master_zone pointer), the
 * request fails and fDoXmitPending is set.  If it is a sub-zone, one item is
 * allocated recursively from the master zone, its header is re-initialised to
 * belong to this zone, it is put on this zone's free list and the loop is
 * retried.
 *
 * @returns Pointer to the item payload (the memory right after the
 *          struct item header), or NULL if nothing could be allocated or
 *          pfInit failed.
 * @param   zone    The zone to allocate from.
 * @param   size    Only logged, otherwise unused (zone->size is what counts).
 * @param   pflags  Only logged, otherwise unused.
 * @param   fWait   Only logged, otherwise unused.
 */
static void *slirp_uma_alloc(uma_zone_t zone,
                             int size, uint8_t *pflags, int fWait)
{
    struct item *it;
    uint8_t *sub_area;
    void *ret = NULL;
    int rc;

    NATMEM_LOG_FLOW_FUNC(("ENTER: %R[mzone], size:%d, pflags:%p, %RTbool\n", zone, size, pflags, fWait)); RT_NOREF(size, pflags, fWait);
    RTCritSectEnter(&zone->csZone);
    for (;;)
    {
        if (!LIST_EMPTY(&zone->free_items))
        {
            /* Fast path: take the head of the free list. */
            it = LIST_FIRST(&zone->free_items);
            Assert(it->magic == ITEM_MAGIC);
            rc = 0;
            if (zone->pfInit)
                rc = zone->pfInit(zone->pData, (void *)&it[1], (int /*sigh*/)zone->size, M_DONTWAIT);
            if (rc == 0)
            {
                /* Account for the item and move it from the free to the used list. */
                zone->cur_items++;
                LIST_REMOVE(it, list);
                LIST_INSERT_HEAD(&zone->used_items, it, list);
                slirp_zone_check_and_send_pending(zone); /* may exit+enter the cs! */
                ret = (void *)&it[1];
            }
            else
            {
                /* The item stays on the free list when initialisation fails. */
                AssertMsgFailed(("NAT: item initialization failed for zone %s\n", zone->name));
                ret = NULL;
            }
            break;
        }

        if (!zone->master_zone)
        {
            /* We're on the master zone and we can't allocate more. */
            NATMEM_LOG_2(("NAT: no room on %s zone\n", zone->name));
            /* AssertMsgFailed(("NAT: OOM!")); */
            /* Remember to kick pending transmits once memory is available again. */
            zone->fDoXmitPending = true;
            break;
        }

        /* We're on a sub-zone: get a chunk from the master zone and turn it
         * into an item of this zone.
         */
        sub_area = slirp_uma_alloc(zone->master_zone, (int /*sigh*/)zone->master_zone->size, NULL, 0);
        if (!sub_area)
        {
            /* No room on master */
            NATMEM_LOG_2(("NAT: no room on %s zone for %s zone\n", zone->master_zone->name, zone->name));
            break;
        }
        zone->max_items++;
        /* Step back from the payload to the item header in front of it. */
        it = &((struct item *)sub_area)[-1];
        /* It's the chunk descriptor of the master zone, we should remove it
         * from the master list first.
         */
        Assert((it->zone && it->zone->magic == ZONE_MAGIC));
        RTCritSectEnter(&it->zone->csZone);
        /** @todo should we alter count of master counters? */
        LIST_REMOVE(it, list);
        RTCritSectLeave(&it->zone->csZone);

        /** @todo '+ zone->size' should depend on a flag */
        /* Wipe the header and re-label the item as belonging to this zone. */
        memset(it, 0, sizeof(struct item));
        it->zone = zone;
        it->magic = ITEM_MAGIC;
        LIST_INSERT_HEAD(&zone->free_items, it, list);
        if (zone->cur_items >= zone->max_items)
            LogRel(("NAT: Zone(%s) has reached it maximum\n", zone->name));
        /* Loop: the free list is now non-empty, so the fast path will serve it. */
    }
    RTCritSectLeave(&zone->csZone);
    NATMEM_LOG_FLOW_FUNC(("LEAVE: %p\n", ret));
    return ret;
}
*/ + RTCritSectLeave(&zone->csZone); + NATMEM_LOG_FLOW_FUNC_LEAVE(); +} + +uma_zone_t uma_zcreate(PNATState pData, char *name, size_t size, + ctor_t ctor, dtor_t dtor, zinit_t init, zfini_t fini, int flags1, int flags2) +{ + uma_zone_t zone = NULL; + NATMEM_LOG_FLOW_FUNC(("ENTER: name:%s size:%d, ctor:%p, dtor:%p, init:%p, fini:%p, flags1:%RX32, flags2:%RX32\n", + name, ctor, dtor, init, fini, flags1, flags2)); RT_NOREF(flags1, flags2); + zone = RTMemAllocZ(sizeof(struct uma_zone)); + Assert((pData)); + zone->magic = ZONE_MAGIC; + zone->pData = pData; + zone->name = name; + zone->size = size; + zone->pfCtor = ctor; + zone->pfDtor = dtor; + zone->pfInit = init; + zone->pfFini = fini; + zone->pfAlloc = slirp_uma_alloc; + zone->pfFree = slirp_uma_free; + RTCritSectInit(&zone->csZone); + NATMEM_LOG_FLOW_FUNC(("LEAVE: %R[mzone]\n", zone)); + return zone; + +} +uma_zone_t uma_zsecond_create(char *name, ctor_t ctor, + dtor_t dtor, zinit_t init, zfini_t fini, uma_zone_t master) +{ + uma_zone_t zone; + Assert(master); + NATMEM_LOG_FLOW_FUNC(("ENTER: name:%s ctor:%p, dtor:%p, init:%p, fini:%p, master:%R[mzone]\n", + name, ctor, dtor, init, fini, master)); + zone = RTMemAllocZ(sizeof(struct uma_zone)); + if (zone == NULL) + { + NATMEM_LOG_FLOW_FUNC(("LEAVE: %R[mzone]\n", NULL)); + return NULL; + } + + Assert((master && master->pData)); + zone->magic = ZONE_MAGIC; + zone->pData = master->pData; + zone->name = name; + zone->pfCtor = ctor; + zone->pfDtor = dtor; + zone->pfInit = init; + zone->pfFini = fini; + zone->pfAlloc = slirp_uma_alloc; + zone->pfFree = slirp_uma_free; + zone->size = master->size; + zone->master_zone = master; + RTCritSectInit(&zone->csZone); + NATMEM_LOG_FLOW_FUNC(("LEAVE: %R[mzone]\n", zone)); + return zone; +} + +void uma_zone_set_max(uma_zone_t zone, int max) +{ + int i = 0; + struct item *it; + NATMEM_LOG_FLOW_FUNC(("ENTER: zone:%R[mzone], max:%d\n", zone, max)); + zone->max_items = max; + zone->area = RTMemAllocZ(max * (sizeof(struct item) + 
zone->size + sizeof(uint32_t))); + for (; i < max; ++i) + { + it = (struct item *)(((uint8_t *)zone->area) + i*(sizeof(struct item) + zone->size + sizeof(uint32_t))); + it->magic = ITEM_MAGIC; + it->zone = zone; + *(uint32_t *)(((uint8_t *)&it[1]) + zone->size) = 0xabadbabe; + LIST_INSERT_HEAD(&zone->free_items, it, list); + } + NATMEM_LOG_FLOW_FUNC_LEAVE(); +} + +void uma_zone_set_allocf(uma_zone_t zone, uma_alloc_t pfAlloc) +{ + NATMEM_LOG_FLOW_FUNC(("ENTER: zone:%R[mzone], pfAlloc:%Rfn\n", zone, pfAlloc)); + zone->pfAlloc = pfAlloc; + NATMEM_LOG_FLOW_FUNC_LEAVE(); +} + +void uma_zone_set_freef(uma_zone_t zone, uma_free_t pfFree) +{ + NATMEM_LOG_FLOW_FUNC(("ENTER: zone:%R[mzone], pfAlloc:%Rfn\n", zone, pfFree)); + zone->pfFree = pfFree; + NATMEM_LOG_FLOW_FUNC_LEAVE(); +} + +uint32_t *uma_find_refcnt(uma_zone_t zone, void *mem) +{ + /** @todo (vvl) this function supposed to work with special zone storing + reference counters */ + struct item *it = NULL; + NATMEM_LOG_FLOW_FUNC(("ENTER: zone:%R[mzone], mem:%p\n", zone, mem)); RT_NOREF(zone); + it = (struct item *)mem; /* 1st element */ + Assert(mem != NULL); + Assert(zone->magic == ZONE_MAGIC); + /* for returning pointer to counter we need get 0 elemnt */ + Assert(it[-1].magic == ITEM_MAGIC); + NATMEM_LOG_FLOW_FUNC(("LEAVE: %p\n", &it[-1].ref_count)); + return &it[-1].ref_count; +} + +void *uma_zalloc_arg(uma_zone_t zone, void *args, int how) +{ + void *mem; + Assert(zone->magic == ZONE_MAGIC); + NATMEM_LOG_FLOW_FUNC(("ENTER: zone:%R[mzone], args:%p, how:%RX32\n", zone, args, how)); RT_NOREF(how); + if (zone->pfAlloc == NULL) + { + NATMEM_LOG_FLOW_FUNC(("LEAVE: NULL\n")); + return NULL; + } + RTCritSectEnter(&zone->csZone); + mem = zone->pfAlloc(zone, (int /*sigh*/)zone->size, NULL, 0); + if (mem != NULL) + { + if (zone->pfCtor) + zone->pfCtor(zone->pData, mem, (int /*sigh*/)zone->size, args, M_DONTWAIT); + } + RTCritSectLeave(&zone->csZone); + NATMEM_LOG_FLOW_FUNC(("LEAVE: %p\n", mem)); + return mem; +} + +void 
/**
 * Gives an item back to its zone by invoking the zone's pfFree callback
 * while holding the zone's critical section.
 *
 * @param   zone   The zone the item was allocated from.
 * @param   mem    The item payload pointer; must not be NULL.
 * @param   flags  Only logged, otherwise unused.
 */
void uma_zfree_arg(uma_zone_t zone, void *mem, void *flags)
{
    struct item *pHdr;

    Assert(zone->magic == ZONE_MAGIC);
    Assert((zone->pfFree));
    Assert((mem));
    NATMEM_LOG_FLOW_FUNC(("ENTER: zone:%R[mzone], mem:%p, flags:%p\n", zone, mem, flags));
    RT_NOREF(flags);

    RTCritSectEnter(&zone->csZone);

    /* The bookkeeping header sits directly in front of the payload; it is
     * only inspected by the sanity checks below. */
    pHdr = (struct item *)mem - 1;
    Assert((pHdr->magic == ITEM_MAGIC));
    Assert((zone->magic == ZONE_MAGIC && zone == pHdr->zone));

    zone->pfFree(mem, 0, 0);

    RTCritSectLeave(&zone->csZone);
    NATMEM_LOG_FLOW_FUNC_LEAVE();
}
*/ + RTCritSectLeave(&master_zone->csZone); + } + NATMEM_LOG_FLOW_FUNC_LEAVE(); +} + +void slirp_null_arg_free(void *mem, void *arg) +{ + /** @todo (vvl) make it wiser */ + NATMEM_LOG_FLOW_FUNC(("ENTER: mem:%p, arg:%p\n", mem, arg)); + RT_NOREF(arg); + Assert(mem); + RTMemFree(mem); + NATMEM_LOG_FLOW_FUNC_LEAVE(); +} + +void *uma_zalloc(uma_zone_t zone, int len) +{ + NATMEM_LOG_FLOW_FUNC(("ENTER: zone:%R[mzone], len:%d\n", zone, len)); + RT_NOREF(zone, len); + NATMEM_LOG_FLOW_FUNC(("LEAVE: NULL")); + return NULL; +} + +struct mbuf *slirp_ext_m_get(PNATState pData, size_t cbMin, void **ppvBuf, size_t *pcbBuf) +{ + struct mbuf *m; + int size = MCLBYTES; + NATMEM_LOG_FLOW_FUNC(("ENTER: cbMin:%d, ppvBuf:%p, pcbBuf:%p\n", cbMin, ppvBuf, pcbBuf)); + + *ppvBuf = NULL; + *pcbBuf = 0; + + if (cbMin < MCLBYTES) + size = MCLBYTES; + else if (cbMin < MJUM9BYTES) + size = MJUM9BYTES; + else if (cbMin < MJUM16BYTES) + size = MJUM16BYTES; + else + { + AssertMsgFailed(("Unsupported size %zu", cbMin)); + NATMEM_LOG_FLOW_FUNC(("LEAVE: NULL (bad size %zu)\n", cbMin)); + return NULL; + } + + m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size); + if (m == NULL) + { + NATMEM_LOG_FLOW_FUNC(("LEAVE: NULL\n")); + return NULL; + } + m->m_len = size; + *ppvBuf = mtod(m, void *); + *pcbBuf = size; + NATMEM_LOG_FLOW_FUNC(("LEAVE: %p\n", m)); + return m; +} + +void slirp_ext_m_free(PNATState pData, struct mbuf *m, uint8_t *pu8Buf) +{ + + NATMEM_LOG_FLOW_FUNC(("ENTER: m:%p, pu8Buf:%p\n", m, pu8Buf)); + if ( !pu8Buf + && pu8Buf != mtod(m, uint8_t *)) + RTMemFree(pu8Buf); /* This buffer was allocated on heap */ + m_freem(pData, m); + NATMEM_LOG_FLOW_FUNC_LEAVE(); +} + +static void zone_destroy(uma_zone_t zone) +{ + RTCritSectEnter(&zone->csZone); + NATMEM_LOG_FLOW_FUNC(("ENTER: zone:%R[mzone]\n", zone)); + LogRel(("NAT: Zone(nm:%s, used:%d)\n", zone->name, zone->cur_items)); + RTMemFree(zone->area); + RTCritSectLeave(&zone->csZone); + RTCritSectDelete(&zone->csZone); + RTMemFree(zone); + 
NATMEM_LOG_FLOW_FUNC_LEAVE(); +} + +void m_fini(PNATState pData) +{ + NATMEM_LOG_FLOW_FUNC_ENTER(); +# define ZONE_DESTROY(zone) do { zone_destroy((zone)); (zone) = NULL;} while (0) + ZONE_DESTROY(pData->zone_clust); + ZONE_DESTROY(pData->zone_pack); + ZONE_DESTROY(pData->zone_mbuf); + ZONE_DESTROY(pData->zone_jumbop); + ZONE_DESTROY(pData->zone_jumbo9); + ZONE_DESTROY(pData->zone_jumbo16); + ZONE_DESTROY(pData->zone_ext_refcnt); +# undef ZONE_DESTROY + /** @todo do finalize here.*/ + NATMEM_LOG_FLOW_FUNC_LEAVE(); +} + +void +if_init(PNATState pData) +{ + /* 14 for ethernet */ + if_maxlinkhdr = 14; + if_comp = IF_AUTOCOMP; + if_mtu = 1500; + if_mru = 1500; +} + +#endif /* VBOX_NAT_TST_QUEUE */ diff --git a/src/VBox/Devices/Network/slirp/misc.h b/src/VBox/Devices/Network/slirp/misc.h new file mode 100644 index 00000000..ac25be25 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/misc.h @@ -0,0 +1,88 @@ +/* $Id: misc.h $ */ +/** @file + * NAT - helpers (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. 
#ifndef _MISC_H_
#define _MISC_H_

#ifdef VBOX_NAT_TST_QUEUE
/* The queue testcase builds without the NAT state structure; an opaque
 * pointer is enough for the prototypes below. */
typedef void *PNATState;
#endif

/* Circular doubly-linked queue primitives (implemented in misc.c). */
void slirp_insque  (PNATState, void *, void *);
void slirp_remque  (PNATState, void *);
# ifndef VBOX_NAT_TST_QUEUE
/* Stores our own address in the NAT state. */
void getouraddr (PNATState);
/* Switches a descriptor to non-blocking mode. */
void fd_nonblock  (int);

/* UMA-style zone allocator interface (the original comment said "UVM",
 * which looks like a typo - TODO confirm). */
#define UMA_ALIGN_PTR       (1 << 0)
#define UMA_ZONE_REFCNT     (1 << 1)
#define UMA_ZONE_MAXBUCKET  (1 << 2)
#define UMA_ZONE_ZINIT      (1 << 3)
#define UMA_SLAB_KERNEL     (1 << 4)
#define UMA_ZFLAG_FULL      (1 << 5)

struct uma_zone;
typedef struct uma_zone *uma_zone_t;

/* Zone-level allocation and free callbacks. */
typedef void *(*uma_alloc_t)(uma_zone_t, int, u_int8_t *, int);
typedef void (*uma_free_t)(void *, int, u_int8_t);

/* Per-item callbacks: constructor, destructor, initialiser and finaliser. */
typedef int (*ctor_t)(PNATState, void *, int, void *, int);
typedef void (*dtor_t)(PNATState, void *, int, void *);
typedef int (*zinit_t)(PNATState, void *, int, int);
typedef void (*zfini_t)(PNATState, void *, int);
/* Zone creation and configuration. */
uma_zone_t uma_zcreate(PNATState, char *, size_t, ctor_t, dtor_t, zinit_t, zfini_t, int, int);
uma_zone_t uma_zsecond_create(char *, ctor_t, dtor_t, zinit_t, zfini_t, uma_zone_t);
void uma_zone_set_max(uma_zone_t, int);
void uma_zone_set_allocf(uma_zone_t, uma_alloc_t);
void uma_zone_set_freef(uma_zone_t, uma_free_t);

/* Item allocation, release and bookkeeping. */
uint32_t *uma_find_refcnt(uma_zone_t, void *);
void *uma_zalloc(uma_zone_t, int);
void *uma_zalloc_arg(uma_zone_t, void *, int);
void uma_zfree(uma_zone_t, void *);
void uma_zfree_arg(uma_zone_t, void *, void *);
int uma_zone_exhausted_nolock(uma_zone_t);
void zone_drain(uma_zone_t);

void slirp_null_arg_free(void *, void *);
/* Destroys all mbuf zones of a NAT instance. */
void m_fini(PNATState pData);
# else /* VBOX_NAT_TST_QUEUE */
/* In the queue testcase the generic names map onto the slirp_ prototypes above. */
#  define insque slirp_insque
#  define remque slirp_remque
# endif
#endif /* !_MISC_H_ */
@@ -0,0 +1,653 @@ +/* $Id: queue.h $ */ +/** @file + * NAT - Queue handling. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD: src/sys/sys/queue.h,v 1.68 2006/10/24 11:20:29 ru Exp $ + */ + +#ifndef _SYS_QUEUE_H_ +#define _SYS_QUEUE_H_ + +#include <iprt/cdefs.h> +#ifdef RT_OS_WINDOWS +#ifdef SLIST_ENTRY +/*Here is a conflict with winnt.h*/ +#undef SLIST_ENTRY +#endif +#endif /* RT_OS_WINDOWS */ + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. 
+ * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. + * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. 
+ * + * + * SLIST LIST STAILQ TAILQ + * _HEAD + + + + + * _HEAD_INITIALIZER + + + + + * _ENTRY + + + + + * _INIT + + + + + * _EMPTY + + + + + * _FIRST + + + + + * _NEXT + + + + + * _PREV - - - + + * _LAST - - + + + * _FOREACH + + + + + * _FOREACH_SAFE + + + + + * _FOREACH_REVERSE - - - + + * _FOREACH_REVERSE_SAFE - - - + + * _INSERT_HEAD + + + + + * _INSERT_BEFORE - + - + + * _INSERT_AFTER + + + + + * _INSERT_TAIL - - + + + * _CONCAT - - + + + * _REMOVE_HEAD + - + - + * _REMOVE + + + + + * + */ +#ifdef QUEUE_MACRO_DEBUG +/* Store the last 2 places the queue element or head was altered */ +struct qm_trace { + char * lastfile; + int lastline; + char * prevfile; + int prevline; +}; + +#define TRACEBUF struct qm_trace trace; +#define TRASHIT(x) do {(x) = (void *)-1;} while (0) + +#define QMD_TRACE_HEAD(head) do { \ + (head)->trace.prevline = (head)->trace.lastline; \ + (head)->trace.prevfile = (head)->trace.lastfile; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + +#define QMD_TRACE_ELEM(elem) do { \ + (elem)->trace.prevline = (elem)->trace.lastline; \ + (elem)->trace.prevfile = (elem)->trace.lastfile; \ + (elem)->trace.lastline = __LINE__; \ + (elem)->trace.lastfile = __FILE__; \ +} while (0) + +#else +#define QMD_TRACE_ELEM(elem) +#define QMD_TRACE_HEAD(head) +#define TRACEBUF +#define TRASHIT(x) +#endif /* QUEUE_MACRO_DEBUG */ + +/* + * Singly-linked List declarations. + */ +#define SLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define SLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define SLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List functions. 
+ */ +#define SLIST_EMPTY(head) ((head)->slh_first == NULL) + +#define SLIST_FIRST(head) ((head)->slh_first) + +#define SLIST_FOREACH(var, head, field) \ + for ((var) = SLIST_FIRST((head)); \ + (var); \ + (var) = SLIST_NEXT((var), field)) + +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SLIST_FIRST((head)); \ + (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ + for ((varp) = &SLIST_FIRST((head)); \ + ((var) = *(varp)) != NULL; \ + (varp) = &SLIST_NEXT((var), field)) + +#define SLIST_INIT(head) do { \ + SLIST_FIRST((head)) = NULL; \ +} while (0) + +#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ + SLIST_NEXT((slistelm), field) = (elm); \ +} while (0) + +#define SLIST_INSERT_HEAD(head, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ + SLIST_FIRST((head)) = (elm); \ +} while (0) + +#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define SLIST_REMOVE(head, elm, type, field) do { \ + if (SLIST_FIRST((head)) == (elm)) { \ + SLIST_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = SLIST_FIRST((head)); \ + while (SLIST_NEXT(curelm, field) != (elm)) \ + curelm = SLIST_NEXT(curelm, field); \ + SLIST_NEXT(curelm, field) = \ + SLIST_NEXT(SLIST_NEXT(curelm, field), field); \ + } \ + TRASHIT((elm)->field.sle_next); \ +} while (0) + +#define SLIST_REMOVE_HEAD(head, field) do { \ + SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ +} while (0) + +/* + * Singly-linked Tail queue declarations. 
+ */ +#define STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first;/* first element */ \ + struct type **stqh_last;/* addr of last next element */ \ +} + +#define STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +#define STAILQ_ENTRY(type) \ +struct { \ + struct type *stqe_next; /* next element */ \ +} + +/* + * Singly-linked Tail queue functions. + */ +#define STAILQ_CONCAT(head1, head2) do { \ + if (!STAILQ_EMPTY((head2))) { \ + *(head1)->stqh_last = (head2)->stqh_first; \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_INIT((head2)); \ + } \ +} while (0) + +#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) + +#define STAILQ_FIRST(head) ((head)->stqh_first) + +#define STAILQ_FOREACH(var, head, field) \ + for((var) = STAILQ_FIRST((head)); \ + (var); \ + (var) = STAILQ_NEXT((var), field)) + + +#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = STAILQ_FIRST((head)); \ + (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define STAILQ_INIT(head) do { \ + STAILQ_FIRST((head)) = NULL; \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_NEXT((tqelm), field) = (elm); \ +} while (0) + +#define STAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_FIRST((head)) = (elm); \ +} while (0) + +#define STAILQ_INSERT_TAIL(head, elm, field) do { \ + STAILQ_NEXT((elm), field) = NULL; \ + *(head)->stqh_last = (elm); \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_LAST(head, type, field) \ + (STAILQ_EMPTY((head)) ? 
\ + NULL : \ + ((struct type *)(void *) \ + ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) + +#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + +#define STAILQ_REMOVE(head, elm, type, field) do { \ + if (STAILQ_FIRST((head)) == (elm)) { \ + STAILQ_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = STAILQ_FIRST((head)); \ + while (STAILQ_NEXT(curelm, field) != (elm)) \ + curelm = STAILQ_NEXT(curelm, field); \ + if ((STAILQ_NEXT(curelm, field) = \ + STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((curelm), field);\ + } \ + TRASHIT((elm)->field.stqe_next); \ +} while (0) + +#define STAILQ_REMOVE_HEAD(head, field) do { \ + if ((STAILQ_FIRST((head)) = \ + STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +/* + * List declarations. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. 
+ */ + +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_LIST_CHECK_HEAD(head, field) do { \ + if (LIST_FIRST((head)) != NULL && \ + LIST_FIRST((head))->field.le_prev != \ + &LIST_FIRST((head))) \ + panic("Bad list head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_LIST_CHECK_NEXT(elm, field) do { \ + if (LIST_NEXT((elm), field) != NULL && \ + LIST_NEXT((elm), field)->field.le_prev != \ + &((elm)->field.le_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_LIST_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.le_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_LIST_CHECK_HEAD(head, field) +#define QMD_LIST_CHECK_NEXT(elm, field) +#define QMD_LIST_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define LIST_EMPTY(head) ((head)->lh_first == NULL) + +#define LIST_FIRST(head) ((head)->lh_first) + +#define LIST_FOREACH(var, head, field) \ + for ((var) = LIST_FIRST((head)); \ + (var); \ + (var) = LIST_NEXT((var), field)) + +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST((head)); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define LIST_INIT(head) do { \ + LIST_FIRST((head)) = NULL; \ +} while (0) + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + QMD_LIST_CHECK_NEXT(listelm, field); \ + if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ + LIST_NEXT((listelm), field)->field.le_prev = \ + &LIST_NEXT((elm), field); \ + LIST_NEXT((listelm), field) = (elm); \ + (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_LIST_CHECK_PREV(listelm, field); \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + LIST_NEXT((elm), field) = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ +} while (0) + +#define 
LIST_INSERT_HEAD(head, elm, field) do { \ + QMD_LIST_CHECK_HEAD((head), field); \ + if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ + LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ + LIST_FIRST((head)) = (elm); \ + (elm)->field.le_prev = &LIST_FIRST((head)); \ +} while (0) + +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_REMOVE(elm, field) do { \ + QMD_LIST_CHECK_NEXT(elm, field); \ + QMD_LIST_CHECK_PREV(elm, field); \ + if (LIST_NEXT((elm), field) != NULL) \ + LIST_NEXT((elm), field)->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = LIST_NEXT((elm), field); \ + TRASHIT((elm)->field.le_next); \ + TRASHIT((elm)->field.le_prev); \ +} while (0) + +/* + * Tail queue declarations. + */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + +#define TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + +/* + * Tail queue functions. 
+ */ +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_TAILQ_CHECK_HEAD(head, field) do { \ + if (!TAILQ_EMPTY(head) && \ + TAILQ_FIRST((head))->field.tqe_prev != \ + &TAILQ_FIRST((head))) \ + panic("Bad tailq head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_TAIL(head, field) do { \ + if (*(head)->tqh_last != NULL) \ + panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_NEXT(elm, field) do { \ + if (TAILQ_NEXT((elm), field) != NULL && \ + TAILQ_NEXT((elm), field)->field.tqe_prev != \ + &((elm)->field.tqe_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_TAILQ_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.tqe_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_TAILQ_CHECK_HEAD(head, field) +#define QMD_TAILQ_CHECK_TAIL(head, headname) +#define QMD_TAILQ_CHECK_NEXT(elm, field) +#define QMD_TAILQ_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define TAILQ_CONCAT(head1, head2, field) do { \ + if (!TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + TAILQ_INIT((head2)); \ + QMD_TRACE_HEAD(head1); \ + QMD_TRACE_HEAD(head2); \ + } \ +} while (0) + +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + +#define TAILQ_FIRST(head) ((head)->tqh_first) + +#define TAILQ_FOREACH(var, head, field) \ + for ((var) = TAILQ_FIRST((head)); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, 
headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_INIT(head) do { \ + TAILQ_FIRST((head)) = NULL; \ + (head)->tqh_last = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + QMD_TAILQ_CHECK_NEXT(listelm, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else { \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + } \ + TAILQ_NEXT((listelm), field) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_TAILQ_CHECK_PREV(listelm, field); \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + TAILQ_NEXT((elm), field) = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + QMD_TAILQ_CHECK_HEAD(head, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ + TAILQ_FIRST((head))->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + TAILQ_FIRST((head)) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + QMD_TAILQ_CHECK_TAIL(head, field); \ + TAILQ_NEXT((elm), field) = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while 
#ifdef _KERNEL

/*
 * XXX insque() and remque() are an old way of handling certain queues.
 * They treat every queue element as if it began with a forward link
 * followed by a backward link, whatever its real type is.
 */

struct quehead {
	struct quehead *qh_link;	/* forward link */
	struct quehead *qh_rlink;	/* backward link */
};

#ifdef __CC_SUPPORTS___INLINE

/*
 * Link element `a' into the doubly linked ring directly after element `b'.
 */
static __inline void
insque(void *a, void *b)
{
	struct quehead *node = (struct quehead *)a;
	struct quehead *anchor = (struct quehead *)b;

	node->qh_rlink = anchor;
	node->qh_link = anchor->qh_link;
	anchor->qh_link = node;
	/* the old successor of the anchor now has the new node before it */
	node->qh_link->qh_rlink = node;
}

/*
 * Unlink element `a' from its ring.  Only the backward link of the removed
 * element is cleared; its forward link is left as it was.
 */
static __inline void
remque(void *a)
{
	struct quehead *node = (struct quehead *)a;
	struct quehead *succ = node->qh_link;
	struct quehead *pred;

	succ->qh_rlink = node->qh_rlink;
	pred = node->qh_rlink;
	pred->qh_link = succ;
	node->qh_rlink = 0;
}

#else /* !__CC_SUPPORTS___INLINE */

void	insque(void *a, void *b);
void	remque(void *a);

#endif /* __CC_SUPPORTS___INLINE */

#endif /* _KERNEL */
#ifdef RCP_STANDALONE
/**
 * Stand-alone test driver (built only with RCP_STANDALONE).
 *
 * Parses resolv.conf syntax from standard input (rcp_parse() falls back to
 * g_pStdIn when the file name is NULL in this configuration) and dumps the
 * resulting nameserver, domain and search list entries.
 *
 * @returns 0 on success, 1 if rcp_parse() failed.
 */
int main(int argc, char **argv)
{
    struct rcp_state state;
    unsigned i;
    int rc;

    NOREF(argc);
    NOREF(argv);

    /*
     * rcp_parse() reads state.rcps_flags before it clears the structure, so
     * the state must not be handed over uninitialised.
     */
    RT_ZERO(state);

    rc = rcp_parse(&state, NULL);
    if (RT_FAILURE(rc))
    {
        RTPrintf(">>> Failed: %Rrc\n", rc);
        return 1;
    }

    RTPrintf(">>> Success:\n");

    RTPrintf("rcps_num_nameserver = %u\n", state.rcps_num_nameserver);
    for (i = 0; i < state.rcps_num_nameserver; ++i)
    {
        if (state.rcps_str_nameserver[i] == NULL)
            LogRel((" nameserver %RTnaddr\n",
                    &state.rcps_nameserver[i]));
        else
            LogRel((" nameserver %RTnaddr (from \"%s\")\n",
                    &state.rcps_nameserver[i], state.rcps_str_nameserver[i]));
    }

    if (state.rcps_domain != NULL)
        RTPrintf("domain %s\n", state.rcps_domain);

    RTPrintf("rcps_num_searchlist = %u\n", state.rcps_num_searchlist);
    for (i = 0; i < state.rcps_num_searchlist; ++i)
    {
        RTPrintf("... %s\n", state.rcps_searchlist[i] ? state.rcps_searchlist[i] : "(null)");
    }

    return 0;
}
#endif
Success:\n"); + + RTPrintf("rcps_num_nameserver = %u\n", state.rcps_num_nameserver); + for (i = 0; i < state.rcps_num_nameserver; ++i) + { + if (state.rcps_str_nameserver[i] == NULL) + LogRel((" nameserver %RTnaddr\n", + &state.rcps_nameserver[i])); + else + LogRel((" nameserver %RTnaddr (from \"%s\")\n", + &state.rcps_nameserver[i], state.rcps_str_nameserver[i])); + } + + if (state.rcps_domain != NULL) + RTPrintf("domain %s\n", state.rcps_domain); + + RTPrintf("rcps_num_searchlist = %u\n", state.rcps_num_searchlist); + for (i = 0; i < state.rcps_num_searchlist; ++i) + { + RTPrintf("... %s\n", state.rcps_searchlist[i] ? state.rcps_searchlist[i] : "(null)"); + } + + return 0; +} +#endif + + +int rcp_parse(struct rcp_state *state, const char *filename) +{ + PRTSTREAM stream; +# define RCP_BUFFER_SIZE 256 + char buf[RCP_BUFFER_SIZE]; + char *pszAddrBuf; + size_t cbAddrBuf; + char *pszSearchBuf; + size_t cbSearchBuf; + uint32_t flags; +#ifdef RCP_ACCEPT_PORT /* OS X extention */ + uint32_t default_port = RTNETADDR_PORT_NA; +#endif + unsigned i; + int rc; + + AssertPtrReturn(state, VERR_INVALID_PARAMETER); + flags = state->rcps_flags; + + RT_ZERO(*state); + state->rcps_flags = flags; + + if (RT_UNLIKELY(filename == NULL)) + { +#ifdef RCP_STANDALONE + stream = g_pStdIn; /* for testing/debugging */ +#else + return VERR_INVALID_PARAMETER; +#endif + } + else + { + rc = RTStrmOpen(filename, "r", &stream); + if (RT_FAILURE(rc)) + return rc; + } + + + pszAddrBuf = state->rcps_nameserver_str_buffer; + cbAddrBuf = sizeof(state->rcps_nameserver_str_buffer); + + pszSearchBuf = state->rcps_searchlist_buffer; + cbSearchBuf = sizeof(state->rcps_searchlist_buffer); + + for (;;) + { + char *s, *tok; + + rc = RTStrmGetLine(stream, buf, sizeof(buf)); + if (RT_FAILURE(rc)) + { + if (rc == VERR_EOF) + rc = VINF_SUCCESS; + break; + } + + /* + * Strip comment if present. 
+ * + * This is not how ad-hoc parser in bind's res_init.c does it, + * btw, so this code will accept more input as valid compared + * to res_init. (e.g. "nameserver 1.1.1.1; comment" is + * misparsed by res_init). + */ + for (s = buf; *s != '\0'; ++s) + { + if (*s == '#' || *s == ';') + { + *s = '\0'; + break; + } + } + + tok = getToken(buf, &s); + if (tok == NULL) + continue; + + + /* + * NAMESERVER + */ + if (RTStrCmp(tok, "nameserver") == 0) + { + RTNETADDR NetAddr; + const char *pszAddr; + char *pszNext; + + if (RT_UNLIKELY(state->rcps_num_nameserver >= RCPS_MAX_NAMESERVERS)) + { + LogRel(("NAT: resolv.conf: too many nameserver lines, ignoring %s\n", s)); + continue; + } + + /* XXX: TODO: don't save strings unless asked to */ + if (RT_UNLIKELY(cbAddrBuf == 0)) + { + LogRel(("NAT: resolv.conf: no buffer space, ignoring %s\n", s)); + continue; + } + + + /* + * parse next token as an IP address + */ + tok = getToken(NULL, &s); + if (tok == NULL) + { + LogRel(("NAT: resolv.conf: nameserver line without value\n")); + continue; + } + + pszAddr = tok; + RT_ZERO(NetAddr); + NetAddr.uPort = RTNETADDR_PORT_NA; + + /* if (NetAddr.enmType == RTNETADDRTYPE_INVALID) */ + { + rc = RTNetStrToIPv4AddrEx(tok, &NetAddr.uAddr.IPv4, &pszNext); + if (RT_SUCCESS(rc)) + { + rc = rcp_address_trailer(&pszNext, &NetAddr, RTNETADDRTYPE_IPV4); + if (RT_FAILURE(rc)) + { + LogRel(("NAT: resolv.conf: garbage at the end of IPv4 address %s\n", tok)); + continue; + } + + LogRel(("NAT: resolv.conf: nameserver %RTnaddr\n", &NetAddr)); + } + } /* IPv4 */ + + if (NetAddr.enmType == RTNETADDRTYPE_INVALID) + { + rc = RTNetStrToIPv6AddrEx(tok, &NetAddr.uAddr.IPv6, &pszNext); + if (RT_SUCCESS(rc)) + { + if (*pszNext == '%') /* XXX: TODO: IPv6 zones */ + { + size_t zlen = RTStrOffCharOrTerm(pszNext, '.'); + LogRel(("NAT: resolv.conf: FIXME: ignoring IPv6 zone %*.*s\n", + zlen, zlen, pszNext)); + pszNext += zlen; + } + + rc = rcp_address_trailer(&pszNext, &NetAddr, RTNETADDRTYPE_IPV6); + if 
(RT_FAILURE(rc)) + { + LogRel(("NAT: resolv.conf: garbage at the end of IPv6 address %s\n", tok)); + continue; + } + + LogRel(("NAT: resolv.conf: nameserver %RTnaddr\n", &NetAddr)); + } + } /* IPv6 */ + + if (NetAddr.enmType == RTNETADDRTYPE_INVALID) + { + LogRel(("NAT: resolv.conf: bad nameserver address %s\n", tok)); + continue; + } + + + tok = getToken(NULL, &s); + if (tok != NULL) + LogRel(("NAT: resolv.conf: ignoring unexpected trailer on the nameserver line\n")); + + if ((flags & RCPSF_IGNORE_IPV6) && NetAddr.enmType == RTNETADDRTYPE_IPV6) + { + Log2(("NAT: resolv.conf: IPv6 address ignored\n")); + continue; + } + + /* seems ok, save it */ + { + i = state->rcps_num_nameserver; + + state->rcps_nameserver[i] = NetAddr; + + /* XXX: TODO: don't save strings unless asked to */ + Log2(("NAT: resolv.conf: saving address @%td,+%zu\n", + pszAddrBuf - state->rcps_nameserver_str_buffer, cbAddrBuf)); + state->rcps_str_nameserver[i] = pszAddrBuf; + rc = RTStrCopyP(&pszAddrBuf, &cbAddrBuf, pszAddr); + if (RT_SUCCESS(rc)) + { + ++pszAddrBuf; /* skip '\0' */ + if (cbAddrBuf > 0) /* on overflow we get 1 (for the '\0'), but be defensive */ + --cbAddrBuf; + ++state->rcps_num_nameserver; + } + else + { + Log2(("NAT: resolv.conf: ... 
truncated\n")); + } + } + + continue; + } + + +#ifdef RCP_ACCEPT_PORT /* OS X extention */ + /* + * PORT + */ + if (RTStrCmp(tok, "port") == 0) + { + uint16_t port; + + if (default_port != RTNETADDR_PORT_NA) + { + LogRel(("NAT: resolv.conf: ignoring multiple port lines\n")); + continue; + } + + tok = getToken(NULL, &s); + if (tok == NULL) + { + LogRel(("NAT: resolv.conf: port line without value\n")); + continue; + } + + rc = RTStrToUInt16Full(tok, 10, &port); + if (RT_SUCCESS(rc)) + { + if (port != 0) + default_port = port; + else + LogRel(("NAT: resolv.conf: port 0 is invalid\n")); + } + + continue; + } +#endif + + + /* + * DOMAIN + */ + if (RTStrCmp(tok, "domain") == 0) + { + if (state->rcps_domain != NULL) + { + LogRel(("NAT: resolv.conf: ignoring multiple domain lines\n")); + continue; + } + + tok = getToken(NULL, &s); + if (tok == NULL) + { + LogRel(("NAT: resolv.conf: domain line without value\n")); + continue; + } + + rc = RTStrCopy(state->rcps_domain_buffer, sizeof(state->rcps_domain_buffer), tok); + if (RT_SUCCESS(rc)) + { + state->rcps_domain = state->rcps_domain_buffer; + } + else + { + LogRel(("NAT: resolv.conf: domain name too long\n")); + RT_ZERO(state->rcps_domain_buffer); + } + + continue; + } + + + /* + * SEARCH + */ + if (RTStrCmp(tok, "search") == 0) + { + while ((tok = getToken(NULL, &s)) && tok != NULL) + { + i = state->rcps_num_searchlist; + if (RT_UNLIKELY(i >= RCPS_MAX_SEARCHLIST)) + { + LogRel(("NAT: resolv.conf: too many search domains, ignoring %s\n", tok)); + continue; + } + + Log2(("NAT: resolv.conf: saving search %s @%td,+%zu\n", + tok, pszSearchBuf - state->rcps_searchlist_buffer, cbSearchBuf)); + state->rcps_searchlist[i] = pszSearchBuf; + rc = RTStrCopyP(&pszSearchBuf, &cbSearchBuf, tok); + if (RT_SUCCESS(rc)) + { + ++pszSearchBuf; /* skip '\0' */ + if (cbSearchBuf > 0) /* on overflow we get 1 (for the '\0'), but be defensive */ + --cbSearchBuf; + ++state->rcps_num_searchlist; + } + else + { + LogRel(("NAT: resolv.conf: no buffer 
space, ignoring search domain %s\n", tok)); + pszSearchBuf = state->rcps_searchlist[i]; + cbSearchBuf = sizeof(state->rcps_searchlist_buffer) + - (pszSearchBuf - state->rcps_searchlist_buffer); + Log2(("NAT: resolv.conf: backtracking to @%td,+%zu\n", + pszSearchBuf - state->rcps_searchlist_buffer, cbSearchBuf)); + } + } + + continue; + } + + + LogRel(("NAT: resolv.conf: ignoring \"%s %s\"\n", tok, s)); + } + + if (filename != NULL) + RTStrmClose(stream); + + if (RT_FAILURE(rc)) + return rc; + + + /* XXX: I don't like that OS X would return a different result here */ +#ifdef RCP_ACCEPT_PORT /* OS X extention */ + if (default_port == RTNETADDR_PORT_NA) + default_port = 53; + + for (i = 0; i < state->rcps_num_nameserver; ++i) + { + RTNETADDR *addr = &state->rcps_nameserver[i]; + if (addr->uPort == RTNETADDR_PORT_NA || addr->uPort == 0) + addr->uPort = (uint16_t)default_port; + } +#endif + + if ( state->rcps_domain == NULL + && state->rcps_num_searchlist > 0) + { + state->rcps_domain = state->rcps_searchlist[0]; + } + + return VINF_SUCCESS; +} + + +static int +rcp_address_trailer(char **ppszNext, PRTNETADDR pNetAddr, RTNETADDRTYPE enmType) +{ + char *pszNext = *ppszNext; + int rc = VINF_SUCCESS; + + if (*pszNext == '\0') + { + pNetAddr->enmType = enmType; + rc = VINF_SUCCESS; + } +#ifdef RCP_ACCEPT_PORT /* OS X extention */ + else if (*pszNext == '.') + { + uint16_t port; + + rc = RTStrToUInt16Ex(++pszNext, NULL, 10, &port); + if (RT_SUCCESS(rc)) + { + pNetAddr->enmType = enmType; + pNetAddr->uPort = port; + } + } +#endif + else + { + rc = VERR_TRAILING_CHARS; + } + + return rc; +} + + +static char *getToken(char *psz, char **ppszSavePtr) +{ + char *pszToken; + + AssertPtrReturn(ppszSavePtr, NULL); + + if (psz == NULL) + { + psz = *ppszSavePtr; + if (psz == NULL) + return NULL; + } + + while (*psz == ' ' || *psz == '\t') + ++psz; + + if (*psz == '\0') + { + *ppszSavePtr = NULL; + return NULL; + } + + pszToken = psz; + while (*psz && *psz != ' ' && *psz != '\t') + 
/**
 * Re-entrant tokenizer splitting a line at spaces and tabs.
 *
 * Works like strtok_r() with a fixed delimiter set: the first call passes
 * the string, subsequent calls pass NULL and continue from the position
 * kept in @a ppszSavePtr.  The input string is modified; a terminator is
 * written over the blank that ends each token.
 *
 * @returns Pointer to the next token or NULL when there are no more.
 * @param   psz           String to start with, or NULL to continue.
 * @param   ppszSavePtr   Where the continuation position is kept.  Set to
 *                        NULL once the end of the string has been reached.
 */
static char *getToken(char *psz, char **ppszSavePtr)
{
    char *pszCur;
    char *pszStart;

    AssertPtrReturn(ppszSavePtr, NULL);

    /* Continue from the saved position unless a new string is given. */
    pszCur = psz != NULL ? psz : *ppszSavePtr;
    if (pszCur == NULL)
        return NULL;

    /* Skip leading blanks. */
    for (; *pszCur == ' ' || *pszCur == '\t'; ++pszCur)
    {
        /* nothing */
    }

    if (*pszCur == '\0')
    {
        *ppszSavePtr = NULL;
        return NULL;
    }

    /* Find the end of the token. */
    pszStart = pszCur;
    for (; *pszCur != '\0'; ++pszCur)
    {
        if (*pszCur == ' ' || *pszCur == '\t')
            break;
    }

    if (*pszCur != '\0')
    {
        /* Terminate the token and resume right after the blank next time. */
        *pszCur = '\0';
        *ppszSavePtr = pszCur + 1;
    }
    else
        *ppszSavePtr = NULL;

    return pszStart;
}
/**
 * Result of parsing a resolv.conf style file with rcp_parse().
 *
 * The caller sets rcps_flags before the call; rcp_parse() clears every other
 * member and fills it from the file.  All string pointers refer to the
 * buffers at the end of the structure, so the structure must not be copied
 * by value without fixing them up.
 */
struct rcp_state
{
    /**
     * rcp_parse() clears this with the rest of the structure and does not
     * set it otherwise; per-server ports are reported in
     * rcps_nameserver[i].uPort.
     */
    uint16_t rcps_port;
    /**
     * Parsed nameserver addresses; the first rcps_num_nameserver entries
     * are valid.
     */
    RTNETADDR rcps_nameserver[RCPS_MAX_NAMESERVERS];
    /**
     * For each valid nameserver, the address text as written in the file
     * (pointing into rcp_state::rcps_nameserver_str_buffer).
     */
    char *rcps_str_nameserver[RCPS_MAX_NAMESERVERS];
    /** Number of valid entries in rcps_nameserver and rcps_str_nameserver. */
    unsigned rcps_num_nameserver;
    /**
     * Shortcuts to storage.  Note that the domain is optional: if the file
     * has no "domain" line, rcps_domain is set to rcps_searchlist[0] when
     * there is a search list, and stays NULL otherwise.
     */
    char *rcps_domain;
    /** Search domains, pointing into rcp_state::rcps_searchlist_buffer. */
    char *rcps_searchlist[RCPS_MAX_SEARCHLIST];
    /** Number of valid entries in rcps_searchlist. */
    unsigned rcps_num_searchlist;

    /** RCPSF_XXX flags; input to rcp_parse(), preserved across the call. */
    uint32_t rcps_flags;

    /** Storage for the "domain" value. */
    char rcps_domain_buffer[RCPS_BUFFER_SIZE];
    /** Storage for the search domains, stored back to back. */
    char rcps_searchlist_buffer[RCPS_BUFFER_SIZE];
    /** Storage for the nameserver address strings, stored back to back. */
    char rcps_nameserver_str_buffer[RCPS_MAX_NAMESERVERS * RCPS_IPVX_SIZE];
};
/**
 * Parses the specified file (expected to conform to resolver(5) on Mac OS X
 * or resolv.conf(5) on Linux) and fills the structure.
 *
 * The caller must initialise rcp_state::rcps_flags before the call; all
 * other members are cleared by the function.
 *
 * @returns IPRT status code: VINF_SUCCESS on success,
 *          VERR_INVALID_PARAMETER for invalid arguments, or the failure
 *          status of opening or reading the file.
 *
 * <code>
 * struct rcp_state state;
 * unsigned i;
 * int rc;
 *
 * RT_ZERO(state);
 * rc = rcp_parse(&state, "/etc/resolv.conf");
 * for (i = 0; RT_SUCCESS(rc) && i != state.rcps_num_nameserver; ++i)
 * {
 *     const RTNETADDR *addr = &state.rcps_nameserver[i];
 *
 *     switch (addr->enmType)
 *     {
 *         case RTNETADDRTYPE_IPV4:
 *             RTPrintf("nameserver[%d]: [%RTnaipv4]:%d\n", i, addr->uAddr.IPv4, addr->uPort);
 *             break;
 *         case RTNETADDRTYPE_IPV6:
 *             RTPrintf("nameserver[%d]: [%RTnaipv6]:%d\n", i, &addr->uAddr.IPv6, addr->uPort);
 *             break;
 *         default:
 *             break;
 *     }
 *     RTPrintf("nameserver[%d]: %s\n", i, state.rcps_str_nameserver[i]);
 * }
 * </code>
 *
 */
int rcp_parse(struct rcp_state *, const char *);
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> + +/* Done as a macro in socket.h */ +/* int + * sbspace(struct sockbuff *sb) + * { + * return SB_DATALEN - sb->sb_cc; + * } + */ + +void +sbfree(struct sbuf *sb) +{ + /* + * Catch double frees. Actually tcp_close() already filters out listening sockets + * passing NULL. + */ + Assert((sb->sb_data)); + + /* + * Don't call RTMemFree() for an already freed buffer, the EFence could complain + */ + if (sb->sb_data) + { + RTMemFree(sb->sb_data); + sb->sb_data = NULL; + } +} + +void +sbdrop(struct sbuf *sb, int num) +{ + /* + * We can only drop how much we have + * This should never succeed + */ + if (num > sb->sb_cc) + num = sb->sb_cc; + sb->sb_cc -= num; + sb->sb_rptr += num; + if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen) + sb->sb_rptr -= sb->sb_datalen; + +} + +void +sbreserve(PNATState pData, struct sbuf *sb, int size) +{ + NOREF(pData); + if (sb->sb_data) + { + /* Already alloced, realloc if necessary */ + if (sb->sb_datalen != (u_int)size) + { + sb->sb_wptr = + sb->sb_rptr = + sb->sb_data = (char *)RTMemReallocZ(sb->sb_data, sb->sb_datalen, size); + sb->sb_cc = 0; + if (sb->sb_wptr) + sb->sb_datalen = size; + else + sb->sb_datalen = 0; + } + } + else + { + sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)RTMemAllocZ(size); + sb->sb_cc = 0; + if (sb->sb_wptr) + sb->sb_datalen = size; + else + sb->sb_datalen = 0; + } +} + +/* + * Try and write() to the socket, whatever doesn't get written + * append to the buffer... 
/*
 * Try and write() to the socket, whatever doesn't get written
 * append to the buffer... for a host with a fast net connection,
 * this prevents an unnecessary copy of the data
 * (the socket is non-blocking, so we won't hang)
 *
 * The mbuf chain m is always freed before the function returns.
 */
void
sbappend(PNATState pData, struct socket *so, struct mbuf *m)
{
    int ret = 0;
    int mlen = 0;

    STAM_PROFILE_START(&pData->StatIOSBAppend_pf, a);
    LogFlow(("sbappend: so = %p, m = %p, m->m_len = %d\n", so, m, m ? m->m_len : 0));

    STAM_COUNTER_INC(&pData->StatIOSBAppend);
    /* Shouldn't happen, but... e.g. foreign host closes connection */
    mlen = m_length(m, NULL);
    if (mlen <= 0)
    {
        STAM_COUNTER_INC(&pData->StatIOSBAppend_zm);
        goto done;
    }

    /*
     * If there is urgent data, call sosendoob
     * if not all was sent, sowrite will take care of the rest
     * (The rest of this function is just an optimisation)
     */
    if (so->so_urgc)
    {
        sbappendsb(pData, &so->so_rcv, m);
        /* m is freed here, so this path must not fall through to "done" */
        m_freem(pData, m);
        sosendoob(so);
        return;
    }

    /*
     * We only write if there's nothing in the buffer,
     * otherwise it'll arrive out of order, and hence corrupt
     */
    if (so->so_rcv.sb_cc == 0)
    {
        caddr_t buf = NULL;

        if (m->m_next)
        {
            /* The data is spread over a chain: linearise it for send(). */
            buf = RTMemAllocZ(mlen);
            if (buf == NULL)
            {
                /* Treat as "nothing sent" so that the data gets buffered. */
                ret = 0;
                goto no_sent;
            }
            m_copydata(m, 0, mlen, buf);
        }
        else
            buf = mtod(m, char *);

        ret = send(so->s, buf, mlen, 0);

        /* buf is a temporary copy only in the chained case */
        if (m->m_next)
            RTMemFree(buf);
    }
no_sent:

    if (ret <= 0)
    {
        STAM_COUNTER_INC(&pData->StatIOSBAppend_wf);
        /*
         * Nothing was written
         * It's possible that the socket has closed, but
         * we don't need to check because if it has closed,
         * it will be detected in the normal way by soread()
         */
        sbappendsb(pData, &so->so_rcv, m);
        STAM_PROFILE_STOP(&pData->StatIOSBAppend_pf_wf, a);
        goto done;
    }
    else if (ret != mlen)
    {
        STAM_COUNTER_INC(&pData->StatIOSBAppend_wp);
        /*
         * Something was written, but not everything..
         * sbappendsb the rest
         */
        m_adj(m, ret);
        sbappendsb(pData, &so->so_rcv, m);
        STAM_PROFILE_STOP(&pData->StatIOSBAppend_pf_wp, a);
        goto done;
    } /* else */
    /* Whatever happened, we free the mbuf */
    STAM_COUNTER_INC(&pData->StatIOSBAppend_wa);
    STAM_PROFILE_STOP(&pData->StatIOSBAppend_pf_wa, a);
done:
    m_freem(pData, m);
}
+ * sbappendsb the rest + */ + m_adj(m, ret); + sbappendsb(pData, &so->so_rcv, m); + STAM_PROFILE_STOP(&pData->StatIOSBAppend_pf_wp, a); + goto done; + } /* else */ + /* Whatever happened, we free the mbuf */ + STAM_COUNTER_INC(&pData->StatIOSBAppend_wa); + STAM_PROFILE_STOP(&pData->StatIOSBAppend_pf_wa, a); +done: + m_freem(pData, m); +} + +/* + * Copy the data from m into sb + * The caller is responsible to make sure there's enough room + */ +void +sbappendsb(PNATState pData, struct sbuf *sb, struct mbuf *m) +{ + int len, n, nn; +#ifndef VBOX_WITH_STATISTICS + NOREF(pData); +#endif + + len = m_length(m, NULL); + + STAM_COUNTER_INC(&pData->StatIOSBAppendSB); + if (sb->sb_wptr < sb->sb_rptr) + { + STAM_COUNTER_INC(&pData->StatIOSBAppendSB_w_l_r); + n = sb->sb_rptr - sb->sb_wptr; + if (n > len) + n = len; + m_copydata(m, 0, n, sb->sb_wptr); + } + else + { + STAM_COUNTER_INC(&pData->StatIOSBAppendSB_w_ge_r); + /* Do the right edge first */ + n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; + if (n > len) + n = len; + m_copydata(m, 0, n, sb->sb_wptr); + len -= n; + if (len) + { + /* Now the left edge */ + nn = sb->sb_rptr - sb->sb_data; + if (nn > len) + nn = len; + m_copydata(m, n, nn, sb->sb_data); + n += nn; + } + } + + sb->sb_cc += n; + sb->sb_wptr += n; + if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) + { + STAM_COUNTER_INC(&pData->StatIOSBAppendSB_w_alter); + sb->sb_wptr -= sb->sb_datalen; + } +} + +/* + * Copy data from sbuf to a normal, straight buffer + * Don't update the sbuf rptr, this will be + * done in sbdrop when the data is acked + */ +void +sbcopy(struct sbuf *sb, int off, int len, char *to) +{ + char *from; + + from = sb->sb_rptr + off; + if (from >= sb->sb_data + sb->sb_datalen) + from -= sb->sb_datalen; + + if (from < sb->sb_wptr) + { + if (len > sb->sb_cc) + len = sb->sb_cc; + memcpy(to, from, len); + } + else + { + /* re-use off */ + off = (sb->sb_data + sb->sb_datalen) - from; + if (off > len) + off = len; + memcpy(to, from, off); + len -= off; 
+ if (len) + memcpy(to+off, sb->sb_data, len); + } +} diff --git a/src/VBox/Devices/Network/slirp/sbuf.h b/src/VBox/Devices/Network/slirp/sbuf.h new file mode 100644 index 00000000..35983c0d --- /dev/null +++ b/src/VBox/Devices/Network/slirp/sbuf.h @@ -0,0 +1,63 @@ +/* $Id: sbuf.h $ */ +/** @file + * NAT - sbuf declarations/defines. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef _SBUF_H_ +#define _SBUF_H_ + +# define sbflush(sb) sbdrop((sb),(sb)->sb_cc) +# define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) +# define SBUF_LEN(sb) ((sb)->sb_cc) +# define SBUF_SIZE(sb) ((sb)->sb_datalen) + + +struct sbuf +{ + u_int sb_cc; /* actual chars in buffer */ + u_int sb_datalen; /* Length of data */ + char *sb_wptr; /* write pointer. points to where the next + * bytes should be written in the sbuf */ + char *sb_rptr; /* read pointer. 
/*
 * Ring buffer holding the data queued on a socket.
 */
struct sbuf
{
    u_int   sb_cc;      /* number of bytes currently stored in the buffer */
    u_int   sb_datalen; /* size of the storage at sb_data (set by sbreserve) */
    char    *sb_wptr;   /* write pointer. points to where the next
                         * bytes should be written in the sbuf */
    char    *sb_rptr;   /* read pointer. points to where the next
                         * byte should be read from the sbuf */
    char    *sb_data;   /* start of the storage; NULL if none is allocated */
};

void sbfree (struct sbuf *);                                  /* release the storage */
void sbdrop (struct sbuf *, int);                             /* discard bytes from the front */
void sbreserve (PNATState, struct sbuf *, int);               /* (re)allocate the storage */
void sbappend (PNATState, struct socket *, struct mbuf *);    /* send, or queue on so_rcv */
void sbappendsb (PNATState, struct sbuf *, struct mbuf *);    /* copy mbuf data into the sbuf */
void sbcopy (struct sbuf *, int, int, char *);                /* copy out without consuming */
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * libslirp glue + * + * Copyright (c) 2004-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "slirp.h" +#ifdef RT_OS_OS2 +# include <paths.h> +#endif + +#include <iprt/errcore.h> +#include <VBox/vmm/dbgf.h> +#include <VBox/vmm/pdmdrv.h> +#include <iprt/assert.h> +#include <iprt/file.h> +#include <iprt/path.h> +#ifndef RT_OS_WINDOWS +# include <sys/ioctl.h> +# include <poll.h> +# include <netinet/in.h> +#else +# include <Winnls.h> +# define _WINSOCK2API_ +# include <iprt/win/iphlpapi.h> +#endif +#include <alias.h> + +#ifndef RT_OS_WINDOWS +/** + * XXX: It shouldn't be non-Windows specific. + * resolv_conf_parser.h client's structure isn't OS specific, it's just need to be generalized a + * a bit to replace slirp_state.h DNS server (domain) lists with rcp_state like structure. 
/*
 * poll() flavour of the event registration macros (non-Windows hosts).
 *
 * Both macros expect the following names in the scope they are used in:
 * `polls' (array of struct pollfd being filled), `poll_index' (next free
 * slot) and `nfds' (number of slots).  If the socket already owns a slot
 * for its descriptor, the requested events are added to that slot;
 * otherwise the next free slot is claimed for it.  `label' is not used by
 * this flavour.
 */
# define DO_ENGAGE_EVENT1(so, fdset, label)                             \
    do {                                                                \
        if (   (so)->so_poll_index != -1                                \
            && (so)->s == polls[(so)->so_poll_index].fd)                \
        {                                                               \
            polls[(so)->so_poll_index].events |= N_(fdset ## _poll);    \
            break;                                                      \
        }                                                               \
        AssertRelease(poll_index >= 0 && poll_index < (nfds));          \
        polls[poll_index].fd = (so)->s;                                 \
        (so)->so_poll_index = poll_index;                               \
        polls[poll_index].events = N_(fdset ## _poll);                  \
        polls[poll_index].revents = 0;                                  \
        poll_index++;                                                   \
    } while (0)

/* Same as DO_ENGAGE_EVENT1, registering two event sets at once. */
# define DO_ENGAGE_EVENT2(so, fdset1, fdset2, label)                    \
    do {                                                                \
        if (   (so)->so_poll_index != -1                                \
            && (so)->s == polls[(so)->so_poll_index].fd)                \
        {                                                               \
            polls[(so)->so_poll_index].events |=                        \
                N_(fdset1 ## _poll) | N_(fdset2 ## _poll);              \
            break;                                                      \
        }                                                               \
        AssertRelease(poll_index >= 0 && poll_index < (nfds));          \
        polls[poll_index].fd = (so)->s;                                 \
        (so)->so_poll_index = poll_index;                               \
        polls[poll_index].events =                                      \
            N_(fdset1 ## _poll) | N_(fdset2 ## _poll);                  \
        polls[poll_index].revents = 0;                                  \
        poll_index++;                                                   \
    } while (0)

/* With poll() the events are already in `polls'; nothing to fetch here. */
# define DO_POLL_EVENTS(rc, error, so, events, label) do {} while (0)
+ */ +# define DO_CHECK_FD_SET(so, events, fdset) \ + ( ((so)->so_poll_index != -1) \ + && ((so)->so_poll_index <= ndfs) \ + && ((so)->s == polls[so->so_poll_index].fd) \ + && (polls[(so)->so_poll_index].revents & N_(fdset ## _poll)) \ + && ( N_(fdset ## _poll) == POLLNVAL \ + || !(polls[(so)->so_poll_index].revents & POLLNVAL))) + + /* specific for Windows Winsock API */ +# define DO_WIN_CHECK_FD_SET(so, events, fdset) 0 + +# ifndef RT_OS_LINUX +# define readfds_poll (POLLRDNORM) +# define writefds_poll (POLLWRNORM) +# else +# define readfds_poll (POLLIN) +# define writefds_poll (POLLOUT) +# endif +# define xfds_poll (POLLPRI) +# define closefds_poll (POLLHUP) +# define rderr_poll (POLLERR) +# if 0 /* unused yet */ +# define rdhup_poll (POLLHUP) +# define nval_poll (POLLNVAL) +# endif + +# define ICMP_ENGAGE_EVENT(so, fdset) \ + do { \ + if (pData->icmp_socket.s != -1) \ + DO_ENGAGE_EVENT1((so), fdset, ICMP); \ + } while (0) + +#else /* RT_OS_WINDOWS */ + +/* + * On Windows, we will be notified by IcmpSendEcho2() when the response arrives. + * So no call to WSAEventSelect necessary. + */ +# define ICMP_ENGAGE_EVENT(so, fdset) do {} while (0) + +/* + * On Windows we use FD_ALL_EVENTS to ensure that we don't miss any event. 
+ */ +# define DO_ENGAGE_EVENT1(so, fdset1, label) \ + do { \ + rc = WSAEventSelect((so)->s, VBOX_SOCKET_EVENT, FD_ALL_EVENTS); \ + if (rc == SOCKET_ERROR) \ + { \ + /* This should not happen */ \ + error = WSAGetLastError(); \ + LogRel(("WSAEventSelect (" #label ") error %d (so=%x, socket=%s, event=%x)\n", \ + error, (so), (so)->s, VBOX_SOCKET_EVENT)); \ + } \ + } while (0); \ + CONTINUE(label) + +# define DO_ENGAGE_EVENT2(so, fdset1, fdset2, label) \ + DO_ENGAGE_EVENT1((so), (fdset1), label) + +# define DO_POLL_EVENTS(rc, error, so, events, label) \ + (rc) = WSAEnumNetworkEvents((so)->s, VBOX_SOCKET_EVENT, (events)); \ + if ((rc) == SOCKET_ERROR) \ + { \ + (error) = WSAGetLastError(); \ + LogRel(("WSAEnumNetworkEvents %R[natsock] " #label " error %d\n", (so), (error))); \ + LogFunc(("WSAEnumNetworkEvents %R[natsock] " #label " error %d\n", (so), (error))); \ + CONTINUE(label); \ + } + +# define acceptds_win FD_ACCEPT +# define acceptds_win_bit FD_ACCEPT_BIT +# define readfds_win FD_READ +# define readfds_win_bit FD_READ_BIT +# define writefds_win FD_WRITE +# define writefds_win_bit FD_WRITE_BIT +# define xfds_win FD_OOB +# define xfds_win_bit FD_OOB_BIT +# define closefds_win FD_CLOSE +# define closefds_win_bit FD_CLOSE_BIT +# define connectfds_win FD_CONNECT +# define connectfds_win_bit FD_CONNECT_BIT + +# define closefds_win FD_CLOSE +# define closefds_win_bit FD_CLOSE_BIT + +# define DO_CHECK_FD_SET(so, events, fdset) \ + ((events).lNetworkEvents & fdset ## _win) + +# define DO_WIN_CHECK_FD_SET(so, events, fdset) DO_CHECK_FD_SET((so), (events), fdset) +# define DO_UNIX_CHECK_FD_SET(so, events, fdset) 1 /*specific for Unix API */ + +#endif /* RT_OS_WINDOWS */ + +#define TCP_ENGAGE_EVENT1(so, fdset) \ + DO_ENGAGE_EVENT1((so), fdset, tcp) + +#define TCP_ENGAGE_EVENT2(so, fdset1, fdset2) \ + DO_ENGAGE_EVENT2((so), fdset1, fdset2, tcp) + +#ifdef RT_OS_WINDOWS +# define WIN_TCP_ENGAGE_EVENT2(so, fdset, fdset2) TCP_ENGAGE_EVENT2(so, fdset1, fdset2) +#endif + +#define 
UDP_ENGAGE_EVENT(so, fdset) \ + DO_ENGAGE_EVENT1((so), fdset, udp) + +#define POLL_TCP_EVENTS(rc, error, so, events) \ + DO_POLL_EVENTS((rc), (error), (so), (events), tcp) + +#define POLL_UDP_EVENTS(rc, error, so, events) \ + DO_POLL_EVENTS((rc), (error), (so), (events), udp) + +#define CHECK_FD_SET(so, events, set) \ + (DO_CHECK_FD_SET((so), (events), set)) + +#define WIN_CHECK_FD_SET(so, events, set) \ + (DO_WIN_CHECK_FD_SET((so), (events), set)) + +/* + * Loging macros + */ +#ifdef VBOX_WITH_DEBUG_NAT_SOCKETS +# if defined(RT_OS_WINDOWS) +# define DO_LOG_NAT_SOCK(so, proto, winevent, r_fdset, w_fdset, x_fdset) \ + do { \ + LogRel((" " #proto " %R[natsock] %R[natwinnetevents]\n", (so), (winevent))); \ + } while (0) +# else /* !RT_OS_WINDOWS */ +# define DO_LOG_NAT_SOCK(so, proto, winevent, r_fdset, w_fdset, x_fdset) \ + do { \ + LogRel((" " #proto " %R[natsock] %s %s %s er: %s, %s, %s\n", (so), \ + CHECK_FD_SET(so, ign ,r_fdset) ? "READ":"", \ + CHECK_FD_SET(so, ign, w_fdset) ? "WRITE":"", \ + CHECK_FD_SET(so, ign, x_fdset) ? "OOB":"", \ + CHECK_FD_SET(so, ign, rderr) ? "RDERR":"", \ + CHECK_FD_SET(so, ign, rdhup) ? "RDHUP":"", \ + CHECK_FD_SET(so, ign, nval) ? "RDNVAL":"")); \ + } while (0) +# endif /* !RT_OS_WINDOWS */ +#else /* !VBOX_WITH_DEBUG_NAT_SOCKETS */ +# define DO_LOG_NAT_SOCK(so, proto, winevent, r_fdset, w_fdset, x_fdset) do {} while (0) +#endif /* !VBOX_WITH_DEBUG_NAT_SOCKETS */ + +#define LOG_NAT_SOCK(so, proto, winevent, r_fdset, w_fdset, x_fdset) \ + DO_LOG_NAT_SOCK((so), proto, (winevent), r_fdset, w_fdset, x_fdset) + +static const uint8_t special_ethaddr[6] = +{ + 0x52, 0x54, 0x00, 0x12, 0x35, 0x00 +}; + +static const uint8_t broadcast_ethaddr[6] = +{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +const uint8_t zerro_ethaddr[6] = +{ + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 +}; + +/** + * This helper routine do the checks in descriptions to + * ''fUnderPolling'' and ''fShouldBeRemoved'' flags + * @returns 1 if socket removed and 0 if no changes was made. 
+ */ +static int slirpVerifyAndFreeSocket(PNATState pData, struct socket *pSocket) +{ + AssertPtrReturn(pData, 0); + AssertPtrReturn(pSocket, 0); + AssertReturn(pSocket->fUnderPolling, 0); + if (pSocket->fShouldBeRemoved) + { + pSocket->fUnderPolling = 0; + sofree(pData, pSocket); + /* pSocket is PHANTOM, now */ + return 1; + } + return 0; +} + +int slirp_init(PNATState *ppData, uint32_t u32NetAddr, uint32_t u32Netmask, + bool fPassDomain, bool fUseHostResolver, int i32AliasMode, + int iIcmpCacheLimit, bool fLocalhostReachable, void *pvUser) +{ + int rc; + PNATState pData; + if (u32Netmask & 0x1f) + { + /* CTL is x.x.x.15, bootp passes up to 16 IPs (15..31) */ + LogRel(("NAT: The last 5 bits of the netmask (%RTnaipv4) need to be unset\n", RT_BE2H_U32(u32Netmask))); + return VERR_INVALID_PARAMETER; + } + pData = RTMemAllocZ(RT_ALIGN_Z(sizeof(NATState), sizeof(uint64_t))); + *ppData = pData; + if (!pData) + return VERR_NO_MEMORY; + pData->fPassDomain = !fUseHostResolver ? fPassDomain : false; + pData->fUseHostResolver = fUseHostResolver; + pData->fUseHostResolverPermanent = fUseHostResolver; + pData->fLocalhostReachable = fLocalhostReachable; + pData->pvUser = pvUser; + pData->netmask = u32Netmask; + + rc = RTCritSectRwInit(&pData->CsRwHandlerChain); + if (RT_FAILURE(rc)) + return rc; + + /* sockets & TCP defaults */ + pData->socket_rcv = 64 * _1K; + pData->socket_snd = 64 * _1K; + tcp_sndspace = 64 * _1K; + tcp_rcvspace = 64 * _1K; + + /* + * Use the same default here as in DevNAT.cpp (SoMaxConnection CFGM value) + * to avoid release log noise. 
+ */ + pData->soMaxConn = 10; + +#ifdef RT_OS_WINDOWS + { + WSADATA Data; + RTLDRMOD hLdrMod; + + WSAStartup(MAKEWORD(2, 0), &Data); + + rc = RTLdrLoadSystem("Iphlpapi.dll", true /*fNoUnload*/, &hLdrMod); + if (RT_SUCCESS(rc)) + { + rc = RTLdrGetSymbol(hLdrMod, "GetAdaptersAddresses", (void **)&pData->pfnGetAdaptersAddresses); + if (RT_FAILURE(rc)) + LogRel(("NAT: Can't find GetAdapterAddresses in Iphlpapi.dll\n")); + + RTLdrClose(hLdrMod); + } + } + pData->phEvents[VBOX_SOCKET_EVENT_INDEX] = CreateEvent(NULL, FALSE, FALSE, NULL); +#endif + + rc = bootp_dhcp_init(pData); + if (RT_FAILURE(rc)) + { + Log(("NAT: DHCP server initialization failed\n")); + RTMemFree(pData); + *ppData = NULL; + return rc; + } + debug_init(pData); + if_init(pData); + ip_init(pData); + icmp_init(pData, iIcmpCacheLimit); + + /* Initialise mbufs *after* setting the MTU */ + mbuf_init(pData); + + pData->special_addr.s_addr = u32NetAddr; + pData->slirp_ethaddr = &special_ethaddr[0]; + alias_addr.s_addr = pData->special_addr.s_addr | RT_H2N_U32_C(CTL_ALIAS); + /** @todo add ability to configure this staff */ + + /* + * Some guests won't reacquire DHCP lease on link flap when VM is + * restored. Instead of forcing users to explicitly set CTL_GUEST + * in port-forwarding rules, provide it as initial guess here. 
+ */ + slirp_update_guest_addr_guess(pData, + pData->special_addr.s_addr | RT_H2N_U32_C(CTL_GUEST), + "initialization"); + + /* set default addresses */ + inet_aton("127.0.0.1", &loopback_addr); + + rc = slirpTftpInit(pData); + AssertRCReturn(rc, rc); + + if (i32AliasMode & ~(PKT_ALIAS_LOG|PKT_ALIAS_SAME_PORTS|PKT_ALIAS_PROXY_ONLY)) + { + LogRel(("NAT: bad alias mode 0x%x ignored\n", i32AliasMode)); + i32AliasMode = 0; + } + else if (i32AliasMode != 0) + { + LogRel(("NAT: alias mode 0x%x\n", i32AliasMode)); + } + + pData->i32AliasMode = i32AliasMode; + getouraddr(pData); + { + int flags = 0; + struct in_addr proxy_addr; + pData->proxy_alias = LibAliasInit(pData, NULL); + if (pData->proxy_alias == NULL) + { + Log(("NAT: LibAlias default rule wasn't initialized\n")); + AssertMsgFailed(("NAT: LibAlias default rule wasn't initialized\n")); + } + flags = LibAliasSetMode(pData->proxy_alias, 0, 0); +#ifndef NO_FW_PUNCH + flags |= PKT_ALIAS_PUNCH_FW; +#endif + flags |= pData->i32AliasMode; /* do transparent proxying */ + flags = LibAliasSetMode(pData->proxy_alias, flags, ~0U); + proxy_addr.s_addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_ALIAS); + LibAliasSetAddress(pData->proxy_alias, proxy_addr); + ftp_alias_load(pData); + nbt_alias_load(pData); + } +#ifdef VBOX_WITH_NAT_SEND2HOME + /** @todo we should know all interfaces available on host. */ + pData->pInSockAddrHomeAddress = RTMemAllocZ(sizeof(struct sockaddr)); + pData->cInHomeAddressSize = 1; + inet_aton("192.168.1.25", &pData->pInSockAddrHomeAddress[0].sin_addr); + pData->pInSockAddrHomeAddress[0].sin_family = AF_INET; +# ifdef RT_OS_DARWIN + pData->pInSockAddrHomeAddress[0].sin_len = sizeof(struct sockaddr_in); +# endif +#endif + +#ifdef VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER + STAILQ_INIT(&pData->DNSMapNames); + STAILQ_INIT(&pData->DNSMapPatterns); +#endif + + slirp_link_up(pData); + return VINF_SUCCESS; +} + +/** + * Register statistics. 
+ */ +void slirp_register_statistics(PNATState pData, PPDMDRVINS pDrvIns) +{ +#ifdef VBOX_WITH_STATISTICS +# define PROFILE_COUNTER(name, dsc) REGISTER_COUNTER(name, pData, STAMTYPE_PROFILE, STAMUNIT_TICKS_PER_CALL, dsc) +# define COUNTING_COUNTER(name, dsc) REGISTER_COUNTER(name, pData, STAMTYPE_COUNTER, STAMUNIT_COUNT, dsc) +# include "counters.h" +# undef COUNTER +/** @todo register statistics for the variables dumped by: + * ipstats(pData); tcpstats(pData); udpstats(pData); icmpstats(pData); + * mbufstats(pData); sockstats(pData); */ +#else /* VBOX_WITH_STATISTICS */ + NOREF(pData); + NOREF(pDrvIns); +#endif /* !VBOX_WITH_STATISTICS */ +} + +/** + * Deregister statistics. + */ +void slirp_deregister_statistics(PNATState pData, PPDMDRVINS pDrvIns) +{ + if (pData == NULL) + return; +#ifdef VBOX_WITH_STATISTICS +# define PROFILE_COUNTER(name, dsc) DEREGISTER_COUNTER(name, pData) +# define COUNTING_COUNTER(name, dsc) DEREGISTER_COUNTER(name, pData) +# include "counters.h" +#else /* VBOX_WITH_STATISTICS */ + NOREF(pData); + NOREF(pDrvIns); +#endif /* !VBOX_WITH_STATISTICS */ +} + +/** + * Marks the link as up, making it possible to establish new connections. + */ +void slirp_link_up(PNATState pData) +{ + if (link_up == 1) + return; + + link_up = 1; + + if (!pData->fUseHostResolverPermanent) + slirpInitializeDnsSettings(pData); +} + +/** + * Marks the link as down and cleans up the current connections. + */ +void slirp_link_down(PNATState pData) +{ + if (link_up == 0) + return; + + slirpReleaseDnsSettings(pData); + + link_up = 0; +} + +/** + * Terminates the slirp component. 
+ */ +void slirp_term(PNATState pData) +{ + struct socket *so; + + if (pData == NULL) + return; + + icmp_finit(pData); + + while ((so = tcb.so_next) != &tcb) + { + /* Don't miss TCB releasing */ + if ( !sototcpcb(so) + && ( so->so_state & SS_NOFDREF + || so->s == -1)) + sofree(pData, so); + else + tcp_close(pData, sototcpcb(so)); + } + + while ((so = udb.so_next) != &udb) + udp_detach(pData, so); + + slirp_link_down(pData); + ftp_alias_unload(pData); + nbt_alias_unload(pData); + +#ifdef VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER + { + DNSMAPPINGHEAD *heads[2]; + int i; + + heads[0] = &pData->DNSMapNames; + heads[1] = &pData->DNSMapPatterns; + for (i = 0; i < RT_ELEMENTS(heads); ++i) + { + while (!STAILQ_EMPTY(heads[i])) + { + PDNSMAPPINGENTRY pDnsEntry = STAILQ_FIRST(heads[i]); + STAILQ_REMOVE_HEAD(heads[i], MapList); + RTStrFree(pDnsEntry->pszName); + RTMemFree(pDnsEntry); + } + } + } +#endif + + while (!LIST_EMPTY(&instancehead)) + { + struct libalias *la = LIST_FIRST(&instancehead); + /* libalias do all clean up */ + LibAliasUninit(la); + } + while (!LIST_EMPTY(&pData->arp_cache)) + { + struct arp_cache_entry *ac = LIST_FIRST(&pData->arp_cache); + LIST_REMOVE(ac, list); + RTMemFree(ac); + } + while (!LIST_EMPTY(&pData->port_forward_rule_head)) + { + struct port_forward_rule *rule = LIST_FIRST(&pData->port_forward_rule_head); + LIST_REMOVE(rule, list); + RTMemFree(rule); + } + slirpTftpTerm(pData); + bootp_dhcp_fini(pData); + m_fini(pData); +#ifdef RT_OS_WINDOWS + WSACleanup(); +#endif + if (tftp_prefix) + RTStrFree((char *)tftp_prefix); +#ifdef LOG_ENABLED + Log(("\n" + "NAT statistics\n" + "--------------\n" + "\n")); + ipstats(pData); + tcpstats(pData); + udpstats(pData); + icmpstats(pData); + mbufstats(pData); + sockstats(pData); + Log(("\n" + "\n" + "\n")); +#endif + RTCritSectRwDelete(&pData->CsRwHandlerChain); + RTMemFree(pData); +} + + +#define CONN_CANFSEND(so) (((so)->so_state & (SS_FCANTSENDMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED) +#define CONN_CANFRCV(so) 
(((so)->so_state & (SS_FCANTRCVMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED) + +/* + * curtime kept to an accuracy of 1ms + */ +static void updtime(PNATState pData) +{ +#ifdef RT_OS_WINDOWS + struct _timeb tb; + + _ftime(&tb); + curtime = (u_int)tb.time * (u_int)1000; + curtime += (u_int)tb.millitm; +#else + gettimeofday(&tt, 0); + + curtime = (u_int)tt.tv_sec * (u_int)1000; + curtime += (u_int)tt.tv_usec / (u_int)1000; + + if ((tt.tv_usec % 1000) >= 500) + curtime++; +#endif +} + +#ifdef RT_OS_WINDOWS +void slirp_select_fill(PNATState pData, int *pnfds) +#else /* RT_OS_WINDOWS */ +void slirp_select_fill(PNATState pData, int *pnfds, struct pollfd *polls) +#endif /* !RT_OS_WINDOWS */ +{ + struct socket *so, *so_next; + int nfds; +#if defined(RT_OS_WINDOWS) + int rc; + int error; +#else + int poll_index = 0; +#endif + int i; + + STAM_PROFILE_START(&pData->StatFill, a); + + nfds = *pnfds; + + /* + * First, TCP sockets + */ + do_slowtimo = 0; + if (!link_up) + goto done; + + /* + * *_slowtimo needs calling if there are IP fragments + * in the fragment queue, or there are TCP connections active + */ + /* XXX: + * triggering of fragment expiration should be the same but use new macroses + */ + do_slowtimo = (tcb.so_next != &tcb); + if (!do_slowtimo) + { + for (i = 0; i < IPREASS_NHASH; i++) + { + if (!TAILQ_EMPTY(&ipq[i])) + { + do_slowtimo = 1; + break; + } + } + } + /* always add the ICMP socket */ +#ifndef RT_OS_WINDOWS + pData->icmp_socket.so_poll_index = -1; +#endif + ICMP_ENGAGE_EVENT(&pData->icmp_socket, readfds); + + STAM_COUNTER_RESET(&pData->StatTCP); + STAM_COUNTER_RESET(&pData->StatTCPHot); + + QSOCKET_FOREACH(so, so_next, tcp) + /* { */ + Assert(so->so_type == IPPROTO_TCP); +#if !defined(RT_OS_WINDOWS) + so->so_poll_index = -1; +#endif + STAM_COUNTER_INC(&pData->StatTCP); + + /* + * See if we need a tcp_fasttimo + */ + if ( time_fasttimo == 0 + && so->so_tcpcb != NULL + && so->so_tcpcb->t_flags & TF_DELACK) + { + time_fasttimo = curtime; /* Flag when we want a 
fasttimo */ + } + + /* + * NOFDREF can include still connecting to local-host, + * newly socreated() sockets etc. Don't want to select these. + */ + if (so->so_state & SS_NOFDREF || so->s == -1) + CONTINUE(tcp); + + /* + * Set for reading sockets which are accepting + */ + if (so->so_state & SS_FACCEPTCONN) + { + STAM_COUNTER_INC(&pData->StatTCPHot); + TCP_ENGAGE_EVENT1(so, readfds); + CONTINUE(tcp); + } + + /* + * Set for writing sockets which are connecting + */ + if (so->so_state & SS_ISFCONNECTING) + { + Log2(("connecting %R[natsock] engaged\n",so)); + STAM_COUNTER_INC(&pData->StatTCPHot); +#ifdef RT_OS_WINDOWS + WIN_TCP_ENGAGE_EVENT2(so, writefds, connectfds); +#else + TCP_ENGAGE_EVENT1(so, writefds); +#endif + } + + /* + * Set for writing if we are connected, can send more, and + * we have something to send + */ + if (CONN_CANFSEND(so) && SBUF_LEN(&so->so_rcv)) + { + STAM_COUNTER_INC(&pData->StatTCPHot); + TCP_ENGAGE_EVENT1(so, writefds); + } + + /* + * Set for reading (and urgent data) if we are connected, can + * receive more, and we have room for it XXX /2 ? + */ + /** @todo vvl - check which predicat here will be more useful here in rerm of new sbufs. 
*/ + if ( CONN_CANFRCV(so) + && (SBUF_LEN(&so->so_snd) < (SBUF_SIZE(&so->so_snd)/2)) +#ifdef RT_OS_WINDOWS + && !(so->so_state & SS_ISFCONNECTING) +#endif + ) + { + STAM_COUNTER_INC(&pData->StatTCPHot); + TCP_ENGAGE_EVENT2(so, readfds, xfds); + } + LOOP_LABEL(tcp, so, so_next); + } + + /* + * UDP sockets + */ + STAM_COUNTER_RESET(&pData->StatUDP); + STAM_COUNTER_RESET(&pData->StatUDPHot); + + QSOCKET_FOREACH(so, so_next, udp) + /* { */ + + Assert(so->so_type == IPPROTO_UDP); + STAM_COUNTER_INC(&pData->StatUDP); +#if !defined(RT_OS_WINDOWS) + so->so_poll_index = -1; +#endif + + /* + * See if it's timed out + */ + if (so->so_expire) + { + if (so->so_expire <= curtime) + { + Log2(("NAT: %R[natsock] expired\n", so)); + if (so->so_timeout != NULL) + { + /* so_timeout - might change the so_expire value or + * drop so_timeout* from so. + */ + so->so_timeout(pData, so, so->so_timeout_arg); + /* on 4.2 so-> + */ + if ( so_next->so_prev != so /* so_timeout freed the socket */ + || so->so_timeout) /* so_timeout just freed so_timeout */ + CONTINUE_NO_UNLOCK(udp); + } + UDP_DETACH(pData, so, so_next); + CONTINUE_NO_UNLOCK(udp); + } + } + + /* + * When UDP packets are received from over the link, they're + * sendto()'d straight away, so no need for setting for writing + * Limit the number of packets queued by this session to 4. + * Note that even though we try and limit this to 4 packets, + * the session could have more queued if the packets needed + * to be fragmented. + * + * (XXX <= 4 ?) + */ + if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) + { + STAM_COUNTER_INC(&pData->StatUDPHot); + UDP_ENGAGE_EVENT(so, readfds); + } + LOOP_LABEL(udp, so, so_next); + } +done: + +#if defined(RT_OS_WINDOWS) + *pnfds = VBOX_EVENT_COUNT; +#else /* RT_OS_WINDOWS */ + AssertRelease(poll_index <= *pnfds); + *pnfds = poll_index; +#endif /* !RT_OS_WINDOWS */ + + STAM_PROFILE_STOP(&pData->StatFill, a); +} + + +/** + * This function do Connection or sending tcp sequence to. 
+ * @returns if true operation completed + * @note: functions call tcp_input that potentially could lead to tcp_drop + */ +static bool slirpConnectOrWrite(PNATState pData, struct socket *so, bool fConnectOnly) +{ + int ret; + LogFlowFunc(("ENTER: so:%R[natsock], fConnectOnly:%RTbool\n", so, fConnectOnly)); + /* + * Check for non-blocking, still-connecting sockets + */ + if (so->so_state & SS_ISFCONNECTING) + { + Log2(("connecting %R[natsock] catched\n", so)); + /* Connected */ + so->so_state &= ~SS_ISFCONNECTING; + + /* + * This should be probably guarded by PROBE_CONN too. Anyway, + * we disable it on OS/2 because the below send call returns + * EFAULT which causes the opened TCP socket to close right + * after it has been opened and connected. + */ +#ifndef RT_OS_OS2 + ret = send(so->s, (const char *)&ret, 0, 0); + if (ret < 0) + { + /* XXXXX Must fix, zero bytes is a NOP */ + if ( soIgnorableErrorCode(errno) + || errno == ENOTCONN) + { + LogFlowFunc(("LEAVE: false\n")); + return false; + } + + /* else failed */ + so->so_state = SS_NOFDREF; + } + /* else so->so_state &= ~SS_ISFCONNECTING; */ +#endif + + /* + * Continue tcp_input + */ + TCP_INPUT(pData, (struct mbuf *)NULL, sizeof(struct ip), so); + /* continue; */ + } + else if (!fConnectOnly) + { + SOWRITE(ret, pData, so); + if (RT_LIKELY(ret > 0)) + { + /* + * Make sure we will send window update to peer. This is + * a moral equivalent of calling tcp_output() for PRU_RCVD + * in tcp_usrreq() of the real stack. 
+ */ + struct tcpcb *tp = sototcpcb(so); + if (RT_LIKELY(tp != NULL)) + tp->t_flags |= TF_DELACK; + } + } + + LogFlowFunc(("LEAVE: true\n")); + return true; +} + +#if defined(RT_OS_WINDOWS) +void slirp_select_poll(PNATState pData, int fTimeout) +#else /* RT_OS_WINDOWS */ +void slirp_select_poll(PNATState pData, struct pollfd *polls, int ndfs) +#endif /* !RT_OS_WINDOWS */ +{ + struct socket *so, *so_next; + int ret; +#if defined(RT_OS_WINDOWS) + WSANETWORKEVENTS NetworkEvents; + int rc; + int error; +#endif + + STAM_PROFILE_START(&pData->StatPoll, a); + + /* Update time */ + updtime(pData); + + /* + * See if anything has timed out + */ + if (link_up) + { + if (time_fasttimo && ((curtime - time_fasttimo) >= 2)) + { + STAM_PROFILE_START(&pData->StatFastTimer, b); + tcp_fasttimo(pData); + time_fasttimo = 0; + STAM_PROFILE_STOP(&pData->StatFastTimer, b); + } + if (do_slowtimo && ((curtime - last_slowtimo) >= 499)) + { + STAM_PROFILE_START(&pData->StatSlowTimer, c); + ip_slowtimo(pData); + tcp_slowtimo(pData); + last_slowtimo = curtime; + STAM_PROFILE_STOP(&pData->StatSlowTimer, c); + } + } +#if defined(RT_OS_WINDOWS) + if (fTimeout) + return; /* only timer update */ +#endif + + /* + * Check sockets + */ + if (!link_up) + goto done; +#if defined(RT_OS_WINDOWS) + icmpwin_process(pData); +#else + if ( (pData->icmp_socket.s != -1) + && CHECK_FD_SET(&pData->icmp_socket, ignored, readfds)) + sorecvfrom(pData, &pData->icmp_socket); +#endif + /* + * Check TCP sockets + */ + QSOCKET_FOREACH(so, so_next, tcp) + /* { */ + Assert(!so->fUnderPolling); + so->fUnderPolling = 1; + if (slirpVerifyAndFreeSocket(pData, so)) + CONTINUE(tcp); + /* + * FD_ISSET is meaningless on these sockets + * (and they can crash the program) + */ + if (so->so_state & SS_NOFDREF || so->s == -1) + { + so->fUnderPolling = 0; + CONTINUE(tcp); + } + + POLL_TCP_EVENTS(rc, error, so, &NetworkEvents); + + LOG_NAT_SOCK(so, TCP, &NetworkEvents, readfds, writefds, xfds); + + if (so->so_state & SS_ISFCONNECTING) + { 
+ int sockerr = 0; +#if !defined(RT_OS_WINDOWS) + { + int revents = 0; + + /* + * Failed connect(2) is reported by poll(2) on + * different OSes with different combinations of + * POLLERR, POLLHUP, and POLLOUT. + */ + if ( CHECK_FD_SET(so, NetworkEvents, closefds) /* POLLHUP */ + || CHECK_FD_SET(so, NetworkEvents, rderr)) /* POLLERR */ + { + revents = POLLHUP; /* squash to single "failed" flag */ + } +#if defined(RT_OS_SOLARIS) || defined(RT_OS_NETBSD) + /* Solaris and NetBSD report plain POLLOUT even on error */ + else if (CHECK_FD_SET(so, NetworkEvents, writefds)) /* POLLOUT */ + { + revents = POLLOUT; + } +#endif + + if (revents != 0) + { + socklen_t optlen = (socklen_t)sizeof(sockerr); + ret = getsockopt(so->s, SOL_SOCKET, SO_ERROR, &sockerr, &optlen); + + if ( RT_UNLIKELY(ret < 0) + || ( (revents & POLLHUP) + && RT_UNLIKELY(sockerr == 0))) + sockerr = ETIMEDOUT; + } + } +#else /* RT_OS_WINDOWS */ + { + if (NetworkEvents.lNetworkEvents & FD_CONNECT) + sockerr = NetworkEvents.iErrorCode[FD_CONNECT_BIT]; + } +#endif + if (sockerr != 0) + { + tcp_fconnect_failed(pData, so, sockerr); + ret = slirpVerifyAndFreeSocket(pData, so); + Assert(ret == 1); /* freed */ + CONTINUE(tcp); + } + + /* + * XXX: For now just fall through to the old code to + * handle successful connect(2). + */ + } + + /* + * Check for URG data + * This will soread as well, so no need to + * test for readfds below if this succeeds + */ + + /* out-of-band data */ + if ( CHECK_FD_SET(so, NetworkEvents, xfds) +#ifdef RT_OS_DARWIN + /* Darwin and probably BSD hosts generates POLLPRI|POLLHUP event on receiving TCP.flags.{ACK|URG|FIN} this + * combination on other Unixs hosts doesn't enter to this branch + */ + && !CHECK_FD_SET(so, NetworkEvents, closefds) +#endif +#ifdef RT_OS_WINDOWS + /** + * In some cases FD_CLOSE comes with FD_OOB, that confuse tcp processing. 
+ */ + && !WIN_CHECK_FD_SET(so, NetworkEvents, closefds) +#endif + ) + { + sorecvoob(pData, so); + if (slirpVerifyAndFreeSocket(pData, so)) + CONTINUE(tcp); + } + + /* + * Check sockets for reading + */ + else if ( CHECK_FD_SET(so, NetworkEvents, readfds) + || WIN_CHECK_FD_SET(so, NetworkEvents, acceptds)) + { + +#ifdef RT_OS_WINDOWS + if (WIN_CHECK_FD_SET(so, NetworkEvents, connectfds)) + { + /* Finish connection first */ + /* should we ignore return value? */ + bool fRet = slirpConnectOrWrite(pData, so, true); + LogFunc(("fRet:%RTbool\n", fRet)); NOREF(fRet); + if (slirpVerifyAndFreeSocket(pData, so)) + CONTINUE(tcp); + } +#endif + /* + * Check for incoming connections + */ + if (so->so_state & SS_FACCEPTCONN) + { + TCP_CONNECT(pData, so); + if (slirpVerifyAndFreeSocket(pData, so)) + CONTINUE(tcp); + if (!CHECK_FD_SET(so, NetworkEvents, closefds)) + { + so->fUnderPolling = 0; + CONTINUE(tcp); + } + } + + ret = soread(pData, so); + if (slirpVerifyAndFreeSocket(pData, so)) + CONTINUE(tcp); + /* Output it if we read something */ + if (RT_LIKELY(ret > 0)) + TCP_OUTPUT(pData, sototcpcb(so)); + + if (slirpVerifyAndFreeSocket(pData, so)) + CONTINUE(tcp); + } + + /* + * Check for FD_CLOSE events. 
+ * in some cases once FD_CLOSE engaged on socket it could be flashed latter (for some reasons) + */ + if ( CHECK_FD_SET(so, NetworkEvents, closefds) + || (so->so_close == 1)) + { + /* + * drain the socket + */ + for (; so_next->so_prev == so + && !slirpVerifyAndFreeSocket(pData, so);) + { + ret = soread(pData, so); + if (slirpVerifyAndFreeSocket(pData, so)) + break; + + if (ret > 0) + TCP_OUTPUT(pData, sototcpcb(so)); + else if (so_next->so_prev == so) + { + Log2(("%R[natsock] errno %d (%s)\n", so, errno, strerror(errno))); + break; + } + } + + /* if socket freed ''so'' is PHANTOM and next socket isn't points on it */ + if (so_next->so_prev != so) + { + CONTINUE(tcp); + } + else + { + /* mark the socket for termination _after_ it was drained */ + so->so_close = 1; + /* No idea about Windows but on Posix, POLLHUP means that we can't send more. + * Actually in the specific error scenario, POLLERR is set as well. */ +#ifndef RT_OS_WINDOWS + if (CHECK_FD_SET(so, NetworkEvents, rderr)) + sofcantsendmore(so); +#endif + } + } + + /* + * Check sockets for writing + */ + if ( CHECK_FD_SET(so, NetworkEvents, writefds) +#ifdef RT_OS_WINDOWS + || WIN_CHECK_FD_SET(so, NetworkEvents, connectfds) +#endif + ) + { + int fConnectOrWriteSuccess = slirpConnectOrWrite(pData, so, false); + /* slirpConnectOrWrite could return true even if tcp_input called tcp_drop, + * so we should be ready to such situations. 
+ */ + if (slirpVerifyAndFreeSocket(pData, so)) + CONTINUE(tcp); + else if (!fConnectOrWriteSuccess) + { + so->fUnderPolling = 0; + CONTINUE(tcp); + } + /* slirpConnectionOrWrite succeeded and socket wasn't dropped */ + } + + /* + * Probe a still-connecting, non-blocking socket + * to check if it's still alive + */ +#ifdef PROBE_CONN + if (so->so_state & SS_ISFCONNECTING) + { + ret = recv(so->s, (char *)&ret, 0, 0); + + if (ret < 0) + { + /* XXX */ + if ( soIgnorableErrorCode(errno) + || errno == ENOTCONN) + { + CONTINUE(tcp); /* Still connecting, continue */ + } + + /* else failed */ + so->so_state = SS_NOFDREF; + + /* tcp_input will take care of it */ + } + else + { + ret = send(so->s, &ret, 0, 0); + if (ret < 0) + { + /* XXX */ + if ( soIgnorableErrorCode(errno) + || errno == ENOTCONN) + { + CONTINUE(tcp); + } + /* else failed */ + so->so_state = SS_NOFDREF; + } + else + so->so_state &= ~SS_ISFCONNECTING; + + } + TCP_INPUT((struct mbuf *)NULL, sizeof(struct ip),so); + } /* SS_ISFCONNECTING */ +#endif + if (!slirpVerifyAndFreeSocket(pData, so)) + so->fUnderPolling = 0; + LOOP_LABEL(tcp, so, so_next); + } + + /* + * Now UDP sockets. + * Incoming packets are sent straight away, they're not buffered. + * Incoming UDP data isn't buffered either. 
+ */ + QSOCKET_FOREACH(so, so_next, udp) + /* { */ +#if 0 + so->fUnderPolling = 1; + if(slirpVerifyAndFreeSocket(pData, so)); + CONTINUE(udp); + so->fUnderPolling = 0; +#endif + + POLL_UDP_EVENTS(rc, error, so, &NetworkEvents); + + LOG_NAT_SOCK(so, UDP, &NetworkEvents, readfds, writefds, xfds); + + if (so->s != -1 && CHECK_FD_SET(so, NetworkEvents, readfds)) + { + SORECVFROM(pData, so); + } + LOOP_LABEL(udp, so, so_next); + } + +done: + + STAM_PROFILE_STOP(&pData->StatPoll, a); +} + + +struct arphdr +{ + unsigned short ar_hrd; /* format of hardware address */ +#define ARPHRD_ETHER 1 /* ethernet hardware format */ + unsigned short ar_pro; /* format of protocol address */ + unsigned char ar_hln; /* length of hardware address */ + unsigned char ar_pln; /* length of protocol address */ + unsigned short ar_op; /* ARP opcode (command) */ +#define ARPOP_REQUEST 1 /* ARP request */ +#define ARPOP_REPLY 2 /* ARP reply */ + + /* + * Ethernet looks like this : This bit is variable sized however... 
+ */ + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ + unsigned char ar_sip[4]; /* sender IP address */ + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ + unsigned char ar_tip[4]; /* target IP address */ +}; +AssertCompileSize(struct arphdr, 28); + +static void arp_output(PNATState pData, const uint8_t *pcu8EtherSource, const struct arphdr *pcARPHeaderSource, uint32_t ip4TargetAddress) +{ + struct ethhdr *pEtherHeaderResponse; + struct arphdr *pARPHeaderResponse; + uint32_t ip4TargetAddressInHostFormat; + struct mbuf *pMbufResponse; + + Assert((pcu8EtherSource)); + if (!pcu8EtherSource) + return; + ip4TargetAddressInHostFormat = RT_N2H_U32(ip4TargetAddress); + + pMbufResponse = m_getcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR); + if (!pMbufResponse) + return; + pEtherHeaderResponse = mtod(pMbufResponse, struct ethhdr *); + /* @note: if_encap will swap src and dst*/ + memcpy(pEtherHeaderResponse->h_source, pcu8EtherSource, ETH_ALEN); + pMbufResponse->m_data += ETH_HLEN; + pARPHeaderResponse = mtod(pMbufResponse, struct arphdr *); + pMbufResponse->m_len = sizeof(struct arphdr); + + pARPHeaderResponse->ar_hrd = RT_H2N_U16_C(1); + pARPHeaderResponse->ar_pro = RT_H2N_U16_C(ETH_P_IP); + pARPHeaderResponse->ar_hln = ETH_ALEN; + pARPHeaderResponse->ar_pln = 4; + pARPHeaderResponse->ar_op = RT_H2N_U16_C(ARPOP_REPLY); + memcpy(pARPHeaderResponse->ar_sha, special_ethaddr, ETH_ALEN); + + if (!slirpMbufTagService(pData, pMbufResponse, (uint8_t)(ip4TargetAddressInHostFormat & ~pData->netmask))) + { + static bool fTagErrorReported; + if (!fTagErrorReported) + { + LogRel(("NAT: Couldn't add the tag(PACKET_SERVICE:%d)\n", + (uint8_t)(ip4TargetAddressInHostFormat & ~pData->netmask))); + fTagErrorReported = true; + } + } + pARPHeaderResponse->ar_sha[5] = (uint8_t)(ip4TargetAddressInHostFormat & ~pData->netmask); + + memcpy(pARPHeaderResponse->ar_sip, pcARPHeaderSource->ar_tip, 4); + memcpy(pARPHeaderResponse->ar_tha, pcARPHeaderSource->ar_sha, ETH_ALEN); + 
memcpy(pARPHeaderResponse->ar_tip, pcARPHeaderSource->ar_sip, 4); + if_encap(pData, ETH_P_ARP, pMbufResponse, ETH_ENCAP_URG); +} + +/** + * @note This function will free m! + */ +static void arp_input(PNATState pData, struct mbuf *m) +{ + struct ethhdr *pEtherHeader; + struct arphdr *pARPHeader; + int ar_op; + uint32_t ip4TargetAddress; + + /* drivers never return runt packets, so this should never happen */ + if (RT_UNLIKELY((size_t)m->m_len + < sizeof(struct ethhdr) + sizeof(struct arphdr))) + goto done; + + pEtherHeader = mtod(m, struct ethhdr *); + pARPHeader = (struct arphdr *)&pEtherHeader[1]; + + if (RT_UNLIKELY( pARPHeader->ar_hrd != RT_H2N_U16_C(ARPHRD_ETHER) + || pARPHeader->ar_pro != RT_H2N_U16_C(ETH_P_IP) + || pARPHeader->ar_hln != ETH_ALEN + || pARPHeader->ar_pln != sizeof(RTNETADDRIPV4))) + goto done; + + ar_op = RT_N2H_U16(pARPHeader->ar_op); + ip4TargetAddress = *(uint32_t*)pARPHeader->ar_tip; + + switch (ar_op) + { + case ARPOP_REQUEST: + if ( CTL_CHECK(ip4TargetAddress, CTL_DNS) + || CTL_CHECK(ip4TargetAddress, CTL_ALIAS) + || CTL_CHECK(ip4TargetAddress, CTL_TFTP)) + { +#if 0 /* Dropping ARP requests destined for CTL_ALIAS breaks all outgoing traffic completely, so don't do that... */ + /* Don't reply to ARP requests for the hosts loopback interface if it is disabled. 
*/ + if ( CTL_CHECK(ip4TargetAddress, CTL_ALIAS) + && !pData->fLocalhostReachable) + break; +#endif + slirp_update_guest_addr_guess(pData, *(uint32_t *)pARPHeader->ar_sip, "arp request"); + arp_output(pData, pEtherHeader->h_source, pARPHeader, ip4TargetAddress); + break; + } + + /* Gratuitous ARP */ + if ( *(uint32_t *)pARPHeader->ar_sip == *(uint32_t *)pARPHeader->ar_tip + && ( memcmp(pARPHeader->ar_tha, zerro_ethaddr, ETH_ALEN) == 0 + || memcmp(pARPHeader->ar_tha, broadcast_ethaddr, ETH_ALEN) == 0) + && memcmp(pEtherHeader->h_dest, broadcast_ethaddr, ETH_ALEN) == 0) + { + LogRel2(("NAT: Gratuitous ARP from %RTnaipv4 at %RTmac\n", + *(uint32_t *)pARPHeader->ar_sip, pARPHeader->ar_sha)); + slirp_update_guest_addr_guess(pData, *(uint32_t *)pARPHeader->ar_sip, "gratuitous arp"); + slirp_arp_cache_update_or_add(pData, *(uint32_t *)pARPHeader->ar_sip, &pARPHeader->ar_sha[0]); + } + break; + + case ARPOP_REPLY: + slirp_arp_cache_update_or_add(pData, *(uint32_t *)pARPHeader->ar_sip, &pARPHeader->ar_sha[0]); + break; + + default: + break; + } + + done: + m_freem(pData, m); +} + +/** + * Feed a packet into the slirp engine. + * + * @param m Data buffer, m_len is not valid. + * @param cbBuf The length of the data in m. + */ +void slirp_input(PNATState pData, struct mbuf *m, size_t cbBuf) +{ + int proto; + static bool fWarnedIpv6; + struct ethhdr *eh; + + m->m_len = (int)cbBuf; Assert((size_t)m->m_len == cbBuf); + if (cbBuf < ETH_HLEN) + { + Log(("NAT: packet having size %d has been ignored\n", m->m_len)); + m_freem(pData, m); + return; + } + + eh = mtod(m, struct ethhdr *); + proto = RT_N2H_U16(eh->h_proto); + switch(proto) + { + case ETH_P_ARP: + arp_input(pData, m); + break; + + case ETH_P_IP: + /* Update time. Important if the network is very quiet, as otherwise + * the first outgoing connection gets an incorrect timestamp. 
*/ + updtime(pData); + m_adj(m, ETH_HLEN); + M_ASSERTPKTHDR(m); + m->m_pkthdr.header = mtod(m, void *); + ip_input(pData, m); + break; + + case ETH_P_IPV6: + m_freem(pData, m); + if (!fWarnedIpv6) + { + LogRel(("NAT: IPv6 not supported\n")); + fWarnedIpv6 = true; + } + break; + + default: + Log(("NAT: Unsupported protocol %x\n", proto)); + m_freem(pData, m); + break; + } +} + +/** + * Output the IP packet to the ethernet device. + * + * @note This function will free m! + */ +void if_encap(PNATState pData, uint16_t eth_proto, struct mbuf *m, int flags) +{ + struct ethhdr *eh; + uint8_t *mbuf = NULL; + int mlen; + STAM_PROFILE_START(&pData->StatIF_encap, a); + LogFlowFunc(("ENTER: pData:%p, eth_proto:%RX16, m:%p, flags:%d\n", + pData, eth_proto, m, flags)); + + M_ASSERTPKTHDR(m); + + Assert(M_LEADINGSPACE(m) >= ETH_HLEN); + m->m_data -= ETH_HLEN; + m->m_len += ETH_HLEN; + eh = mtod(m, struct ethhdr *); + mlen = m->m_len; + + if (memcmp(eh->h_source, special_ethaddr, ETH_ALEN) != 0) + { + struct m_tag *t = m_tag_first(m); + uint8_t u8ServiceId = CTL_ALIAS; + memcpy(eh->h_dest, eh->h_source, ETH_ALEN); + memcpy(eh->h_source, special_ethaddr, ETH_ALEN); + Assert(memcmp(eh->h_dest, special_ethaddr, ETH_ALEN) != 0); + if (memcmp(eh->h_dest, zerro_ethaddr, ETH_ALEN) == 0) + { + /* don't do anything */ + m_freem(pData, m); + goto done; + } + if ( t + && (t = m_tag_find(m, PACKET_SERVICE, NULL))) + { + Assert(t); + u8ServiceId = *(uint8_t *)&t[1]; + } + eh->h_source[5] = u8ServiceId; + } + /* + * we're processing the chain, that isn't not expected. 
+ */ + Assert((!m->m_next)); + if (m->m_next) + { + Log(("NAT: if_encap's recived the chain, dropping...\n")); + m_freem(pData, m); + goto done; + } + mbuf = mtod(m, uint8_t *); + eh->h_proto = RT_H2N_U16(eth_proto); + LogFunc(("eh(dst:%RTmac, src:%RTmac)\n", eh->h_dest, eh->h_source)); + if (flags & ETH_ENCAP_URG) + slirp_urg_output(pData->pvUser, m, mbuf, mlen); + else + slirp_output(pData->pvUser, m, mbuf, mlen); +done: + STAM_PROFILE_STOP(&pData->StatIF_encap, a); + LogFlowFuncLeave(); +} + + +void +slirp_update_guest_addr_guess(PNATState pData, uint32_t guess, const char *msg) +{ + Assert(msg != NULL); + + if (pData->guest_addr_guess.s_addr == guess) + { + LogRel2(("NAT: Guest address guess %RTnaipv4 re-confirmed by %s\n", + pData->guest_addr_guess.s_addr, msg)); + return; + } + + if (pData->guest_addr_guess.s_addr == INADDR_ANY) + { + pData->guest_addr_guess.s_addr = guess; + LogRel(("NAT: Guest address guess set to %RTnaipv4 by %s\n", + pData->guest_addr_guess.s_addr, msg)); + return; + } + else + { + LogRel(("NAT: Guest address guess changed from %RTnaipv4 to %RTnaipv4 by %s\n", + pData->guest_addr_guess.s_addr, guess, msg)); + pData->guest_addr_guess.s_addr = guess; + return; + } +} + + +static struct port_forward_rule * +slirp_find_redirect(PNATState pData, + int is_udp, + struct in_addr host_addr, int host_port, + struct in_addr guest_addr, int guest_port) +{ + struct port_forward_rule *rule; + uint16_t proto = (is_udp ? 
IPPROTO_UDP : IPPROTO_TCP); + + LIST_FOREACH(rule, &pData->port_forward_rule_head, list) + { + if ( rule->proto == proto + && rule->host_port == host_port + && rule->bind_ip.s_addr == host_addr.s_addr + && rule->guest_port == guest_port + && rule->guest_addr.s_addr == guest_addr.s_addr) + { + return rule; + } + } + + return NULL; +} + + +int slirp_add_redirect(PNATState pData, int is_udp, struct in_addr host_addr, int host_port, + struct in_addr guest_addr, int guest_port) +{ + struct port_forward_rule *rule; + + rule = slirp_find_redirect(pData, is_udp, host_addr, host_port, guest_addr, guest_port); + if (rule != NULL) /* rule has been already registered */ + { + /* XXX: this shouldn't happen */ + return 0; + } + + rule = RTMemAllocZ(sizeof(struct port_forward_rule)); + if (rule == NULL) + return 1; + + rule->proto = (is_udp ? IPPROTO_UDP : IPPROTO_TCP); + rule->bind_ip.s_addr = host_addr.s_addr; + rule->host_port = host_port; + rule->guest_addr.s_addr = guest_addr.s_addr; + rule->guest_port = guest_port; + + if (rule->proto == IPPROTO_UDP) + rule->so = udp_listen(pData, rule->bind_ip.s_addr, RT_H2N_U16(rule->host_port), + rule->guest_addr.s_addr, RT_H2N_U16(rule->guest_port), 0); + else + rule->so = solisten(pData, rule->bind_ip.s_addr, RT_H2N_U16(rule->host_port), + rule->guest_addr.s_addr, RT_H2N_U16(rule->guest_port), 0); + + if (rule->so == NULL) + { + LogRel(("NAT: Failed to redirect %s %RTnaipv4:%d -> %RTnaipv4:%d (%s)\n", + rule->proto == IPPROTO_UDP ? "UDP" : "TCP", + rule->bind_ip.s_addr, rule->host_port, + guest_addr, rule->guest_port, strerror(errno))); + RTMemFree(rule); + return 1; + } + + LogRel(("NAT: Set redirect %s %RTnaipv4:%d -> %RTnaipv4:%d\n", + rule->proto == IPPROTO_UDP ? 
"UDP" : "TCP", + rule->bind_ip.s_addr, rule->host_port, + guest_addr, rule->guest_port)); + + LIST_INSERT_HEAD(&pData->port_forward_rule_head, rule, list); + return 0; +} + + +int slirp_remove_redirect(PNATState pData, int is_udp, struct in_addr host_addr, int host_port, + struct in_addr guest_addr, int guest_port) +{ + struct port_forward_rule *rule; + + rule = slirp_find_redirect(pData, is_udp, host_addr, host_port, guest_addr, guest_port); + if (rule == NULL) + { + LogRel(("NAT: Unable to find redirect %s %RTnaipv4:%d -> %RTnaipv4:%d\n", + is_udp ? "UDP" : "TCP", + host_addr.s_addr, host_port, + guest_addr.s_addr, guest_port)); + return 0; + } + + LogRel(("NAT: Remove redirect %s %RTnaipv4:%d -> %RTnaipv4:%d\n", + rule->proto == IPPROTO_UDP ? "UDP" : "TCP", + rule->bind_ip.s_addr, rule->host_port, + guest_addr.s_addr, rule->guest_port)); + + if (rule->so != NULL) + { + if (is_udp) + udp_detach(pData, rule->so); + else + tcp_close(pData, sototcpcb(rule->so)); + } + + LIST_REMOVE(rule, list); + RTMemFree(rule); + return 0; +} + + +#if defined(RT_OS_WINDOWS) +HANDLE *slirp_get_events(PNATState pData) +{ + return pData->phEvents; +} +void slirp_register_external_event(PNATState pData, HANDLE hEvent, int index) +{ + pData->phEvents[index] = hEvent; +} +#endif + +unsigned int slirp_get_timeout_ms(PNATState pData) +{ + if (link_up) + { + if (time_fasttimo) + return 2; + if (do_slowtimo) + return 500; /* see PR_SLOWHZ */ + } + return 3600*1000; /* one hour */ +} + +#ifndef RT_OS_WINDOWS +int slirp_get_nsock(PNATState pData) +{ + return pData->nsock; +} +#endif + +/* + * this function called from NAT thread + */ +void slirp_post_sent(PNATState pData, void *pvArg) +{ + struct mbuf *m = (struct mbuf *)pvArg; + m_freem(pData, m); +} + +void slirp_set_dhcp_TFTP_prefix(PNATState pData, const char *tftpPrefix) +{ + Log2(("tftp_prefix: %s\n", tftpPrefix)); + if (tftp_prefix) + RTStrFree((char *)tftp_prefix); + tftp_prefix = RTPathAbsDup(tftpPrefix); +} + +void 
slirp_set_dhcp_TFTP_bootfile(PNATState pData, const char *bootFile) +{ + Log2(("bootFile: %s\n", bootFile)); + bootp_filename = bootFile; +} + +void slirp_set_dhcp_next_server(PNATState pData, const char *next_server) +{ + Log2(("next_server: %s\n", next_server)); + if (next_server == NULL) + pData->tftp_server.s_addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_TFTP); + else + inet_aton(next_server, &pData->tftp_server); +} + +int slirp_set_binding_address(PNATState pData, char *addr) +{ + int ok; + + pData->bindIP.s_addr = INADDR_ANY; + + if (addr == NULL || *addr == '\0') + return VINF_SUCCESS; + + ok = inet_aton(addr, &pData->bindIP); + if (!ok) + { + LogRel(("NAT: Unable to parse binding address: %s\n", addr)); + return VERR_INVALID_PARAMETER; + } + + if (pData->bindIP.s_addr == INADDR_ANY) + return VINF_SUCCESS; + + if ((pData->bindIP.s_addr & RT_N2H_U32_C(0xe0000000)) == RT_N2H_U32_C(0xe0000000)) + { + LogRel(("NAT: Ignoring multicast binding address %RTnaipv4\n", pData->bindIP.s_addr)); + pData->bindIP.s_addr = INADDR_ANY; + return VERR_INVALID_PARAMETER; + } + + LogRel(("NAT: Binding address %RTnaipv4\n", pData->bindIP.s_addr)); + return VINF_SUCCESS; +} + +void slirp_set_dhcp_dns_proxy(PNATState pData, bool fDNSProxy) +{ + if (!pData->fUseHostResolver) + { + Log2(("NAT: DNS proxy switched %s\n", (fDNSProxy ? 
"on" : "off"))); + pData->fUseDnsProxy = fDNSProxy; + } + else if (fDNSProxy) + LogRel(("NAT: Host Resolver conflicts with DNS proxy, the last one was forcely ignored\n")); +} + +#define CHECK_ARG(name, val, lim_min, lim_max) \ + do { \ + if ((val) < (lim_min) || (val) > (lim_max)) \ + { \ + LogRel(("NAT: (" #name ":%d) has been ignored, " \ + "because out of range (%d, %d)\n", (val), (lim_min), (lim_max))); \ + return; \ + } \ + else \ + LogRel(("NAT: (" #name ":%d)\n", (val))); \ + } while (0) + +void slirp_set_somaxconn(PNATState pData, int iSoMaxConn) +{ + LogFlowFunc(("iSoMaxConn:%d\n", iSoMaxConn)); + /* Conditions */ + if (iSoMaxConn > SOMAXCONN) + { + LogRel(("NAT: value of somaxconn(%d) bigger than SOMAXCONN(%d)\n", iSoMaxConn, SOMAXCONN)); + iSoMaxConn = SOMAXCONN; + } + + if (iSoMaxConn < 1) + { + LogRel(("NAT: proposed value(%d) of somaxconn is invalid, default value is used (%d)\n", iSoMaxConn, pData->soMaxConn)); + LogFlowFuncLeave(); + return; + } + + /* Asignment */ + if (pData->soMaxConn != iSoMaxConn) + { + LogRel(("NAT: value of somaxconn has been changed from %d to %d\n", + pData->soMaxConn, iSoMaxConn)); + pData->soMaxConn = iSoMaxConn; + } + LogFlowFuncLeave(); +} +/* don't allow user set less 8kB and more than 1M values */ +#define _8K_1M_CHECK_ARG(name, val) CHECK_ARG(name, (val), 8, 1024) +void slirp_set_rcvbuf(PNATState pData, int kilobytes) +{ + _8K_1M_CHECK_ARG("SOCKET_RCVBUF", kilobytes); + pData->socket_rcv = kilobytes; +} +void slirp_set_sndbuf(PNATState pData, int kilobytes) +{ + _8K_1M_CHECK_ARG("SOCKET_SNDBUF", kilobytes); + pData->socket_snd = kilobytes * _1K; +} +void slirp_set_tcp_rcvspace(PNATState pData, int kilobytes) +{ + _8K_1M_CHECK_ARG("TCP_RCVSPACE", kilobytes); + tcp_rcvspace = kilobytes * _1K; +} +void slirp_set_tcp_sndspace(PNATState pData, int kilobytes) +{ + _8K_1M_CHECK_ARG("TCP_SNDSPACE", kilobytes); + tcp_sndspace = kilobytes * _1K; +} + +/* + * Looking for Ether by ip in ARP-cache + * Note: it´s responsible of 
caller to allocate buffer for result + * @returns iprt status code + */ +int slirp_arp_lookup_ether_by_ip(PNATState pData, uint32_t ip, uint8_t *ether) +{ + struct arp_cache_entry *ac; + + if (ether == NULL) + return VERR_INVALID_PARAMETER; + + if (LIST_EMPTY(&pData->arp_cache)) + return VERR_NOT_FOUND; + + LIST_FOREACH(ac, &pData->arp_cache, list) + { + if ( ac->ip == ip + && memcmp(ac->ether, broadcast_ethaddr, ETH_ALEN) != 0) + { + memcpy(ether, ac->ether, ETH_ALEN); + return VINF_SUCCESS; + } + } + return VERR_NOT_FOUND; +} + +/* + * Looking for IP by Ether in ARP-cache + * Note: it´s responsible of caller to allocate buffer for result + * @returns 0 - if found, 1 - otherwise + */ +int slirp_arp_lookup_ip_by_ether(PNATState pData, const uint8_t *ether, uint32_t *ip) +{ + struct arp_cache_entry *ac; + *ip = INADDR_ANY; + + if (LIST_EMPTY(&pData->arp_cache)) + return VERR_NOT_FOUND; + + LIST_FOREACH(ac, &pData->arp_cache, list) + { + if (memcmp(ether, ac->ether, ETH_ALEN) == 0) + { + *ip = ac->ip; + return VINF_SUCCESS; + } + } + return VERR_NOT_FOUND; +} + +void slirp_arp_who_has(PNATState pData, uint32_t dst) +{ + struct mbuf *m; + struct ethhdr *ehdr; + struct arphdr *ahdr; + static bool fWarned = false; + LogFlowFunc(("ENTER: %RTnaipv4\n", dst)); + + /* ARP request WHO HAS 0.0.0.0 is one of the signals + * that something has been broken at Slirp. Investigating + * pcap dumps it's easy to miss warning ARP requests being + * focused on investigation of other protocols flow. 
+ */ +#ifdef DEBUG_vvl + Assert((dst != INADDR_ANY)); + NOREF(fWarned); +#else + if ( dst == INADDR_ANY + && !fWarned) + { + LogRel(("NAT: ARP: \"WHO HAS INADDR_ANY\" request has been detected\n")); + fWarned = true; + } +#endif /* !DEBUG_vvl */ + + m = m_getcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR); + if (m == NULL) + { + Log(("NAT: Can't alloc mbuf for ARP request\n")); + LogFlowFuncLeave(); + return; + } + ehdr = mtod(m, struct ethhdr *); + memset(ehdr->h_source, 0xff, ETH_ALEN); + ahdr = (struct arphdr *)&ehdr[1]; + ahdr->ar_hrd = RT_H2N_U16_C(1); + ahdr->ar_pro = RT_H2N_U16_C(ETH_P_IP); + ahdr->ar_hln = ETH_ALEN; + ahdr->ar_pln = 4; + ahdr->ar_op = RT_H2N_U16_C(ARPOP_REQUEST); + memcpy(ahdr->ar_sha, special_ethaddr, ETH_ALEN); + /* we assume that this request come from gw, but not from DNS or TFTP */ + ahdr->ar_sha[5] = CTL_ALIAS; + *(uint32_t *)ahdr->ar_sip = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_ALIAS); + memset(ahdr->ar_tha, 0xff, ETH_ALEN); /*broadcast*/ + *(uint32_t *)ahdr->ar_tip = dst; + /* warn!!! should falls in mbuf minimal size */ + m->m_len = sizeof(struct arphdr) + ETH_HLEN; + m->m_data += ETH_HLEN; + m->m_len -= ETH_HLEN; + if_encap(pData, ETH_P_ARP, m, ETH_ENCAP_URG); + LogFlowFuncLeave(); +} + + +/* updates the arp cache + * @note: this is helper function, slirp_arp_cache_update_or_add should be used. + * @returns 0 - if has found and updated + * 1 - if hasn't found. + */ +static inline int slirp_arp_cache_update(PNATState pData, uint32_t dst, const uint8_t *mac) +{ + struct arp_cache_entry *ac; + Assert(( memcmp(mac, broadcast_ethaddr, ETH_ALEN) + && memcmp(mac, zerro_ethaddr, ETH_ALEN))); + LIST_FOREACH(ac, &pData->arp_cache, list) + { + if (ac->ip == dst) + { + memcpy(ac->ether, mac, ETH_ALEN); + return 0; + } + } + return 1; +} + +/** + * add entry to the arp cache + * @note: this is helper function, slirp_arp_cache_update_or_add should be used. 
+ */ +static inline void slirp_arp_cache_add(PNATState pData, uint32_t ip, const uint8_t *ether) +{ + struct arp_cache_entry *ac = NULL; + Assert(( memcmp(ether, broadcast_ethaddr, ETH_ALEN) + && memcmp(ether, zerro_ethaddr, ETH_ALEN))); + ac = RTMemAllocZ(sizeof(struct arp_cache_entry)); + if (ac == NULL) + { + Log(("NAT: Can't allocate arp cache entry\n")); + return; + } + ac->ip = ip; + memcpy(ac->ether, ether, ETH_ALEN); + LIST_INSERT_HEAD(&pData->arp_cache, ac, list); +} + +/* updates or adds entry to the arp cache + * @returns 0 - if has found and updated + * 1 - if hasn't found. + */ +int slirp_arp_cache_update_or_add(PNATState pData, uint32_t dst, const uint8_t *mac) +{ + if ( !memcmp(mac, broadcast_ethaddr, ETH_ALEN) + || !memcmp(mac, zerro_ethaddr, ETH_ALEN)) + { + static bool fBroadcastEtherAddReported; + if (!fBroadcastEtherAddReported) + { + LogRel(("NAT: Attempt to add pair [%RTmac:%RTnaipv4] in ARP cache was ignored\n", + mac, dst)); + fBroadcastEtherAddReported = true; + } + return 1; + } + if (slirp_arp_cache_update(pData, dst, mac)) + slirp_arp_cache_add(pData, dst, mac); + + return 0; +} + + +void slirp_set_mtu(PNATState pData, int mtu) +{ + if (mtu < 20 || mtu >= 16000) + { + LogRel(("NAT: MTU(%d) is out of range (20;16000] mtu forcely assigned to 1500\n", mtu)); + mtu = 1500; + } + /* MTU is maximum transition unit on */ + if_mtu = + if_mru = mtu; +} + +/** + * Info handler. 
+ */ +void slirp_info(PNATState pData, const void *pvArg, const char *pszArgs) +{ + struct socket *so, *so_next; + struct arp_cache_entry *ac; + struct port_forward_rule *rule; + PCDBGFINFOHLP pHlp = (PCDBGFINFOHLP)pvArg; + NOREF(pszArgs); + + pHlp->pfnPrintf(pHlp, "NAT parameters: MTU=%d\n", if_mtu); + pHlp->pfnPrintf(pHlp, "NAT TCP ports:\n"); + QSOCKET_FOREACH(so, so_next, tcp) + /* { */ + pHlp->pfnPrintf(pHlp, " %R[natsock]\n", so); + } + + pHlp->pfnPrintf(pHlp, "NAT UDP ports:\n"); + QSOCKET_FOREACH(so, so_next, udp) + /* { */ + pHlp->pfnPrintf(pHlp, " %R[natsock]\n", so); + } + + pHlp->pfnPrintf(pHlp, "NAT ARP cache:\n"); + LIST_FOREACH(ac, &pData->arp_cache, list) + { + pHlp->pfnPrintf(pHlp, " %RTnaipv4 %RTmac\n", ac->ip, &ac->ether); + } + + pHlp->pfnPrintf(pHlp, "NAT rules:\n"); + LIST_FOREACH(rule, &pData->port_forward_rule_head, list) + { + pHlp->pfnPrintf(pHlp, " %s %d => %RTnaipv4:%d %c\n", + rule->proto == IPPROTO_UDP ? "UDP" : "TCP", + rule->host_port, rule->guest_addr.s_addr, rule->guest_port, + rule->activated ? ' ' : '*'); + } +} + +/** + * @note: NATState::fUseHostResolver could be changed in bootp.c::dhcp_decode + * @note: this function is executed on GUI/VirtualBox or main/VBoxHeadless thread. + * @note: this function can potentially race with bootp.c::dhcp_decode (except Darwin) + */ +int slirp_host_network_configuration_change_strategy_selector(const PNATState pData) +{ + if (pData->fUseHostResolverPermanent) + return VBOX_NAT_DNS_HOSTRESOLVER; + + if (pData->fUseDnsProxy) { +#if HAVE_NOTIFICATION_FOR_DNS_UPDATE /* XXX */ && !defined(RT_OS_WINDOWS) + /* We dont conflict with bootp.c::dhcp_decode */ + struct rcp_state rcp_state; + int rc; + + rcp_state.rcps_flags = RCPSF_IGNORE_IPV6; + rc = rcp_parse(&rcp_state, RESOLV_CONF_FILE); + LogRelFunc(("NAT: rcp_parse:%Rrc old domain:%s new domain:%s\n", + rc, LIST_EMPTY(&pData->pDomainList) + ? 
"(null)" + : LIST_FIRST(&pData->pDomainList)->dd_pszDomain, + rcp_state.rcps_domain)); + if ( RT_FAILURE(rc) + || LIST_EMPTY(&pData->pDomainList)) + return VBOX_NAT_DNS_DNSPROXY; + + if ( rcp_state.rcps_domain + && strcmp(rcp_state.rcps_domain, LIST_FIRST(&pData->pDomainList)->dd_pszDomain) == 0) + return VBOX_NAT_DNS_DNSPROXY; + else + return VBOX_NAT_DNS_EXTERNAL; +#else + /* copy domain name */ + /* domain only compare with coy version */ + return VBOX_NAT_DNS_DNSPROXY; +#endif + } + return VBOX_NAT_DNS_EXTERNAL; +} diff --git a/src/VBox/Devices/Network/slirp/slirp.h b/src/VBox/Devices/Network/slirp/slirp.h new file mode 100644 index 00000000..96ff468c --- /dev/null +++ b/src/VBox/Devices/Network/slirp/slirp.h @@ -0,0 +1,555 @@ +/* $Id: slirp.h $ */ +/** @file + * NAT - slirp (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#include <VBox/vmm/stam.h> + +#ifdef RT_OS_WINDOWS +# include <iprt/win/winsock2.h> +# include <iprt/win/ws2tcpip.h> +typedef int socklen_t; +#endif +#ifdef RT_OS_OS2 /* temporary workaround, see ticket #127 */ +# define mbstat mbstat_os2 +# include <sys/socket.h> +# undef mbstat +typedef int socklen_t; +#endif + +#define CONFIG_QEMU + +#ifdef DEBUG +# undef DEBUG +# define DEBUG 1 +#endif + +#ifndef CONFIG_QEMU +# include "version.h" +#endif +#define LOG_GROUP LOG_GROUP_DRV_NAT +#include <VBox/log.h> +#include <iprt/mem.h> +#ifdef RT_OS_WINDOWS +# include <iprt/win/windows.h> +# include <io.h> +#endif +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/string.h> +#include <iprt/dir.h> +#include <iprt/rand.h> +#include <iprt/net.h> +#include <VBox/types.h> + +#undef malloc +#define malloc dont_use_malloc +#undef free +#define free dont_use_free +#undef realloc +#define realloc dont_use_realloc +#undef strdup +#define strdup dont_use_strdup + +#include "slirp_config.h" + +#ifdef RT_OS_WINDOWS + +# ifndef _MSC_VER +# include <inttypes.h> +# endif + + +# include <sys/timeb.h> +# include <iprt/win/iphlpapi.h> + +/* We don't want the errno.h versions of these error defines. 
*/ +# if defined(_MSC_VER) && _MSC_VER >= 1600 +# include <errno.h> +# undef ECONNREFUSED +# undef ECONNRESET +# undef EHOSTDOWN +# undef EHOSTUNREACH +# undef EINPROGRESS +# undef ENETDOWN +# undef ENETUNREACH +# undef ENOTCONN +# undef ESHUTDOWN +# undef EWOULDBLOCK +# endif +# define ECONNREFUSED WSAECONNREFUSED +# define ECONNRESET WSAECONNRESET +# define EHOSTDOWN WSAEHOSTDOWN +# define EHOSTUNREACH WSAEHOSTUNREACH +# define EINPROGRESS WSAEINPROGRESS +# define ENETDOWN WSAENETDOWN +# define ENETUNREACH WSAENETUNREACH +# define ENOTCONN WSAENOTCONN +# define ESHUTDOWN WSAESHUTDOWN +# define EWOULDBLOCK WSAEWOULDBLOCK + +/* standard names for the shutdown() "how" argument */ +#define SHUT_RD SD_RECEIVE +#define SHUT_WR SD_SEND +#define SHUT_RDWR SD_BOTH + +typedef uint8_t u_int8_t; +typedef uint16_t u_int16_t; +typedef uint32_t u_int32_t; + +#else /* !RT_OS_WINDOWS */ + +# define ioctlsocket ioctl +# define closesocket(s) close(s) +# define O_BINARY 0 + +#endif /* !RT_OS_WINDOWS */ + +#if defined(RT_OS_WINDOWS) || defined (RT_OS_SOLARIS) +typedef uint64_t u_int64_t; +typedef char *caddr_t; +#endif + +#include <sys/types.h> +#ifdef HAVE_SYS_BITYPES_H +# include <sys/bitypes.h> +#endif + +#ifdef _MSC_VER +# include <time.h> +#else /* !_MSC_VER */ +# include <sys/time.h> +#endif /* !_MSC_VER */ + +#ifdef NEED_TYPEDEFS +typedef char int8_t; +typedef unsigned char u_int8_t; + +# if SIZEOF_SHORT == 2 + typedef short int16_t; + typedef unsigned short u_int16_t; +# else +# if SIZEOF_INT == 2 + typedef int int16_t; + typedef unsigned int u_int16_t; +# else + #error Cannot find a type with sizeof() == 2 +# endif +# endif + +# if SIZEOF_SHORT == 4 + typedef short int32_t; + typedef unsigned short u_int32_t; +# else +# if SIZEOF_INT == 4 + typedef int int32_t; + typedef unsigned int u_int32_t; +# else + #error Cannot find a type with sizeof() == 4 +# endif +# endif +#endif /* NEED_TYPEDEFS */ + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +#ifdef HAVE_STDLIB_H +# 
include <stdlib.h> +#endif + +#include <errno.h> + + +#ifndef HAVE_MEMMOVE +# define memmove(x, y, z) bcopy(y, x, z) +#endif + +#if TIME_WITH_SYS_TIME +# include <sys/time.h> +# include <time.h> +#else +# ifndef HAVE_SYS_TIME_H +# define HAVE_SYS_TIME_H 0 +# endif +# if HAVE_SYS_TIME_H +# include <sys/time.h> +# else +# include <time.h> +# endif +#endif + +#ifdef HAVE_STRING_H +# include <string.h> +#else +# include <strings.h> +#endif + +#ifndef RT_OS_WINDOWS +# include <sys/uio.h> +#endif + +#ifndef RT_OS_WINDOWS +# include <netinet/in.h> +# include <arpa/inet.h> +#endif + +#ifdef GETTIMEOFDAY_ONE_ARG +# define gettimeofday(x, y) gettimeofday(x) +#endif + +#ifndef HAVE_INET_ATON +int inet_aton (const char *cp, struct in_addr *ia); +#endif + +#include <fcntl.h> +#ifndef NO_UNIX_SOCKETS +# include <sys/un.h> +#endif +#include <signal.h> +#ifdef HAVE_SYS_SIGNAL_H +# include <sys/signal.h> +#endif +#ifndef RT_OS_WINDOWS +# include <sys/socket.h> +#endif + +#if defined(HAVE_SYS_IOCTL_H) +# include <sys/ioctl.h> +#endif + +#ifdef HAVE_SYS_SELECT_H +# include <sys/select.h> +#endif + +#ifdef HAVE_SYS_WAIT_H +# include <sys/wait.h> +#endif + +#ifdef HAVE_SYS_FILIO_H +# include <sys/filio.h> +#endif + +#if defined(__STDC__) || defined(_MSC_VER) +# include <stdarg.h> +#else +# include <varargs.h> +#endif + +#include <sys/stat.h> + +/* Avoid conflicting with the libc insque() and remque(), which + * have different prototypes. 
*/ +#define insque slirp_insque +#define remque slirp_remque + +#ifdef HAVE_SYS_STROPTS_H +# include <sys/stropts.h> +#endif + +#include "libslirp.h" + +#include "debug.h" + +#include "ip.h" +#include "tcp.h" +#include "tcp_timer.h" +#include "tcp_var.h" +#include "tcpip.h" +#include "udp.h" +#include "icmp_var.h" +#include "mbuf.h" +#include "if.h" +#include "sbuf.h" +#include "socket.h" +#include "main.h" +#include "misc.h" +#include "ctl.h" +#include "bootp.h" +#include "tftp.h" + +#include "slirp_state.h" +#include "slirp_dns.h" + +#undef PVM /* XXX Mac OS X hack */ + +#ifndef NULL +# define NULL (void *)0 +#endif + +void if_start (PNATState); + +#ifndef HAVE_INDEX + char *index (const char *, int); +#endif + +#ifndef HAVE_GETHOSTID + long gethostid (void); +#endif + +#ifndef RT_OS_WINDOWS +#include <netdb.h> +#endif + +#include "dnsproxy/dnsproxy.h" + +#define DEFAULT_BAUD 115200 + +int get_dns_addr(PNATState pData); + +/* cksum.c */ +typedef uint16_t u_short; +typedef unsigned int u_int; +#include "in_cksum.h" + +/* if.c */ +void if_init (PNATState); +void if_output (PNATState, struct socket *, struct mbuf *); + +/* ip_input.c */ +void ip_init (PNATState); +void ip_input (PNATState, struct mbuf *); +struct mbuf * ip_reass (PNATState, register struct mbuf *); +void ip_freef (PNATState, struct ipqhead *, struct ipq_t *); +void ip_slowtimo (PNATState); +void ip_stripoptions (register struct mbuf *, struct mbuf *); + +/* ip_output.c */ +int ip_output (PNATState, struct socket *, struct mbuf *); +int ip_output0 (PNATState, struct socket *, struct mbuf *, int urg); + +/* tcp_input.c */ +int tcp_reass (PNATState, struct tcpcb *, struct tcphdr *, int *, struct mbuf *); +void tcp_input (PNATState, register struct mbuf *, int, struct socket *); +void tcp_fconnect_failed(PNATState, struct socket *, int); +void tcp_dooptions (PNATState, struct tcpcb *, u_char *, int, struct tcpiphdr *); +void tcp_xmit_timer (PNATState, register struct tcpcb *, int); +int tcp_mss 
(PNATState, register struct tcpcb *, u_int); + +/* tcp_output.c */ +int tcp_output (PNATState, register struct tcpcb *); +void tcp_setpersist (register struct tcpcb *); + +/* tcp_subr.c */ +void tcp_init (PNATState); +void tcp_template (struct tcpcb *); +void tcp_respond (PNATState, struct tcpcb *, register struct tcpiphdr *, register struct mbuf *, tcp_seq, tcp_seq, int); +struct tcpcb * tcp_newtcpcb (PNATState, struct socket *); +struct tcpcb * tcp_close (PNATState, register struct tcpcb *); +void tcp_drain (void); +void tcp_sockclosed (PNATState, struct tcpcb *); +int tcp_fconnect (PNATState, struct socket *); +void tcp_connect (PNATState, struct socket *); +int tcp_attach (PNATState, struct socket *); +u_int8_t tcp_tos (struct socket *); +int tcp_ctl (PNATState, struct socket *); +struct tcpcb *tcp_drop(PNATState, struct tcpcb *tp, int err); + +/* hostres.c */ +struct mbuf *hostresolver(PNATState, struct mbuf *, uint32_t src, uint16_t sport); + +/*slirp.c*/ +void slirp_arp_who_has(PNATState pData, uint32_t dst); +int slirp_arp_cache_update_or_add(PNATState pData, uint32_t dst, const uint8_t *mac); +int slirp_init_dns_list(PNATState pData); +void slirp_release_dns_list(PNATState pData); +#define MIN_MRU 128 +#define MAX_MRU 16384 + +#ifndef RT_OS_WINDOWS +# define min(x, y) ((x) < (y) ? (x) : (y)) +# define max(x, y) ((x) > (y) ? (x) : (y)) +#endif + +#ifdef RT_OS_WINDOWS +# undef errno +# if 0 /* debugging */ +int errno_func(const char *file, int line); +# define errno (errno_func(__FILE__, __LINE__)) +# else +# define errno (WSAGetLastError()) +# endif +#endif + +# define ETH_ALEN 6 +# define ETH_HLEN 14 + +struct ethhdr +{ + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_source[ETH_ALEN]; /* source ether addr */ + unsigned short h_proto; /* packet type ID field */ +}; +AssertCompileSize(struct ethhdr, 14); + +/* + * (vvl) externing of sscanf. 
+ */ +int sscanf(const char *s, const char *format, ...); + +#if defined(VBOX_SLIRP_ALIAS) || defined(VBOX_SLIRP_BSD) + +# define ip_next(ip) (void *)((uint8_t *)(ip) + ((ip)->ip_hl << 2)) +# define udp_next(udp) (void *)((uint8_t *)&((struct udphdr *)(udp))[1]) +# undef bcopy +# define bcopy(src, dst, len) memcpy((dst), (src), (len)) +# undef bcmp +# define bcmp(a1, a2, len) memcmp((a1), (a2), (len)) +# define NO_FW_PUNCH +/* Two wrongs don't make a right, but this at least averts harm. */ +# define NO_USE_SOCKETS + +# ifdef alias_addr +# ifndef VBOX_SLIRP_BSD +# error alias_addr has already defined!!! +# else +# undef alias_addr +# endif +# endif + +# define arc4random() RTRandU32() +# undef malloc +# undef calloc +# undef free +# define malloc(x) RTMemAlloc((x)) +# define calloc(x, n) RTMemAllocZ((x)*(n)) +# define free(x) RTMemFree((x)) +# ifndef __unused +# define __unused +# endif + +# define strncasecmp RTStrNICmp +# define stderr NULL +# define stdout NULL + +# ifdef VBOX_WITH_DEBUG_LIBALIAS +# define LIBALIAS_DEBUG +# endif + +# define fflush(x) do{} while(0) +# include "ext.h" +#endif /*VBOX_SLIRP_ALIAS*/ + +/** + * @todo might be useful to make it configurable, especially in terms of Intnet behind NAT + */ +# define maxusers 32 +# define max_protohdr 0 +/** + * @todo (vvl) for now ignore these values, later perhaps initialize tuning parameters + */ +# define TUNABLE_INT_FETCH(name, pval) do { } while (0) +# define SYSCTL_PROC(a0, a1, a2, a3, a4, a5, a6, a7, a8) const int dummy_ ## a6 = 0 +# define SYSCTL_STRUCT(a0, a1, a2, a3, a4, a5, a6) const int dummy_ ## a5 = 0 +# define SYSINIT(a0, a1, a2, a3, a4) const int dummy_ ## a3 = 0 +# define sysctl_handle_int(a0, a1, a2, a3) 0 +# define EVENTHANDLER_INVOKE(a) do{}while(0) +# define EVENTHANDLER_REGISTER(a0, a1, a2, a3) do{}while(0) +# define KASSERT AssertMsg + +struct dummy_req +{ + void *newptr; +}; + +#define SYSCTL_HANDLER_ARGS PNATState pData, void *oidp, struct dummy_req *req + +void mbuf_init(void 
*); +# define cksum(m, len) in_cksum_skip((m), (len), 0) + +int ftp_alias_load(PNATState); +int ftp_alias_unload(PNATState); +int nbt_alias_load(PNATState); +int nbt_alias_unload(PNATState); +int slirp_arp_lookup_ip_by_ether(PNATState, const uint8_t *, uint32_t *); +int slirp_arp_lookup_ether_by_ip(PNATState, uint32_t, uint8_t *); + +DECLINLINE(unsigned) slirp_size(PNATState pData) +{ + if (if_mtu < MSIZE) + return MCLBYTES; + else if (if_mtu < MCLBYTES) + return MCLBYTES; + else if (if_mtu < MJUM9BYTES) + return MJUM9BYTES; + else if (if_mtu < MJUM16BYTES) + return MJUM16BYTES; + else + AssertMsgFailed(("Unsupported size")); + return 0; +} + +static inline bool slirpMbufTagService(PNATState pData, struct mbuf *m, uint8_t u8ServiceId) +{ + struct m_tag * t = NULL; + NOREF(pData); + /* if_encap assumes that all packets goes through aliased address(gw) */ + if (u8ServiceId == CTL_ALIAS) + return true; + t = m_tag_get(PACKET_SERVICE, sizeof(uint8_t), 0); + if (!t) + return false; + *(uint8_t *)&t[1] = u8ServiceId; + m_tag_prepend(m, t); + return true; +} + +/** + * This function tags mbuf allocated for special services. + * @todo: add service id verification. 
+ */ +static inline struct mbuf *slirpServiceMbufAlloc(PNATState pData, uint8_t u8ServiceId) +{ + struct mbuf *m = NULL; + m = m_getcl(pData, M_DONTWAIT, MT_HEADER, M_PKTHDR); + if (!m) + return m; + if(!slirpMbufTagService(pData, m, u8ServiceId)) + { + m_freem(pData, m); + return NULL; + } + return m; +} + +static inline struct mbuf *slirpDnsMbufAlloc(PNATState pData) +{ + return slirpServiceMbufAlloc(pData, CTL_DNS); +} + +DECLINLINE(bool) slirpIsWideCasting(PNATState pData, uint32_t u32Addr) +{ + bool fWideCasting; + LogFlowFunc(("Enter: u32Addr:%RTnaipv4\n", u32Addr)); + fWideCasting = ( u32Addr == INADDR_BROADCAST + || (u32Addr & RT_H2N_U32_C(~pData->netmask)) == RT_H2N_U32_C(~pData->netmask)); + LogFlowFunc(("Leave: %RTbool\n", fWideCasting)); + return fWideCasting; +} +#endif + diff --git a/src/VBox/Devices/Network/slirp/slirp_config.h b/src/VBox/Devices/Network/slirp/slirp_config.h new file mode 100644 index 00000000..7ea0c1a8 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/slirp_config.h @@ -0,0 +1,225 @@ +/* $Id: slirp_config.h $ */ +/** @file + * NAT - compile-time configuration. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * User definable configuration options + */ + +/* Undefine if you don't want talk emulation */ +#undef EMULATE_TALK + +/* Define if you want the connection to be probed */ +/* XXX Not working yet, so ignore this for now */ +#undef PROBE_CONN + +/* Define to 1 if you want KEEPALIVE timers */ +#define DO_KEEPALIVE 0 + +/* Define to MAX interfaces you expect to use at once */ +/* MAX_INTERFACES determines the max. TOTAL number of interfaces (SLIP and PPP) */ +/* MAX_PPP_INTERFACES determines max. number of PPP interfaces */ +#define MAX_INTERFACES 1 +#define MAX_PPP_INTERFACES 1 + +/* Define if you want slirp's socket in /tmp */ +/* XXXXXX Do this in ./configure */ +#undef USE_TMPSOCKET + +/* Define if you want slirp to use cfsetXspeed() on the terminal */ +#undef DO_CFSETSPEED + +/* Define this if you want slirp to write to the tty as fast as it can */ +/* This should only be set if you are using load-balancing, slirp does a */ +/* pretty good job on single modems already, and seting this will make */ +/* interactive sessions less responsive */ +/* XXXXX Talk about having fast modem as unit 0 */ +#undef FULL_BOLT + +/* + * Define if you want slirp to use less CPU + * You will notice a small lag in interactive sessions, but it's not that bad + * Things like Netscape/ftp/etc. 
are completely unaffected + * This is mainly for sysadmins who have many slirp users + */ +#undef USE_LOWCPU + +/*********************************************************/ +/* + * Autoconf defined configuration options + * You shouldn't need to touch any of these + */ + +#ifdef _MSC_VER +#undef HAVE_UNISTD_H +#else +/* Define if you have unistd.h */ +#define HAVE_UNISTD_H +#endif + +/* Define if you have stdlib.h */ +#define HAVE_STDLIB_H + +/* Define if you have sys/ioctl.h */ +#undef HAVE_SYS_IOCTL_H +#ifndef RT_OS_WINDOWS +# define HAVE_SYS_IOCTL_H +#endif + +/* Define if you have sys/filio.h */ +#undef HAVE_SYS_FILIO_H +#ifdef __APPLE__ +#define HAVE_SYS_FILIO_H +#endif + +/* Define according to how time.h should be included */ +#define TIME_WITH_SYS_TIME 0 +#undef HAVE_SYS_TIME_H + +/* Define if you have sys/bitypes.h */ +#undef HAVE_SYS_BITYPES_H + +/* Define if the machine is big endian */ +/*#undef WORDS_BIGENDIAN */ + +/* Define if you have readv */ +#undef HAVE_READV + +/* Define if iovec needs to be declared */ +#undef DECLARE_IOVEC +#ifdef RT_OS_WINDOWS +# define DECLARE_IOVEC +#endif + +/* Define if you have a POSIX.1 sys/wait.h */ +#undef HAVE_SYS_WAIT_H + +/* Define if you have sys/select.h */ +#undef HAVE_SYS_SELECT_H +#ifndef RT_OS_WINDOWS +# define HAVE_SYS_SELECT_H +#endif + +/* Define if you have strings.h */ +#define HAVE_STRING_H + +/* Define if you have arpa/inet.h */ +#undef HAVE_ARPA_INET_H +#ifndef RT_OS_WINDOWS +# define HAVE_ARPA_INET_H +#endif + +/* Define if you have sys/signal.h */ +#undef HAVE_SYS_SIGNAL_H + +/* Define if you have sys/stropts.h */ +#undef HAVE_SYS_STROPTS_H + +/* Define to whatever your compiler thinks inline should be */ +#if defined(_MSC_VER) && !defined(__cplusplus) +# define inline _inline +#else +# define inline inline +#endif + +/* Define to whatever your compiler thinks const should be */ +#define const const + +/* Define if you don't have u_int32_t etc. 
typedef'd */ +#undef NEED_TYPEDEFS +#ifdef __sun__ +#define NEED_TYPEDEFS +#endif + +/* Define to sizeof(char) */ +#define SIZEOF_CHAR 1 + +/* Define to sizeof(short) */ +#define SIZEOF_SHORT 2 + +/* Define to sizeof(int) */ +#define SIZEOF_INT 4 + +/* Define to sizeof(char *) */ +#define HOST_LONG_BITS ARCH_BITS +#define SIZEOF_CHAR_P (HOST_LONG_BITS / 8) + +/* Define if you have random() */ +#undef HAVE_RANDOM + +/* Define if you have srandom() */ +#undef HAVE_SRANDOM + +/* Define if you have inet_aton */ +#undef HAVE_INET_ATON +#ifndef RT_OS_WINDOWS +# define HAVE_INET_ATON +#endif + +/* Define if you have setenv */ +#undef HAVE_SETENV + +/* Define if you have index() */ +#undef HAVE_INDEX + +/* Define if you have bcmp() */ +#undef HAVE_BCMP + +/* Define if you have drand48 */ +#undef HAVE_DRAND48 + +/* Define if you have memmove */ +#define HAVE_MEMMOVE + +/* Define if you have gethostid */ +#undef HAVE_GETHOSTID +#ifdef RT_OS_OS2 +# define HAVE_GETHOSTID +#endif + +/* Define if you DON'T have unix-domain sockets */ +#undef NO_UNIX_SOCKETS +#ifdef RT_OS_WINDOWS +# define NO_UNIX_SOCKETS +#endif + +/* Define if gettimeofday only takes one argument */ +#undef GETTIMEOFDAY_ONE_ARG + +/* Define if you have revoke() */ +#undef HAVE_REVOKE + +/* Define if you have the sysv method of opening pty's (/dev/ptmx, etc.) */ +#undef HAVE_GRANTPT + +/* Define if you have fchmod */ +#undef HAVE_FCHMOD + +/* Define if you have <sys/type32.h> */ +#undef HAVE_SYS_TYPES32_H +#ifdef RT_OS_SOLARIS +# define HAVE_SYS_TYPES32_H +#endif diff --git a/src/VBox/Devices/Network/slirp/slirp_dns.c b/src/VBox/Devices/Network/slirp/slirp_dns.c new file mode 100644 index 00000000..e3b105fe --- /dev/null +++ b/src/VBox/Devices/Network/slirp/slirp_dns.c @@ -0,0 +1,323 @@ +/* $Id: slirp_dns.c $ */ +/** @file + * NAT - dns initialization. + */ + +/* + * Copyright (C) 2012-2023 Oracle and/or its affiliates. 
+ * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "slirp.h" +#ifdef RT_OS_OS2 +# include <paths.h> +#endif + +#include <iprt/errcore.h> +#include <VBox/vmm/pdmdrv.h> +#include <iprt/assert.h> +#include <iprt/file.h> + +#ifdef RT_OS_WINDOWS +# include <iprt/utf16.h> +# include <Winnls.h> +# define _WINSOCK2API_ +# include <iprt/win/iphlpapi.h> + +static int get_dns_addr_domain(PNATState pData) +{ + /*ULONG flags = GAA_FLAG_INCLUDE_PREFIX;*/ /*GAA_FLAG_INCLUDE_ALL_INTERFACES;*/ /* all interfaces registered in NDIS */ + PIP_ADAPTER_ADDRESSES pAdapterAddr = NULL; + PIP_ADAPTER_ADDRESSES pAddr = NULL; + PIP_ADAPTER_DNS_SERVER_ADDRESS pDnsAddr = NULL; + ULONG size; + char *pszSuffix; + struct dns_domain_entry *pDomain = NULL; + ULONG ret = ERROR_SUCCESS; + + /** @todo add SKIPing flags to get only required information */ + + /* determine size of buffer */ + size = 0; + ret = pData->pfnGetAdaptersAddresses(AF_INET, 0, NULL /* reserved */, pAdapterAddr, &size); + if (ret != ERROR_BUFFER_OVERFLOW) + { + Log(("NAT: error %lu occurred on capacity detection operation\n", ret)); + return -1; + } + if (size == 0) + { + Log(("NAT: Win socket API returns non capacity\n")); + return -1; + } + + pAdapterAddr = RTMemAllocZ(size); + if (!pAdapterAddr) + { 
+ Log(("NAT: No memory available\n")); + return -1; + } + ret = pData->pfnGetAdaptersAddresses(AF_INET, 0, NULL /* reserved */, pAdapterAddr, &size); + if (ret != ERROR_SUCCESS) + { + Log(("NAT: error %lu occurred on fetching adapters info\n", ret)); + RTMemFree(pAdapterAddr); + return -1; + } + + for (pAddr = pAdapterAddr; pAddr != NULL; pAddr = pAddr->Next) + { + int found; + if (pAddr->OperStatus != IfOperStatusUp) + continue; + + for (pDnsAddr = pAddr->FirstDnsServerAddress; pDnsAddr != NULL; pDnsAddr = pDnsAddr->Next) + { + struct sockaddr *SockAddr = pDnsAddr->Address.lpSockaddr; + struct in_addr InAddr; + struct dns_entry *pDns; + + if (SockAddr->sa_family != AF_INET) + continue; + + InAddr = ((struct sockaddr_in *)SockAddr)->sin_addr; + + /* add dns server to list */ + pDns = RTMemAllocZ(sizeof(struct dns_entry)); + if (!pDns) + { + Log(("NAT: Can't allocate buffer for DNS entry\n")); + RTMemFree(pAdapterAddr); + return VERR_NO_MEMORY; + } + + Log(("NAT: adding %RTnaipv4 to DNS server list\n", InAddr)); + if ((InAddr.s_addr & RT_H2N_U32_C(IN_CLASSA_NET)) == RT_N2H_U32_C(INADDR_LOOPBACK & IN_CLASSA_NET)) + pDns->de_addr.s_addr = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_ALIAS); + else + pDns->de_addr.s_addr = InAddr.s_addr; + + TAILQ_INSERT_HEAD(&pData->pDnsList, pDns, de_list); + + if (pAddr->DnsSuffix == NULL) + continue; + + /* uniq */ + RTUtf16ToUtf8(pAddr->DnsSuffix, &pszSuffix); + if (!pszSuffix || strlen(pszSuffix) == 0) + { + RTStrFree(pszSuffix); + continue; + } + + found = 0; + LIST_FOREACH(pDomain, &pData->pDomainList, dd_list) + { + if ( pDomain->dd_pszDomain != NULL + && strcmp(pDomain->dd_pszDomain, pszSuffix) == 0) + { + found = 1; + RTStrFree(pszSuffix); + break; + } + } + if (!found) + { + pDomain = RTMemAllocZ(sizeof(struct dns_domain_entry)); + if (!pDomain) + { + Log(("NAT: not enough memory\n")); + RTStrFree(pszSuffix); + RTMemFree(pAdapterAddr); + return VERR_NO_MEMORY; + } + pDomain->dd_pszDomain = pszSuffix; + Log(("NAT: 
adding domain name %s to search list\n", pDomain->dd_pszDomain)); + LIST_INSERT_HEAD(&pData->pDomainList, pDomain, dd_list); + } + } + } + RTMemFree(pAdapterAddr); + return 0; +} + +#else /* !RT_OS_WINDOWS */ + +#include "resolv_conf_parser.h" + +static int get_dns_addr_domain(PNATState pData) +{ + struct rcp_state st; + int rc; + unsigned i; + + /* XXX: perhaps IPv6 shouldn't be ignored if we're using DNS proxy */ + st.rcps_flags = RCPSF_IGNORE_IPV6; + rc = rcp_parse(&st, RESOLV_CONF_FILE); + + if (rc < 0) + return -1; + + /* for historical reasons: Slirp returns -1 if no nameservers were found */ + if (st.rcps_num_nameserver == 0) + return -1; + + + /* XXX: We're composing the list, but we already knows + * its size so we can allocate array instead (Linux guests + * dont like >3 servers in the list anyway) + * or use pre-allocated array in NATState. + */ + for (i = 0; i != st.rcps_num_nameserver; ++i) + { + struct dns_entry *pDns; + RTNETADDRU *address = &st.rcps_nameserver[i].uAddr; + + if (address->IPv4.u == INADDR_ANY) + { + /* + * This doesn't seem to be very well documented except for + * RTFS of res_init.c, but INADDR_ANY is a valid value for + * for "nameserver". + */ + address->IPv4.u = RT_H2N_U32_C(INADDR_LOOPBACK); + } + + if ( (address->IPv4.u & RT_H2N_U32_C(IN_CLASSA_NET)) + == RT_N2H_U32_C(INADDR_LOOPBACK & IN_CLASSA_NET)) + { + /** + * XXX: Note shouldn't patch the address in case of using DNS proxy, + * because DNS proxy we do revert it back actually. + */ + if ( address->IPv4.u == RT_N2H_U32_C(INADDR_LOOPBACK) + && pData->fLocalhostReachable) + address->IPv4.u = RT_H2N_U32(RT_N2H_U32(pData->special_addr.s_addr) | CTL_ALIAS); + else if (pData->fUseDnsProxy == 0) { + /* + * Either the resolver lives somewhere else on the 127/8 network or the loopback interface + * is blocked for access from the guest, either way switch to the DNS proxy. 
+ */ + if (pData->fLocalhostReachable) + LogRel(("NAT: DNS server %RTnaipv4 registration detected, switching to the DNS proxy\n", address->IPv4)); + else + LogRel(("NAT: Switching to DNS proxying due to access to the loopback interface being blocked\n")); + pData->fUseDnsProxy = 1; + } + } + + pDns = RTMemAllocZ(sizeof(struct dns_entry)); + if (pDns == NULL) + { + slirpReleaseDnsSettings(pData); + return VERR_NO_MEMORY; + } + + pDns->de_addr.s_addr = address->IPv4.u; + TAILQ_INSERT_HEAD(&pData->pDnsList, pDns, de_list); + } + + if (st.rcps_domain != 0) + { + struct dns_domain_entry *pDomain = RTMemAllocZ(sizeof(struct dns_domain_entry)); + if (pDomain == NULL) + { + slirpReleaseDnsSettings(pData); + return -1; + } + + pDomain->dd_pszDomain = RTStrDup(st.rcps_domain); + LogRel(("NAT: Adding domain name %s\n", pDomain->dd_pszDomain)); + LIST_INSERT_HEAD(&pData->pDomainList, pDomain, dd_list); + } + + return 0; +} + +#endif /* !RT_OS_WINDOWS */ + +int slirpInitializeDnsSettings(PNATState pData) +{ + int rc = VINF_SUCCESS; + AssertPtrReturn(pData, VERR_INVALID_PARAMETER); + LogFlowFuncEnter(); + if (!pData->fUseHostResolverPermanent) + { + TAILQ_INIT(&pData->pDnsList); + LIST_INIT(&pData->pDomainList); + + /* + * Some distributions haven't got /etc/resolv.conf + * so we should other way to configure DNS settings. 
+ */ + if (get_dns_addr_domain(pData) < 0) + pData->fUseHostResolver = true; + else + { + pData->fUseHostResolver = false; + dnsproxy_init(pData); + } + + if (!pData->fUseHostResolver) + { + struct dns_entry *pDNSEntry = NULL; + int cDNSListEntry = 0; + TAILQ_FOREACH_REVERSE(pDNSEntry, &pData->pDnsList, dns_list_head, de_list) + { + LogRel(("NAT: DNS#%i: %RTnaipv4\n", cDNSListEntry, pDNSEntry->de_addr.s_addr)); + cDNSListEntry++; + } + } + } + + LogFlowFuncLeaveRC(rc); + return rc; +} + +int slirpReleaseDnsSettings(PNATState pData) +{ + struct dns_entry *pDns = NULL; + struct dns_domain_entry *pDomain = NULL; + int rc = VINF_SUCCESS; + AssertPtrReturn(pData, VERR_INVALID_PARAMETER); + LogFlowFuncEnter(); + + while (!TAILQ_EMPTY(&pData->pDnsList)) + { + pDns = TAILQ_FIRST(&pData->pDnsList); + TAILQ_REMOVE(&pData->pDnsList, pDns, de_list); + RTMemFree(pDns); + } + + while (!LIST_EMPTY(&pData->pDomainList)) + { + pDomain = LIST_FIRST(&pData->pDomainList); + LIST_REMOVE(pDomain, dd_list); + if (pDomain->dd_pszDomain != NULL) + RTStrFree(pDomain->dd_pszDomain); + RTMemFree(pDomain); + } + + /* tell any pending dnsproxy requests their copy is expired */ + ++pData->dnsgen; + + LogFlowFuncLeaveRC(rc); + return rc; +} diff --git a/src/VBox/Devices/Network/slirp/slirp_dns.h b/src/VBox/Devices/Network/slirp/slirp_dns.h new file mode 100644 index 00000000..c2ee1807 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/slirp_dns.h @@ -0,0 +1,32 @@ +/* $Id: slirp_dns.h $ */ +/** @file + * NAT - Slirp's dns header. + */ + +/* + * Copyright (C) 2012-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ +#ifndef _SLIRP_DNS_H_ +#define _SLIRP_DNS_H_ +int slirpInitializeDnsSettings(PNATState pData); +int slirpReleaseDnsSettings(PNATState pData); +#endif + diff --git a/src/VBox/Devices/Network/slirp/slirp_state.h b/src/VBox/Devices/Network/slirp/slirp_state.h new file mode 100644 index 00000000..5bf59825 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/slirp_state.h @@ -0,0 +1,548 @@ +/** @file + * NAT - slirp state/configuration. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef ___slirp_state_h +#define ___slirp_state_h + +#include <iprt/req.h> +#include <iprt/critsect.h> + +#define COUNTERS_INIT +#include "counters.h" + +#include "ip_icmp.h" +#include "dnsproxy/dnsproxy.h" + + +/** Where to start DHCP IP number allocation. 
*/ +#define START_ADDR 15 + +/** DHCP Lease time. */ +#define LEASE_TIME (24 * 3600) + +/* + * ARP cache: a naive implementation of an ARP + * cache mapping a 4 byte IPv4 address to a 6 byte + * ethernet one. + */ +struct arp_cache_entry +{ + uint32_t ip; + uint8_t ether[6]; + LIST_ENTRY(arp_cache_entry) list; +}; +LIST_HEAD(arp_cache_head, arp_cache_entry); + +/** DNS domain name list entry (dd_pszDomain is the domain string). */ +struct dns_domain_entry +{ + char *dd_pszDomain; + LIST_ENTRY(dns_domain_entry) dd_list; +}; +LIST_HEAD(dns_domain_list_head, dns_domain_entry); + +#ifdef VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER +typedef struct DNSMAPPINGENTRY +{ + /** Literal or pattern. */ + bool fPattern; + /** Host name or pattern to map. */ + char *pszName; + /** The IP Address. */ + uint32_t u32IpAddress; + /** List entry. */ + STAILQ_ENTRY(DNSMAPPINGENTRY) MapList; +} DNSMAPPINGENTRY, *PDNSMAPPINGENTRY; +typedef STAILQ_HEAD(DNSMAPPINGHEAD, DNSMAPPINGENTRY) DNSMAPPINGHEAD; +#endif + +/** DNS server list entry (de_addr is the server's IPv4 address). */ struct dns_entry +{ + struct in_addr de_addr; + TAILQ_ENTRY(dns_entry) de_list; +}; +TAILQ_HEAD(dns_list_head, dns_entry); +TAILQ_HEAD(if_queue, mbuf); + +struct port_forward_rule +{ + uint16_t proto; + uint16_t host_port; + uint16_t guest_port; + struct in_addr guest_addr; + struct in_addr bind_ip; + int activated; + struct socket *so; + LIST_ENTRY(port_forward_rule) list; +}; +LIST_HEAD(port_forward_rule_list, port_forward_rule); + + +#ifdef RT_OS_WINDOWS +struct pong; +TAILQ_HEAD(pong_tailq, pong); +#endif + +/* forward declaration */ +struct proto_handler; + +/** Main state/configuration structure for slirp NAT.
*/ +typedef struct NATState +{ +#define PROFILE_COUNTER(name, dsc) STAMPROFILE Stat ## name +#define COUNTING_COUNTER(name, dsc) STAMCOUNTER Stat ## name +#include "counters.h" + /* Stuff from boot.c */ + void *pbootp_clients; + const char *bootp_filename; + /* Stuff from if.c */ + int if_mtu, if_mru; + int if_comp; + int if_maxlinkhdr; + int if_queued; + int if_thresh; + /* Stuff from icmp.c */ + struct icmpstat_t icmpstat; + /* Stuff from ip_input.c */ + struct ipstat_t ipstat; + struct ipqhead ipq[IPREASS_NHASH]; + int maxnipq; /* Administrative limit on # of reass queues*/ + int maxfragsperpacket; /* Maximum number of IPv4 fragments allowed per packet */ + int nipq; /* total number of reass queues */ + uint16_t ip_currid; + /* Stuff from mbuf.c */ + /* Stuff from slirp.c */ + void *pvUser; + uint32_t curtime; + uint32_t time_fasttimo; + uint32_t last_slowtimo; + bool do_slowtimo; + bool link_up; + struct timeval tt; + struct in_addr our_addr; + struct in_addr alias_addr; + struct in_addr special_addr; + struct in_addr guest_addr_guess; + + int tcp_rcvspace; + int tcp_sndspace; + int socket_rcv; + int socket_snd; + int soMaxConn; +#ifdef RT_OS_WINDOWS + ULONG (WINAPI * pfnGetAdaptersAddresses)(ULONG, ULONG, PVOID, PIP_ADAPTER_ADDRESSES, PULONG); +#endif + struct dns_list_head pDnsList; + struct dns_domain_list_head pDomainList; + uint32_t dnsgen; /* XXX: merge with dnsLastUpdate? 
*/ + struct in_addr tftp_server; + struct in_addr loopback_addr; + uint32_t dnsLastUpdate; + uint32_t netmask; + const uint8_t *slirp_ethaddr; + char slirp_hostname[33]; + bool fPassDomain; + struct in_addr bindIP; + /* Stuff from tcp_input.c */ + struct socket tcb; + + struct socket *tcp_last_so; + tcp_seq tcp_iss; + /* Stuff from tcp_timer.c */ + struct tcpstat_t tcpstat; + uint32_t tcp_now; + int tcp_reass_qsize; + int tcp_reass_maxqlen; + int tcp_reass_maxseg; + int tcp_reass_overflows; + /* Stuff from tftp.c */ + void *pvTftpSessions; + int cTftpSession; + const char *tftp_prefix; + /* Stuff from udp.c */ + struct udpstat_t udpstat; + struct socket udb; + struct socket *udp_last_so; + +# ifndef RT_OS_WINDOWS + /* counter of sockets needed for allocation enough room to + * process sockets with poll/epoll + * + * NSOCK_INC/DEC should be injected before every + * operation on socket queue (tcb, udb) + */ + int nsock; +# define NSOCK_INC() do {pData->nsock++;} while (0) +# define NSOCK_DEC() do {pData->nsock--;} while (0) +# define NSOCK_INC_EX(ex) do {ex->pData->nsock++;} while (0) +# define NSOCK_DEC_EX(ex) do {ex->pData->nsock--;} while (0) +# else +# define NSOCK_INC() do {} while (0) +# define NSOCK_DEC() do {} while (0) +# define NSOCK_INC_EX(ex) do {} while (0) +# define NSOCK_DEC_EX(ex) do {} while (0) +# endif + + struct socket icmp_socket; +# if !defined(RT_OS_WINDOWS) + struct icmp_storage icmp_msg_head; + int cIcmpCacheSize; + int iIcmpCacheLimit; +# else + struct pong_tailq pongs_expected; + struct pong_tailq pongs_received; + size_t cbIcmpPending; +# endif + +#if defined(RT_OS_WINDOWS) +# define VBOX_SOCKET_EVENT (pData->phEvents[VBOX_SOCKET_EVENT_INDEX]) + HANDLE phEvents[VBOX_EVENT_COUNT]; +#endif +#ifdef zone_mbuf +# undef zone_mbuf +#endif + uma_zone_t zone_mbuf; +#ifdef zone_clust +# undef zone_clust +#endif + uma_zone_t zone_clust; +#ifdef zone_pack +# undef zone_pack +#endif + uma_zone_t zone_pack; +#ifdef zone_jumbop +# undef zone_jumbop 
+#endif + uma_zone_t zone_jumbop; +#ifdef zone_jumbo9 +# undef zone_jumbo9 +#endif + uma_zone_t zone_jumbo9; +#ifdef zone_jumbo16 +# undef zone_jumbo16 +#endif + uma_zone_t zone_jumbo16; +#ifdef zone_ext_refcnt +# undef zone_ext_refcnt + int nmbclusters; /* limits number of mbuf clusters */ + int nmbjumbop; /* limits number of page size jumbo clusters */ + int nmbjumbo9; /* limits number of 9k jumbo clusters */ + int nmbjumbo16; /* limits number of 16k jumbo clusters */ + struct mbstat mbstat; +#endif + uma_zone_t zone_ext_refcnt; + /** + * in (r89055) using of this behaviour has been changed and mean that Slirp + * can't parse hosts strucutures/files to provide to guest host name-resolving + * configuration, instead Slirp provides .{interface-number + 1}.3 as a nameserver + * and proxies DNS queiries to Host's Name Resolver API. + */ + bool fUseHostResolver; + /** + * Flag whether using the host resolver mode is permanent + * because the user configured it that way. + */ + bool fUseHostResolverPermanent; + /* from dnsproxy/dnsproxy.h*/ + unsigned int authoritative_port; + unsigned int authoritative_timeout; + unsigned int recursive_port; + unsigned int recursive_timeout; + unsigned int stats_timeout; + unsigned int port; + + unsigned long active_queries; + unsigned long all_queries; + unsigned long authoritative_queries; + unsigned long recursive_queries; + unsigned long removed_queries; + unsigned long dropped_queries; + unsigned long answered_queries; + unsigned long dropped_answers; + unsigned long late_answers; + unsigned long hash_collisions; + /*dnsproxy/dnsproxy.c*/ + unsigned short queryid; + struct sockaddr_in authoritative_addr; + struct sockaddr_in recursive_addr; + int sock_query; + int sock_answer; + /* dnsproxy/hash.c */ +#define HASHSIZE 10 +#define HASH(id) (id & ((1 << HASHSIZE) - 1)) + struct request *request_hash[1 << HASHSIZE]; + /* this field control behaviour of DHCP server */ + bool fUseDnsProxy; + /** Flag whether the guest can contact 
services on the host's + * loopback interface (127.0.0.1/localhost). */ + bool fLocalhostReachable; + + LIST_HEAD(RT_NOTHING, libalias) instancehead; + int i32AliasMode; + struct libalias *proxy_alias; + LIST_HEAD(handler_chain, proto_handler) handler_chain; + /** Critical R/W section to protect the handler chain list. */ + RTCRITSECTRW CsRwHandlerChain; + struct port_forward_rule_list port_forward_rule_head; + struct arp_cache_head arp_cache; + /* libalias modules' handlers */ + struct proto_handler *ftp_module; + struct proto_handler *nbt_module; +#ifdef VBOX_WITH_NAT_SEND2HOME + /* array of home addresses */ + struct sockaddr_in *pInSockAddrHomeAddress; + /* size of pInSockAddrHomeAddress in elements */ + int cInHomeAddressSize; +#endif +#ifdef VBOX_WITH_DNSMAPPING_IN_HOSTRESOLVER + DNSMAPPINGHEAD DNSMapNames; + DNSMAPPINGHEAD DNSMapPatterns; +#endif +} NATState; + + +/** Default IP time to live. */ +#define ip_defttl IPDEFTTL + +/** Number of permanent buffers in mbuf. */ +#define mbuf_thresh 30 + +/** Use a fixed time before sending keepalive. */ +#define tcp_keepidle TCPTV_KEEP_IDLE + +/** Use a fixed interval between keepalive. */ +#define tcp_keepintvl TCPTV_KEEPINTVL + +/** Maximum idle time before timing out a connection. */ +#define tcp_maxidle (TCPTV_KEEPCNT * tcp_keepintvl) + +/** Default TCP socket options. */ +#define so_options DO_KEEPALIVE + +/** Default TCP MSS value. */ +#define tcp_mssdflt TCP_MSS + +/** Default TCP round trip time. */ +#define tcp_rttdflt (TCPTV_SRTTDFLT / PR_SLOWHZ) + +/** Enable RFC1323 performance enhancements. + * @todo check if it really works, it was turned off before. */ +#define tcp_do_rfc1323 1 + +/** TCP receive buffer size. */ +#define tcp_rcvspace pData->tcp_rcvspace + +/** TCP send buffer size. */ +#define tcp_sndspace pData->tcp_sndspace + +/* TCP duplicate ACK retransmit threshold.
*/ +#define tcprexmtthresh 3 + + +#define bootp_filename pData->bootp_filename + +#define if_mtu pData->if_mtu +#define if_mru pData->if_mru +#define if_comp pData->if_comp +#define if_maxlinkhdr pData->if_maxlinkhdr +#define if_queued pData->if_queued +#define if_thresh pData->if_thresh + +#define icmpstat pData->icmpstat + +#define ipstat pData->ipstat +#define ipq pData->ipq +#define ip_currid pData->ip_currid + +#define mbuf_alloced pData->mbuf_alloced +#define mbuf_max pData->mbuf_max +#define msize pData->msize +#define m_freelist pData->m_freelist +#define m_usedlist pData->m_usedlist + +#define curtime pData->curtime +#define time_fasttimo pData->time_fasttimo +#define last_slowtimo pData->last_slowtimo +#define do_slowtimo pData->do_slowtimo +#define link_up pData->link_up +#define cUsers pData->cUsers +#define tt pData->tt +#define our_addr pData->our_addr +#ifndef VBOX_SLIRP_ALIAS +# define alias_addr pData->alias_addr +#else +# define handler_chain pData->handler_chain +#endif +#define dns_addr pData->dns_addr +#define loopback_addr pData->loopback_addr +#define slirp_hostname pData->slirp_hostname + +#define tcb pData->tcb +#define tcp_last_so pData->tcp_last_so +#define tcp_iss pData->tcp_iss + +#define tcpstat pData->tcpstat +#define tcp_now pData->tcp_now + +#define tftp_prefix pData->tftp_prefix + +#define udpstat pData->udpstat +#define udb pData->udb +#define udp_last_so pData->udp_last_so + +#define maxfragsperpacket pData->maxfragsperpacket +#define maxnipq pData->maxnipq +#define nipq pData->nipq + +#define tcp_reass_qsize pData->tcp_reass_qsize +#define tcp_reass_maxqlen pData->tcp_reass_maxqlen +#define tcp_reass_maxseg pData->tcp_reass_maxseg +#define tcp_reass_overflows pData->tcp_reass_overflows + +#define queue_tcp_label tcb +#define queue_udp_label udb +#define VBOX_X2(x) x +#define VBOX_X(x) VBOX_X2(x) + +#if 1 + +# define QSOCKET_LOCK(queue) do {} while (0) +# define QSOCKET_UNLOCK(queue) do {} while (0) +# define 
QSOCKET_LOCK_CREATE(queue) do {} while (0) +# define QSOCKET_LOCK_DESTROY(queue) do {} while (0) +# define QSOCKET_FOREACH(so, sonext, label) \ + for ((so) = VBOX_X2(queue_ ## label ## _label).so_next; \ + (so) != &(VBOX_X2(queue_ ## label ## _label)); \ + (so) = (sonext)) \ + { \ + (sonext) = (so)->so_next; \ + Log5(("%s:%d Processing so:%R[natsock]\n", RT_GCC_EXTENSION __FUNCTION__, __LINE__, (so))); +# define CONTINUE(label) continue +# define CONTINUE_NO_UNLOCK(label) continue +# define LOOP_LABEL(label, so, sonext) /* empty*/ +# define DO_TCP_OUTPUT(data, sotcb) tcp_output((data), (sotcb)) +# define DO_TCP_INPUT(data, mbuf, size, so) tcp_input((data), (mbuf), (size), (so)) +# define DO_TCP_CONNECT(data, so) tcp_connect((data), (so)) +# define DO_SOREAD(ret, data, so, ifclose) \ + do { \ + (ret) = soread((data), (so), (ifclose)); \ + } while(0) +# define DO_SOWRITE(ret, data, so) \ + do { \ + (ret) = sowrite((data), (so)); \ + } while(0) +# define DO_SORECFROM(data, so) sorecvfrom((data), (so)) +# define SOLOOKUP(so, label, src, sport, dst, dport) \ + do { \ + (so) = solookup(&VBOX_X2(queue_ ## label ## _label), (src), (sport), (dst), (dport)); \ + } while (0) +# define DO_UDP_DETACH(data, so, ignored) udp_detach((data), (so)) + +#endif + +#define TCP_OUTPUT(data, sotcb) DO_TCP_OUTPUT((data), (sotcb)) +#define TCP_INPUT(data, mbuf, size, so) DO_TCP_INPUT((data), (mbuf), (size), (so)) +#define TCP_CONNECT(data, so) DO_TCP_CONNECT((data), (so)) +#define SOREAD(ret, data, so, ifclose) DO_SOREAD((ret), (data), (so), (ifclose)) +#define SOWRITE(ret, data, so) DO_SOWRITE((ret), (data), (so)) +#define SORECVFROM(data, so) DO_SORECFROM((data), (so)) +#define UDP_DETACH(data, so, so_next) DO_UDP_DETACH((data), (so), (so_next)) + +/* dnsproxy/dnsproxy.c */ +#define authoritative_port pData->authoritative_port +#define authoritative_timeout pData->authoritative_timeout +#define recursive_port pData->recursive_port +#define recursive_timeout pData->recursive_timeout 
+#define stats_timeout pData->stats_timeout +/* dnsproxy/hash.c */ +#define dns_port pData->port +#define request_hash pData->request_hash +#define hash_collisions pData->hash_collisions +#define active_queries pData->active_queries +#define all_queries pData->all_queries +#define authoritative_queries pData->authoritative_queries +#define recursive_queries pData->recursive_queries +#define removed_queries pData->removed_queries +#define dropped_queries pData->dropped_queries +#define answered_queries pData->answered_queries +#define dropped_answers pData->dropped_answers +#define late_answers pData->late_answers + +/* dnsproxy/dnsproxy.c */ +#define queryid pData->queryid +#define authoritative_addr pData->authoritative_addr +#define recursive_addr pData->recursive_addr +#define sock_query pData->sock_query +#define sock_answer pData->sock_answer + +#define instancehead pData->instancehead + +#define nmbclusters pData->nmbclusters +#define nmbjumbop pData->nmbjumbop +#define nmbjumbo9 pData->nmbjumbo9 +#define nmbjumbo16 pData->nmbjumbo16 +#define mbstat pData->mbstat +#include "ext.h" +#undef zone_mbuf +#undef zone_clust +#undef zone_pack +#undef zone_jumbop +#undef zone_jumbo9 +#undef zone_jumbo16 +#undef zone_ext_refcnt +/* Accessors for the zone members of NATState; the zone_* macro names are #undef'ed just above, so the plain member names are used here. */ static inline uma_zone_t slirp_zone_pack(PNATState pData) +{ + return pData->zone_pack; +} +static inline uma_zone_t slirp_zone_jumbop(PNATState pData) +{ + return pData->zone_jumbop; +} +static inline uma_zone_t slirp_zone_jumbo9(PNATState pData) +{ + return pData->zone_jumbo9; +} +static inline uma_zone_t slirp_zone_jumbo16(PNATState pData) +{ + return pData->zone_jumbo16; +} +static inline uma_zone_t slirp_zone_ext_refcnt(PNATState pData) +{ + return pData->zone_ext_refcnt; +} +static inline uma_zone_t slirp_zone_mbuf(PNATState pData) +{ + return pData->zone_mbuf; +} +static inline uma_zone_t slirp_zone_clust(PNATState pData) +{ + return pData->zone_clust; +} +#ifndef VBOX_SLIRP_BSD +/* Two-argument m_adj(m, len) that supplies pData as first argument; a macro is not re-expanded inside its own expansion, so the inner m_adj names the function. */ # define m_adj(m, len) m_adj(pData, (m), (len)) +#endif +
+#endif /* !___slirp_state_h */ diff --git a/src/VBox/Devices/Network/slirp/socket.c b/src/VBox/Devices/Network/slirp/socket.c new file mode 100644 index 00000000..c12a2ed9 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/socket.c @@ -0,0 +1,1497 @@ +/* $Id: socket.c $ */ +/** @file + * NAT - socket handling. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#include <slirp.h> +#include "ip_icmp.h" +#include "main.h" +#ifdef __sun__ +#include <sys/filio.h> +#endif +#include <VBox/vmm/pdmdrv.h> +#if defined (RT_OS_WINDOWS) +#include <iprt/win/iphlpapi.h> +#include <icmpapi.h> +#endif +#include <alias.h> + +#if defined(DECLARE_IOVEC) && defined(RT_OS_WINDOWS) +AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, WSABUF, buf); +AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, WSABUF, len); +#endif + +#ifdef VBOX_WITH_NAT_SEND2HOME +DECLINLINE(bool) slirpSend2Home(PNATState pData, struct socket *pSo, const void *pvBuf, uint32_t cbBuf, int iFlags) +{ + int idxAddr; + int ret = 0; + bool fSendDone = false; + LogFlowFunc(("Enter pSo:%R[natsock] pvBuf: %p, cbBuf: %d, iFlags: %d\n", pSo, pvBuf, cbBuf, iFlags)); + for (idxAddr = 0; idxAddr < pData->cInHomeAddressSize; ++idxAddr) + { + + struct socket *pNewSocket = soCloneUDPSocketWithForegnAddr(pData, pSo, pData->pInSockAddrHomeAddress[idxAddr].sin_addr); + AssertReturn((pNewSocket, false)); + pData->pInSockAddrHomeAddress[idxAddr].sin_port = pSo->so_fport; + /** @todo more verbose on errors, + * @note: we shouldn't care if this send fail or not (we're in broadcast). 
+ */ + LogFunc(("send %d bytes to %RTnaipv4 from %R[natsock]\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr, pNewSocket)); + ret = sendto(pNewSocket->s, pvBuf, cbBuf, iFlags, (struct sockaddr *)&pData->pInSockAddrHomeAddress[idxAddr], sizeof(struct sockaddr_in)); + if (ret < 0) + LogFunc(("Failed to send %d bytes to %RTnaipv4\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr)); + fSendDone |= ret > 0; + } + LogFlowFunc(("Leave %RTbool\n", fSendDone)); + return fSendDone; +} +#endif /* !VBOX_WITH_NAT_SEND2HOME */ + +#if !defined(RT_OS_WINDOWS) +static void send_icmp_to_guest(PNATState, char *, size_t, const struct sockaddr_in *); +static void sorecvfrom_icmp_unix(PNATState, struct socket *); +#endif /* !RT_OS_WINDOWS */ + +void +so_init(void) +{ +} + +struct socket * +solookup(struct socket *head, struct in_addr laddr, + u_int lport, struct in_addr faddr, u_int fport) +{ + struct socket *so; + + for (so = head->so_next; so != head; so = so->so_next) + { + if ( so->so_lport == lport + && so->so_laddr.s_addr == laddr.s_addr + && so->so_faddr.s_addr == faddr.s_addr + && so->so_fport == fport) + return so; + } + + return (struct socket *)NULL; +} + +/* + * Create a new socket, initialise the fields + * It is the responsibility of the caller to + * insque() it into the correct linked-list + */ +struct socket * +socreate(void) +{ + struct socket *so; + + so = (struct socket *)RTMemAllocZ(sizeof(struct socket)); + if (so) + { + so->so_state = SS_NOFDREF; + so->s = -1; +#if !defined(RT_OS_WINDOWS) + so->so_poll_index = -1; +#endif + } + return so; +} + +/* + * remque and free a socket, clobber cache + */ +void +sofree(PNATState pData, struct socket *so) +{ + LogFlowFunc(("ENTER:%R[natsock]\n", so)); + /* + * We should not remove socket when polling routine do the polling + * instead we mark it for deletion. 
+ */ + if (so->fUnderPolling) + { + so->fShouldBeRemoved = 1; + LogFlowFunc(("LEAVE:%R[natsock] postponed deletion\n", so)); + return; + } + /** + * Check that we don't freeng socket with tcbcb + */ + Assert(!sototcpcb(so)); + /* udp checks */ + Assert(!so->so_timeout); + Assert(!so->so_timeout_arg); + if (so == tcp_last_so) + tcp_last_so = &tcb; + else if (so == udp_last_so) + udp_last_so = &udb; + + /* check if mbuf haven't been already freed */ + if (so->so_m != NULL) + { + m_freem(pData, so->so_m); + so->so_m = NULL; + } + + if (so->so_ohdr != NULL) + { + RTMemFree(so->so_ohdr); + so->so_ohdr = NULL; + } + + if (so->so_next && so->so_prev) + { + remque(pData, so); /* crashes if so is not in a queue */ + NSOCK_DEC(); + } + + RTMemFree(so); + LogFlowFuncLeave(); +} + + +/* + * Worker for sobind() below. + */ +static int +sobindto(struct socket *so, uint32_t addr, uint16_t port) +{ + struct sockaddr_in self; + int status; + + if (addr == INADDR_ANY && port == 0 && so->so_type != IPPROTO_UDP) + { + /* TCP sockets without constraints don't need to be bound */ + Log2(("NAT: sobind: %s guest %RTnaipv4:%d - nothing to do\n", + so->so_type == IPPROTO_UDP ? "udp" : "tcp", + so->so_laddr.s_addr, ntohs(so->so_lport))); + return 0; + } + + RT_ZERO(self); +#ifdef RT_OS_DARWIN + self.sin_len = sizeof(self); +#endif + self.sin_family = AF_INET; + self.sin_addr.s_addr = addr; + self.sin_port = port; + + status = bind(so->s, (struct sockaddr *)&self, sizeof(self)); + if (status == 0) + { + Log2(("NAT: sobind: %s guest %RTnaipv4:%d to host %RTnaipv4:%d\n", + so->so_type == IPPROTO_UDP ? "udp" : "tcp", + so->so_laddr.s_addr, ntohs(so->so_lport), addr, ntohs(port))); + return 0; + } + + Log2(("NAT: sobind: %s guest %RTnaipv4:%d to host %RTnaipv4:%d error %d%s\n", + so->so_type == IPPROTO_UDP ? "udp" : "tcp", + so->so_laddr.s_addr, ntohs(so->so_lport), + addr, ntohs(port), + errno, port ? 
" (will retry with random port)" : "")); + + if (port) /* retry without */ + status = sobindto(so, addr, 0); + + if (addr) + return status; + else + return 0; +} + + +/* + * Bind the socket to specific host address and/or port if necessary. + * We also always bind udp sockets to force the local port to be + * allocated and known in advance. + */ +int +sobind(PNATState pData, struct socket *so) +{ + uint32_t addr = pData->bindIP.s_addr; /* may be INADDR_ANY */ + bool fSamePorts = !!(pData->i32AliasMode & PKT_ALIAS_SAME_PORTS); + uint16_t port; + int status; + + if (fSamePorts) + { + int opt = 1; + setsockopt(so->s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(opt)); + port = so->so_lport; + } + else + { + port = 0; + } + + status = sobindto(so, addr, port); + return status; +} + + +/* + * Read from so's socket into sb_snd, updating all relevant sbuf fields + * NOTE: This will only be called if it is select()ed for reading, so + * a read() of 0 (or less) means it's disconnected + */ +int +soread(PNATState pData, struct socket *so) +{ + int n, nn, lss, total; + struct sbuf *sb = &so->so_snd; + u_int len = sb->sb_datalen - sb->sb_cc; + struct iovec iov[2]; + int mss = so->so_tcpcb->t_maxseg; + int sockerr; + + STAM_PROFILE_START(&pData->StatIOread, a); + STAM_COUNTER_RESET(&pData->StatIORead_in_1); + STAM_COUNTER_RESET(&pData->StatIORead_in_2); + + QSOCKET_LOCK(tcb); + SOCKET_LOCK(so); + QSOCKET_UNLOCK(tcb); + + LogFlow(("soread: so = %R[natsock]\n", so)); + Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb)); + + /* + * No need to check if there's enough room to read. + * soread wouldn't have been called if there weren't + */ + + len = sb->sb_datalen - sb->sb_cc; + + iov[0].iov_base = sb->sb_wptr; + iov[1].iov_base = 0; + iov[1].iov_len = 0; + if (sb->sb_wptr < sb->sb_rptr) + { + iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; + /* Should never succeed, but... 
*/ + if (iov[0].iov_len > len) + iov[0].iov_len = len; + if (iov[0].iov_len > mss) + iov[0].iov_len -= iov[0].iov_len%mss; + n = 1; + } + else + { + iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) + iov[0].iov_len = len; + len -= iov[0].iov_len; + if (len) + { + iov[1].iov_base = sb->sb_data; + iov[1].iov_len = sb->sb_rptr - sb->sb_data; + if (iov[1].iov_len > len) + iov[1].iov_len = len; + total = iov[0].iov_len + iov[1].iov_len; + if (total > mss) + { + lss = total % mss; + if (iov[1].iov_len > lss) + { + iov[1].iov_len -= lss; + n = 2; + } + else + { + lss -= iov[1].iov_len; + iov[0].iov_len -= lss; + n = 1; + } + } + else + n = 2; + } + else + { + if (iov[0].iov_len > mss) + iov[0].iov_len -= iov[0].iov_len%mss; + n = 1; + } + } + +#ifdef HAVE_READV + nn = readv(so->s, (struct iovec *)iov, n); +#else + nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0)); +#endif + if (nn < 0) + sockerr = errno; /* save it, as it may be clobbered by logging */ + else + sockerr = 0; + + Log2(("%s: read(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn)); + Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb)); + if (nn <= 0) + { +#ifdef RT_OS_WINDOWS + /* + * Windows reports ESHUTDOWN after SHUT_RD (SD_RECEIVE) + * instead of just returning EOF indication. + */ + if (nn < 0 && sockerr == ESHUTDOWN) + { + nn = 0; + sockerr = 0; + } +#endif + + if (nn == 0) /* XXX: should this be inside #if defined(RT_OS_WINDOWS)? */ + { + /* + * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that + * _could_ mean that the connection is closed. But we will receive an + * FD_CLOSE event later if the connection was _really_ closed. With + * www.youtube.com I see this very often. Closing the socket too early + * would be dangerous. 
+ */ + int status; + unsigned long pending = 0; + status = ioctlsocket(so->s, FIONREAD, &pending); + if (status < 0) + Log(("NAT:%s: error in WSAIoctl: %d\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, errno)); + if (pending != 0) + { + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatIOread, a); + return 0; + } + } + + if ( nn < 0 + && soIgnorableErrorCode(sockerr)) + { + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatIOread, a); + return 0; + } + else + { + int fUninitializedTemplate = 0; + int shuterr; + + fUninitializedTemplate = RT_BOOL(( sototcpcb(so) + && ( sototcpcb(so)->t_template.ti_src.s_addr == INADDR_ANY + || sototcpcb(so)->t_template.ti_dst.s_addr == INADDR_ANY))); + /* nn == 0 means peer has performed an orderly shutdown */ + Log2(("%s: disconnected, nn = %d, errno = %d (%s)\n", + RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sockerr, strerror(sockerr))); + + shuterr = sofcantrcvmore(so); + if (!sockerr && !shuterr && !fUninitializedTemplate) + tcp_sockclosed(pData, sototcpcb(so)); + else + { + LogRel2(("NAT: sockerr %d, shuterr %d - %R[natsock]\n", sockerr, shuterr, so)); + tcp_drop(pData, sototcpcb(so), sockerr); + } + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatIOread, a); + return -1; + } + } + STAM_STATS( + if (n == 1) + { + STAM_COUNTER_INC(&pData->StatIORead_in_1); + STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn); + } + else + { + STAM_COUNTER_INC(&pData->StatIORead_in_2); + STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn); + } + ); + +#ifndef HAVE_READV + /* + * If there was no error, try and read the second time round + * We read again if n = 2 (ie, there's another part of the buffer) + * and we read as much as we could in the first read + * We don't test for <= 0 this time, because there legitimately + * might not be any more data (since the socket is non-blocking), + * a close will be detected on next iteration. 
+ * A return of -1 wont (shouldn't) happen, since it didn't happen above + */ + if (n == 2 && (unsigned)nn == iov[0].iov_len) + { + int ret; + ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); + if (ret > 0) + nn += ret; + STAM_STATS( + if (ret > 0) + { + STAM_COUNTER_INC(&pData->StatIORead_in_2); + STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret); + } + ); + } + + Log2(("%s: read(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn)); +#endif + + /* Update fields */ + sb->sb_cc += nn; + sb->sb_wptr += nn; + Log2(("%s: update so_snd (readed nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb)); + if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) + { + sb->sb_wptr -= sb->sb_datalen; + Log2(("%s: alter sb_wptr so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb)); + } + STAM_PROFILE_STOP(&pData->StatIOread, a); + SOCKET_UNLOCK(so); + return nn; +} + +/* + * Get urgent data + * + * When the socket is created, we set it SO_OOBINLINE, + * so when OOB data arrives, we soread() it and everything + * in the send buffer is sent as urgent data + */ +void +sorecvoob(PNATState pData, struct socket *so) +{ + struct tcpcb *tp = sototcpcb(so); + ssize_t ret; + + LogFlowFunc(("sorecvoob: so = %R[natsock]\n", so)); + + /* + * We take a guess at how much urgent data has arrived. + * In most situations, when urgent data arrives, the next + * read() should get all the urgent data. This guess will + * be wrong however if more data arrives just after the + * urgent data, or the read() doesn't return all the + * urgent data. + */ + ret = soread(pData, so); + if (RT_LIKELY(ret > 0)) + { + /* + * @todo for now just scrub the URG pointer. To faithfully + * proxy URG we need to read the srteam until SIOCATMARK, and + * then mark the first byte of the next read ar urgent. 
+ */ +#if 0 + tp->snd_up = tp->snd_una + SBUF_LEN(&so->so_snd); +#endif + tp->t_force = 1; + tcp_output(pData, tp); + tp->t_force = 0; + } +} + +/* + * Send urgent data + * There's a lot duplicated code here, but... + */ +int +sosendoob(struct socket *so) +{ + struct sbuf *sb = &so->so_rcv; + char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ + + int n, len; + + LogFlowFunc(("sosendoob so = %R[natsock]\n", so)); + + if (so->so_urgc > sizeof(buff)) + so->so_urgc = sizeof(buff); /* XXX */ + + if (sb->sb_rptr < sb->sb_wptr) + { + /* We can send it directly */ + n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */ + so->so_urgc -= n; + + Log2((" --- sent %d bytes urgent data, %d urgent bytes left\n", + n, so->so_urgc)); + } + else + { + /* + * Since there's no sendv or sendtov like writev, + * we must copy all data to a linear buffer then + * send it all + */ + len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; + if (len > so->so_urgc) + len = so->so_urgc; + memcpy(buff, sb->sb_rptr, len); + so->so_urgc -= len; + if (so->so_urgc) + { + n = sb->sb_wptr - sb->sb_data; + if (n > so->so_urgc) + n = so->so_urgc; + memcpy(buff + len, sb->sb_data, n); + so->so_urgc -= n; + len += n; + } + n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ +#ifdef DEBUG + if (n != len) + Log(("Didn't send all data urgently XXXXX\n")); +#endif + Log2((" ---2 sent %d bytes urgent data, %d urgent bytes left\n", + n, so->so_urgc)); + } + + sb->sb_cc -= n; + sb->sb_rptr += n; + if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_rptr -= sb->sb_datalen; + + return n; +} + +/* + * Write data from so_rcv to so's socket, + * updating all sbuf field as necessary + */ +int +sowrite(PNATState pData, struct socket *so) +{ + int n, nn; + struct sbuf *sb = &so->so_rcv; + u_int len = sb->sb_cc; + struct iovec iov[2]; + + STAM_PROFILE_START(&pData->StatIOwrite, a); + STAM_COUNTER_RESET(&pData->StatIOWrite_in_1); + 
STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes); + STAM_COUNTER_RESET(&pData->StatIOWrite_in_2); + STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes); + STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes); + STAM_COUNTER_RESET(&pData->StatIOWrite_no_w); + STAM_COUNTER_RESET(&pData->StatIOWrite_rest); + STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes); + LogFlowFunc(("so = %R[natsock]\n", so)); + Log2(("%s: so = %R[natsock] so->so_rcv = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb)); + QSOCKET_LOCK(tcb); + SOCKET_LOCK(so); + QSOCKET_UNLOCK(tcb); + if (so->so_urgc) + { + sosendoob(so); + if (sb->sb_cc == 0) + { + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatIOwrite, a); + return 0; + } + } + + /* + * No need to check if there's something to write, + * sowrite wouldn't have been called otherwise + */ + + len = sb->sb_cc; + + iov[0].iov_base = sb->sb_rptr; + iov[1].iov_base = 0; + iov[1].iov_len = 0; + if (sb->sb_rptr < sb->sb_wptr) + { + iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; + /* Should never succeed, but... 
*/ + if (iov[0].iov_len > len) + iov[0].iov_len = len; + n = 1; + } + else + { + iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; + if (iov[0].iov_len > len) + iov[0].iov_len = len; + len -= iov[0].iov_len; + if (len) + { + iov[1].iov_base = sb->sb_data; + iov[1].iov_len = sb->sb_wptr - sb->sb_data; + if (iov[1].iov_len > len) + iov[1].iov_len = len; + n = 2; + } + else + n = 1; + } + STAM_STATS({ + if (n == 1) + { + STAM_COUNTER_INC(&pData->StatIOWrite_in_1); + STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len); + } + else + { + STAM_COUNTER_INC(&pData->StatIOWrite_in_2); + STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len); + STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len); + } + }); + /* Check if there's urgent data to send, and if so, send it */ +#ifdef HAVE_READV + nn = writev(so->s, (const struct iovec *)iov, n); +#else + nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0); +#endif + Log2(("%s: wrote(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn)); + /* This should never happen, but people tell me it does *shrug* */ + if ( nn < 0 + && soIgnorableErrorCode(errno)) + { + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatIOwrite, a); + return 0; + } + + if (nn < 0 || (nn == 0 && iov[0].iov_len > 0)) + { + Log2(("%s: disconnected, so->so_state = %x, errno = %d\n", + RT_GCC_EXTENSION __PRETTY_FUNCTION__, so->so_state, errno)); + sofcantsendmore(so); + tcp_sockclosed(pData, sototcpcb(so)); + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatIOwrite, a); + return -1; + } + +#ifndef HAVE_READV + if (n == 2 && (unsigned)nn == iov[0].iov_len) + { + int ret; + ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0); + if (ret > 0) + nn += ret; +# ifdef VBOX_WITH_STATISTICS + if (ret > 0 && ret != (ssize_t)iov[1].iov_len) + { + STAM_COUNTER_INC(&pData->StatIOWrite_rest); + STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (iov[1].iov_len - ret)); + } +#endif + } + Log2(("%s: wrote(2) nn = 
%d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn)); +#endif + + /* Update sbuf */ + sb->sb_cc -= nn; + sb->sb_rptr += nn; + Log2(("%s: update so_rcv (written nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb)); + if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) + { + sb->sb_rptr -= sb->sb_datalen; + Log2(("%s: alter sb_rptr of so_rcv %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb)); + } + + /* + * If in DRAIN mode, and there's no more data, set + * it CANTSENDMORE + */ + if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) + sofcantsendmore(so); + + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatIOwrite, a); + return nn; +} + +/* + * recvfrom() a UDP socket + */ +void +sorecvfrom(PNATState pData, struct socket *so) +{ + LogFlowFunc(("sorecvfrom: so = %p\n", so)); + +#ifdef RT_OS_WINDOWS + /* ping is handled with ICMP API in ip_icmpwin.c */ + Assert(so->so_type == IPPROTO_UDP); +#else + if (so->so_type == IPPROTO_ICMP) + { + /* This is a "ping" reply */ + sorecvfrom_icmp_unix(pData, so); + udp_detach(pData, so); + } + else +#endif /* !RT_OS_WINDOWS */ + { + static char achBuf[64 * 1024]; + + /* A "normal" UDP packet */ + struct sockaddr_in addr; + socklen_t addrlen = sizeof(struct sockaddr_in); + struct iovec iov[2]; + ssize_t nread; + struct mbuf *m; + + QSOCKET_LOCK(udb); + SOCKET_LOCK(so); + QSOCKET_UNLOCK(udb); + + m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, slirp_size(pData)); + if (m == NULL) + { + SOCKET_UNLOCK(so); + return; + } + + m->m_data += ETH_HLEN; + m->m_pkthdr.header = mtod(m, void *); + + m->m_data += sizeof(struct udpiphdr); + + /* small packets will fit without copying */ + iov[0].iov_base = mtod(m, char *); + iov[0].iov_len = M_TRAILINGSPACE(m); + + /* large packets will spill into a temp buffer */ + iov[1].iov_base = achBuf; + iov[1].iov_len = sizeof(achBuf); + +#if !defined(RT_OS_WINDOWS) + { + struct msghdr mh; + memset(&mh, 0, sizeof(mh)); + + mh.msg_iov = iov; + mh.msg_iovlen = 2; + mh.msg_name = 
&addr; + mh.msg_namelen = addrlen; + + nread = recvmsg(so->s, &mh, 0); + } +#else /* RT_OS_WINDOWS */ + { + DWORD nbytes; /* NB: can't use nread b/c of different size */ + DWORD flags = 0; + int status; + AssertCompile(sizeof(WSABUF) == sizeof(struct iovec)); + AssertCompileMembersSameSizeAndOffset(WSABUF, len, struct iovec, iov_len); + AssertCompileMembersSameSizeAndOffset(WSABUF, buf, struct iovec, iov_base); + status = WSARecvFrom(so->s, (WSABUF *)&iov[0], 2, &nbytes, &flags, + (struct sockaddr *)&addr, &addrlen, + NULL, NULL); + if (status != SOCKET_ERROR) + nread = nbytes; + else + nread = -1; + } +#endif + if (nread >= 0) + { + if (nread <= iov[0].iov_len) + m->m_len = nread; + else + { + m->m_len = iov[0].iov_len; + m_append(pData, m, nread - iov[0].iov_len, iov[1].iov_base); + } + Assert(m_length(m, NULL) == (size_t)nread); + + /* + * Hack: domain name lookup will be used the most for UDP, + * and since they'll only be used once there's no need + * for the 4 minute (or whatever) timeout... So we time them + * out much quicker (10 seconds for now...) + */ + if (so->so_expire) + { + if (so->so_fport != RT_H2N_U16_C(53)) + so->so_expire = curtime + SO_EXPIRE; + } + + /* + * DNS proxy requests are forwarded to the real resolver, + * but its socket's so_faddr is that of the DNS proxy + * itself. + * + * last argument should be changed if Slirp will inject IP attributes + */ + if ( pData->fUseDnsProxy + && so->so_fport == RT_H2N_U16_C(53) + && CTL_CHECK(so->so_faddr.s_addr, CTL_DNS)) + dnsproxy_answer(pData, so, m); + + /* packets definetly will be fragmented, could confuse receiver peer. 
*/ + if (nread > if_mtu) + m->m_flags |= M_SKIP_FIREWALL; + + /* + * If this packet was destined for CTL_ADDR, + * make it look like that's where it came from, done by udp_output + */ + udp_output(pData, so, m, &addr); + } + else + { + m_freem(pData, m); + + if (!soIgnorableErrorCode(errno)) + { + u_char code; + if (errno == EHOSTUNREACH) + code = ICMP_UNREACH_HOST; + else if (errno == ENETUNREACH) + code = ICMP_UNREACH_NET; + else + code = ICMP_UNREACH_PORT; + + Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code)); + icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); + so->so_m = NULL; + } + } + + SOCKET_UNLOCK(so); + } +} + +/* + * sendto() a socket + */ +int +sosendto(PNATState pData, struct socket *so, struct mbuf *m) +{ + int ret; + struct sockaddr_in *paddr; + struct sockaddr addr; +#if 0 + struct sockaddr_in host_addr; +#endif + caddr_t buf = 0; + int mlen; + + LogFlowFunc(("sosendto: so = %R[natsock], m = %p\n", so, m)); + + memset(&addr, 0, sizeof(struct sockaddr)); +#ifdef RT_OS_DARWIN + addr.sa_len = sizeof(struct sockaddr_in); +#endif + paddr = (struct sockaddr_in *)&addr; + paddr->sin_family = AF_INET; + if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr) + { + /* It's an alias */ + uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask; + switch(last_byte) + { +#if 0 + /* handle this case at 'default:' */ + case CTL_BROADCAST: + addr.sin_addr.s_addr = INADDR_BROADCAST; + /* Send the packet to host to fully emulate broadcast */ + /** @todo r=klaus: on Linux host this causes the host to receive + * the packet twice for some reason. And I cannot find any place + * in the man pages which states that sending a broadcast does not + * reach the host itself. 
*/ + host_addr.sin_family = AF_INET; + host_addr.sin_port = so->so_fport; + host_addr.sin_addr = our_addr; + sendto(so->s, m->m_data, m->m_len, 0, + (struct sockaddr *)&host_addr, sizeof (struct sockaddr)); + break; +#endif + case CTL_DNS: + case CTL_ALIAS: + default: + if (last_byte == ~pData->netmask) + paddr->sin_addr.s_addr = INADDR_BROADCAST; + else + paddr->sin_addr = loopback_addr; + break; + } + } + else + paddr->sin_addr = so->so_faddr; + paddr->sin_port = so->so_fport; + + Log2((" sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n", + RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr))); + + /* Don't care what port we get */ + /* + * > nmap -sV -T4 -O -A -v -PU3483 255.255.255.255 + * generates bodyless messages, annoying memmory management system. + */ + mlen = m_length(m, NULL); + if (mlen > 0) + { + buf = RTMemAlloc(mlen); + if (buf == NULL) + { + return -1; + } + m_copydata(m, 0, mlen, buf); + } + ret = sendto(so->s, buf, mlen, 0, + (struct sockaddr *)&addr, sizeof (struct sockaddr)); +#ifdef VBOX_WITH_NAT_SEND2HOME + if (slirpIsWideCasting(pData, so->so_faddr.s_addr)) + { + slirpSend2Home(pData, so, buf, mlen, 0); + } +#endif + if (buf) + RTMemFree(buf); + if (ret < 0) + { + Log2(("UDP: sendto fails (%s)\n", strerror(errno))); + return -1; + } + + /* + * Kill the socket if there's no reply in 4 minutes, + * but only if it's an expirable socket + */ + if (so->so_expire) + so->so_expire = curtime + SO_EXPIRE; + so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */ + return 0; +} + +/* + * XXX This should really be tcp_listen + */ +struct socket * +solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags) +{ + struct sockaddr_in addr; + struct socket *so; + socklen_t addrlen = sizeof(addr); + int s, opt = 1; + int status; + + LogFlowFunc(("solisten: port = %d, laddr = %x, lport = %d, flags = %x\n", port, laddr, lport, flags)); + + if ((so = socreate()) == NULL) + { + /* RTMemFree(so); 
Not sofree() ??? free(NULL) == NOP */ + return NULL; + } + + /* Don't tcp_attach... we don't need so_snd nor so_rcv */ + if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL) + { + RTMemFree(so); + return NULL; + } + + SOCKET_LOCK_CREATE(so); + SOCKET_LOCK(so); + QSOCKET_LOCK(tcb); + insque(pData, so,&tcb); + NSOCK_INC(); + QSOCKET_UNLOCK(tcb); + + /* + * SS_FACCEPTONCE sockets must time out. + */ + if (flags & SS_FACCEPTONCE) + so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2; + + so->so_state = (SS_FACCEPTCONN|flags); + so->so_lport = lport; /* Kept in network format */ + so->so_laddr.s_addr = laddr; /* Ditto */ + + memset(&addr, 0, sizeof(addr)); +#ifdef RT_OS_DARWIN + addr.sin_len = sizeof(addr); +#endif + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = bind_addr; + addr.sin_port = port; + + /** + * changing listen(,1->SOMAXCONN) shouldn't be harmful for NAT's TCP/IP stack, + * kernel will choose the optimal value for requests queue length. + * @note: MSDN recommends low (2-4) values for bluetooth networking devices. 
+ */ + if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) + || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0) + || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0) + || (listen(s, pData->soMaxConn) < 0)) + { +#ifdef RT_OS_WINDOWS + int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */ + closesocket(s); + QSOCKET_LOCK(tcb); + sofree(pData, so); + QSOCKET_UNLOCK(tcb); + /* Restore the real errno */ + WSASetLastError(tmperrno); +#else + int tmperrno = errno; /* Don't clobber the real reason we failed */ + close(s); + if (sototcpcb(so)) + tcp_close(pData, sototcpcb(so)); + else + sofree(pData, so); + /* Restore the real errno */ + errno = tmperrno; +#endif + return NULL; + } + fd_nonblock(s); + setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int)); + + getsockname(s,(struct sockaddr *)&addr,&addrlen); + so->so_fport = addr.sin_port; + /* set socket buffers */ + opt = pData->socket_rcv; + status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int)); + if (status < 0) + { + LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt)); + goto no_sockopt; + } + opt = pData->socket_snd; + status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int)); + if (status < 0) + { + LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt)); + goto no_sockopt; + } +no_sockopt: + if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr) + so->so_faddr = alias_addr; + else + so->so_faddr = addr.sin_addr; + + so->s = s; + SOCKET_UNLOCK(so); + return so; +} + +/* + * Data is available in so_rcv + * Just write() the data to the socket + * XXX not yet... + * @todo do we really need this function, what it's intended to do? 
+ */ +void +sorwakeup(struct socket *so) +{ + NOREF(so); +#if 0 + sowrite(so); + FD_CLR(so->s,&writefds); +#endif +} + +/* + * Data has been freed in so_snd + * We have room for a read() if we want to + * For now, don't read, it'll be done in the main loop + */ +void +sowwakeup(struct socket *so) +{ + NOREF(so); +} + +/* + * Various session state calls + * XXX Should be #define's + * The socket state stuff needs work, these often get call 2 or 3 + * times each when only 1 was needed + */ +void +soisfconnecting(struct socket *so) +{ + so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE| + SS_FCANTSENDMORE|SS_FWDRAIN); + so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ +} + +void +soisfconnected(struct socket *so) +{ + LogFlowFunc(("ENTER: so:%R[natsock]\n", so)); + so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF); + so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ + LogFlowFunc(("LEAVE: so:%R[natsock]\n", so)); +} + +int +sofcantrcvmore(struct socket *so) +{ + int err = 0; + + LogFlowFunc(("ENTER: so:%R[natsock]\n", so)); + if ((so->so_state & SS_NOFDREF) == 0) + { + /* + * If remote closes first and then sends an RST, the recv() in + * soread() will keep reporting EOF without any error + * indication. As far as I can tell the only way to detect + * this on Linux is to check if shutdown() succeeds here (but + * see below). + * + * OTOH on OS X shutdown() "helpfully" checks if remote has + * already closed and then always returns ENOTCONN + * immediately. + */ + int status = shutdown(so->s, SHUT_RD); +#if defined(RT_OS_LINUX) + if (status < 0) + err = errno; +#else + RT_NOREF(status); +#endif + } + so->so_state &= ~(SS_ISFCONNECTING); + if (so->so_state & SS_FCANTSENDMORE) + { +#if defined(RT_OS_LINUX) + /* + * If we have closed first, and remote closes, shutdown will + * return ENOTCONN, but this is expected. Don't tell the + * caller there was an error. 
+ */ + if (err == ENOTCONN) + err = 0; +#endif + so->so_state = SS_NOFDREF; /* Don't select it */ + /* XXX close() here as well? */ + } + else + so->so_state |= SS_FCANTRCVMORE; + + LogFlowFunc(("LEAVE: %d\n", err)); + return err; +} + +void +sofcantsendmore(struct socket *so) +{ + LogFlowFunc(("ENTER: so:%R[natsock]\n", so)); + if ((so->so_state & SS_NOFDREF) == 0) + shutdown(so->s, 1); /* send FIN to fhost */ + + so->so_state &= ~(SS_ISFCONNECTING); + if (so->so_state & SS_FCANTRCVMORE) + so->so_state = SS_NOFDREF; /* as above */ + else + so->so_state |= SS_FCANTSENDMORE; + LogFlowFuncLeave(); +} + +void +soisfdisconnected(struct socket *so) +{ + NOREF(so); +#if 0 + so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED); + close(so->s); + so->so_state = SS_ISFDISCONNECTED; + /* + * XXX Do nothing ... ? + */ +#endif +} + +/* + * Set write drain mode + * Set CANTSENDMORE once all data has been write()n + */ +void +sofwdrain(struct socket *so) +{ + if (SBUF_LEN(&so->so_rcv)) + so->so_state |= SS_FWDRAIN; + else + sofcantsendmore(so); +} + +#if !defined(RT_OS_WINDOWS) +static void +send_icmp_to_guest(PNATState pData, char *buff, size_t len, const struct sockaddr_in *addr) +{ + struct ip *ip; + uint32_t dst, src; + char ip_copy[256]; + struct icmp *icp; + int old_ip_len = 0; + int hlen, original_hlen = 0; + struct mbuf *m; + struct icmp_msg *icm; + uint8_t proto; + int type = 0; + + ip = (struct ip *)buff; + /* Fix ip->ip_len to contain the total packet length including the header + * in _host_ byte order for all OSes. On Darwin, that value already is in + * host byte order. Solaris and Darwin report only the payload. 
*/ +#ifndef RT_OS_DARWIN + ip->ip_len = RT_N2H_U16(ip->ip_len); +#endif + hlen = (ip->ip_hl << 2); +#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN) + ip->ip_len += hlen; +#endif + if (ip->ip_len < hlen + ICMP_MINLEN) + { + Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n")); + return; + } + icp = (struct icmp *)((char *)ip + hlen); + + Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code)); + if ( icp->icmp_type != ICMP_ECHOREPLY + && icp->icmp_type != ICMP_TIMXCEED + && icp->icmp_type != ICMP_UNREACH) + { + return; + } + + /* + * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is + * ICMP_ECHOREPLY assuming data 0 + * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)} + */ + if (ip->ip_len < hlen + 8) + { + Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n")); + return; + } + + type = icp->icmp_type; + if ( type == ICMP_TIMXCEED + || type == ICMP_UNREACH) + { + /* + * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is + * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram + */ + if (ip->ip_len < hlen + 2*8 + sizeof(struct ip)) + { + Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n")); + return; + } + ip = &icp->icmp_ip; + } + + icm = icmp_find_original_mbuf(pData, ip); + if (icm == NULL) + { + Log(("NAT: Can't find the corresponding packet for the received ICMP\n")); + return; + } + + m = icm->im_m; + if (!m) + { + LogFunc(("%R[natsock] hasn't stored it's mbuf on sent\n", icm->im_so)); + goto done; + } + + src = addr->sin_addr.s_addr; + if (type == ICMP_ECHOREPLY) + { + struct ip *ip0 = mtod(m, struct ip *); + struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2)); + if (icp0->icmp_type != ICMP_ECHO) + { + Log(("NAT: we haven't found echo for this reply\n")); + goto done; + } + /* + 
* while combining buffer to send (see ip_icmp.c) we control ICMP header only, + * IP header combined by OS network stack, our local copy of IP header contians values + * in host byte order so no byte order conversion is required. IP headers fields are converting + * in ip_output0 routine only. + */ + if ( (ip->ip_len - hlen) + != (ip0->ip_len - (ip0->ip_hl << 2))) + { + Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n", + (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2)))); + goto done; + } + } + + /* ip points on origianal ip header */ + ip = mtod(m, struct ip *); + proto = ip->ip_p; + /* Now ip is pointing on header we've sent from guest */ + if ( icp->icmp_type == ICMP_TIMXCEED + || icp->icmp_type == ICMP_UNREACH) + { + old_ip_len = (ip->ip_hl << 2) + 64; + if (old_ip_len > sizeof(ip_copy)) + old_ip_len = sizeof(ip_copy); + memcpy(ip_copy, ip, old_ip_len); + } + + /* source address from original IP packet*/ + dst = ip->ip_src.s_addr; + + /* overide ther tail of old packet */ + ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */ + original_hlen = ip->ip_hl << 2; + /* saves original ip header and options */ + m_copyback(pData, m, original_hlen, len - hlen, buff + hlen); + ip->ip_len = m_length(m, NULL); + ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/ + + icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2)); + type = icp->icmp_type; + if ( type == ICMP_TIMXCEED + || type == ICMP_UNREACH) + { + /* according RFC 793 error messages required copy of initial IP header + 64 bit */ + memcpy(&icp->icmp_ip, ip_copy, old_ip_len); + + /* undo byte order conversions done in ip_input() */ + HTONS(icp->icmp_ip.ip_len); + HTONS(icp->icmp_ip.ip_id); + HTONS(icp->icmp_ip.ip_off); + + ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ + } + + ip->ip_src.s_addr = src; + ip->ip_dst.s_addr = dst; + icmp_reflect(pData, m); + /* m was freed */ + icm->im_m = NULL; + + done: + 
icmp_msg_delete(pData, icm); +} + +static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so) +{ + struct sockaddr_in addr; + socklen_t addrlen = sizeof(struct sockaddr_in); + struct ip ip; + char *buff; + int len = 0; + + /* 1- step: read the ip header */ + len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK, + (struct sockaddr *)&addr, &addrlen); + if ( len < 0 + && ( soIgnorableErrorCode(errno) + || errno == ENOTCONN)) + { + Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n")); + return; + } + + if ( len < sizeof(struct ip) + || len < 0 + || len == 0) + { + u_char code; + code = ICMP_UNREACH_PORT; + + if (errno == EHOSTUNREACH) + code = ICMP_UNREACH_HOST; + else if (errno == ENETUNREACH) + code = ICMP_UNREACH_NET; + + LogRel(("NAT: UDP ICMP rx errno=%d (%s)\n", errno, strerror(errno))); + icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); + so->so_m = NULL; + Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm\n")); + return; + } + /* basic check of IP header */ + if ( ip.ip_v != IPVERSION +# ifndef RT_OS_DARWIN + || ip.ip_p != IPPROTO_ICMP +# endif + ) + { + Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4\n")); + return; + } +# ifndef RT_OS_DARWIN + /* Darwin reports the IP length already in host byte order. 
*/ + ip.ip_len = RT_N2H_U16(ip.ip_len); +# endif +# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN) + /* Solaris and Darwin report the payload only */ + ip.ip_len += (ip.ip_hl << 2); +# endif + /* Note: ip->ip_len in host byte order (all OS) */ + len = ip.ip_len; + buff = RTMemAlloc(len); + if (buff == NULL) + { + Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n")); + return; + } + /* 2 - step: we're reading rest of the datagramm to the buffer */ + addrlen = sizeof(struct sockaddr_in); + memset(&addr, 0, addrlen); + len = recvfrom(so->s, buff, len, 0, + (struct sockaddr *)&addr, &addrlen); + if ( len < 0 + && ( soIgnorableErrorCode(errno) + || errno == ENOTCONN)) + { + Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n", + ip.ip_len)); + RTMemFree(buff); + return; + } + if ( len < 0 + || len == 0) + { + Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n", + errno, len, (ip.ip_len - sizeof(struct ip)))); + RTMemFree(buff); + return; + } + /* len is modified in 2nd read, when the rest of the datagramm was read */ + send_icmp_to_guest(pData, buff, len, &addr); + RTMemFree(buff); +} +#endif /* !RT_OS_WINDOWS */ diff --git a/src/VBox/Devices/Network/slirp/socket.h b/src/VBox/Devices/Network/slirp/socket.h new file mode 100644 index 00000000..a518412f --- /dev/null +++ b/src/VBox/Devices/Network/slirp/socket.h @@ -0,0 +1,205 @@ +/* $Id: socket.h $ */ +/** @file + * NAT - socket handling (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +/* MINE */ + +#ifndef _SLIRP_SOCKET_H_ +#define _SLIRP_SOCKET_H_ + +/* Socket expiry intervals. NOTE(review): presumably milliseconds - confirm against the users of so_expire. */ +#define SO_EXPIRE 240000 +#define SO_EXPIREFAST 10000 + +/* + * Our socket structure: one entry per NAT session, tying the host-side + * socket handle to the guest/foreign addresses and ports, the buffers + * and the TCP control block declared below. + */ + +struct socket +{ + struct socket *so_next; + struct socket *so_prev; /* For a linked list of sockets */ + +#if !defined(RT_OS_WINDOWS) + int s; /* The actual socket */ +#else + union { + int s; + HANDLE sh; + }; + uint64_t so_icmp_id; /* XXX: hack */ + uint64_t so_icmp_seq; /* XXX: hack */ +#endif + + /* XXX union these with not-yet-used sbuf params */ + struct mbuf *so_m; /* Pointer to the original SYN packet, + * for non-blocking connect()'s, and + * PING replies */ + struct tcpiphdr *so_ti; /* Pointer to the original ti within + * so_mconn, for non-blocking connections + * (NOTE(review): no so_mconn member is declared here; presumably so_m is meant - confirm) */ + uint8_t *so_ohdr; /* unmolested IP header of the datagram in so_m */ + caddr_t so_optp; /* tcp options in so_m */ + int so_optlen; /* length of options in so_m */ + int so_urgc; /* NOTE(review): presumably the count of pending urgent (OOB) bytes - confirm */ + struct in_addr so_faddr; /* foreign host table entry */ + struct in_addr so_laddr; /* local host table entry */ + u_int16_t so_fport; /* foreign port */ + u_int16_t so_lport; /* local port */ + u_int16_t so_hlport; /* host local port */ + struct in_addr so_hladdr; /* local host addr */ + + u_int8_t so_iptos; /* Type of service */ + + uint8_t so_sottl; /* cached socket's IP_TTL option */ + uint8_t so_sotos; /* cached socket's IP_TOS option */
+ int8_t so_sodf; /* cached socket's DF option */ + + u_char so_type; /* Type of socket, UDP or TCP */ + int so_state; /* internal state flags SS_*, below */ + + struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ + u_int so_expire; /* When the socket will expire */ + + int so_queued; /* Number of packets queued from this socket */ + int so_nqueued; /* Number of packets queued in a row + * Used to determine when to "downgrade" a session + * from fastq to batchq */ + + struct sbuf so_rcv; /* Receive buffer */ + struct sbuf so_snd; /* Send buffer */ +#ifndef RT_OS_WINDOWS + int so_poll_index; +#endif /* !RT_OS_WINDOWS */ + /* + * An FD_CLOSE/POLLHUP event has occurred on the socket + */ + int so_close; + + void (* so_timeout)(PNATState pData, struct socket *so, void *arg); + void *so_timeout_arg; + + /** These flags (''fUnderPolling'' and ''fShouldBeRemoved'') were introduced + * to let the polling routine keep control over freeing a socket, whatever level of + * the TCP/IP code initiated releasing it. + * When the polling routine starts processing a socket it sets + * ''fUnderPolling'' to 1, and clears it (sets it to 0) when it finishes. + * When the polling routine calls functions it should check on return + * whether ''fShouldBeRemoved'' is set or not and, depending on that, call + * ''sofree'' or continue processing the socket. + * If ''fShouldBeRemoved'' equals 1, the polling routine should clear + * ''fUnderPolling'' and then call ''sofree'' to really free the socket and remove it from + * the queue. + * @todo: perhaps, to simplify things we need some helper function. + * @note: it's used like a bool, I use 'int' to avoid compiler warnings + * appearing if [-Wc++-compat] used. + */ + int fUnderPolling; + /** This flag is used by the ''sofree'' function in the following manner: + * + * if fUnderPolling = 1, then we don't remove the socket from the queue, we just + * set ''fShouldBeRemoved'' to 1; otherwise we do the removal.
+ */ + int fShouldBeRemoved; +}; +/* Socket locking hooks; every one of them expands to an empty statement here. */ +# define SOCKET_LOCK(so) do {} while (0) +# define SOCKET_UNLOCK(so) do {} while (0) +# define SOCKET_LOCK_CREATE(so) do {} while (0) +# define SOCKET_LOCK_DESTROY(so) do {} while (0) + +/* + * Socket state bits. (peer means the host on the Internet, + * local host means the host on the other end of the modem) + */ +#define SS_NOFDREF 0x001 /* No fd reference */ + +#define SS_ISFCONNECTING 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ +#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ +#define SS_FCANTRCVMORE 0x008 /* Socket can't receive more from peer (for half-closes) */ +#define SS_FCANTSENDMORE 0x010 /* Socket can't send more to peer (for half-closes) */ +/* #define SS_ISFDISCONNECTED 0x020*/ /* Socket has disconnected from peer, in 2MSL state */ +#define SS_FWDRAIN 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ + +/* #define SS_CTL 0x080 */ +#define SS_FACCEPTCONN 0x100 /* Socket is accepting connections from a host on the internet */ +#define SS_FACCEPTONCE 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ + +extern struct socket tcb; + +#if defined(DECLARE_IOVEC) && !defined(HAVE_READV) +# if !defined(RT_OS_WINDOWS) +struct iovec +{ + char *iov_base; + size_t iov_len; +}; +# else +/* make it congruent with WSABUF (length member first) */ +struct iovec +{ + ULONG iov_len; + char *iov_base; +}; +# endif +#endif + +void so_init (void); +struct socket * solookup (struct socket *, struct in_addr, u_int, struct in_addr, u_int); +struct socket * socreate (void); +void sofree (PNATState, struct socket *); +int sobind(PNATState, struct socket *); +int soread (PNATState, struct socket *); +void sorecvoob (PNATState, struct socket *); +int sosendoob (struct socket *); +int sowrite (PNATState, struct socket *); +void sorecvfrom (PNATState, struct socket *); +int sosendto (PNATState, struct socket *, struct mbuf *); +struct socket * solisten (PNATState, u_int32_t, u_int,
u_int32_t, u_int, int); +void sorwakeup (struct socket *); +void sowwakeup (struct socket *); +void soisfconnecting (register struct socket *); +void soisfconnected (register struct socket *); +int sofcantrcvmore (struct socket *); +void sofcantsendmore (struct socket *); +void soisfdisconnected (struct socket *); +void sofwdrain (struct socket *); +/* Returns non-zero when iErrorCode is EINPROGRESS, EAGAIN or EWOULDBLOCK, i.e. an errno that only means "try again later". */ +static inline int soIgnorableErrorCode(int iErrorCode) +{ + return ( iErrorCode == EINPROGRESS + || iErrorCode == EAGAIN + || iErrorCode == EWOULDBLOCK); +} + +#endif /* !_SLIRP_SOCKET_H_ */ diff --git a/src/VBox/Devices/Network/slirp/tcp.h b/src/VBox/Devices/Network/slirp/tcp.h new file mode 100644 index 00000000..8ca6a17c --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tcp.h @@ -0,0 +1,212 @@ +/* $Id: tcp.h $ */ +/** @file + * NAT - TCP. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp.h 8.1 (Berkeley) 6/10/93 + * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp + */ + +#ifndef _TCP_H_ +#define _TCP_H_ + +typedef uint32_t tcp_seq; + +#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ +#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ + +extern int tcp_rcvspace; +extern int tcp_sndspace; +extern struct socket *tcp_last_so; + +#define TCP_SNDSPACE 8192 +#define TCP_RCVSPACE 8192 + +/* + * TCP header. + * Per RFC 793, September, 1981. 
+ */ +struct tcphdr +{ + uint16_t th_sport; /* source port */ + uint16_t th_dport; /* destination port */ + tcp_seq th_seq; /* sequence number */ + tcp_seq th_ack; /* acknowledgement number */ +#ifdef WORDS_BIGENDIAN +# ifdef _MSC_VER + uint8_t th_off:4; /* data offset */ + uint8_t th_x2:4; /* (unused) */ +# else + unsigned th_off:4; /* data offset */ + unsigned th_x2:4; /* (unused) */ +# endif +#else +# ifdef _MSC_VER + uint8_t th_x2:4; /* (unused) */ + uint8_t th_off:4; /* data offset */ +# else + unsigned th_x2:4; /* (unused) */ + unsigned th_off:4; /* data offset */ +# endif +#endif + uint8_t th_flags; /* TH_* flag bits defined right below */ +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 + uint16_t th_win; /* window */ + uint16_t th_sum; /* checksum */ + uint16_t th_urp; /* urgent pointer */ +}; +AssertCompileSize(struct tcphdr, 20); /* the bit-field variants above must still add up to the 20-byte header */ + +#include "tcp_var.h" + +#define TCPOPT_EOL 0 +#define TCPOPT_NOP 1 +#define TCPOPT_MAXSEG 2 +#define TCPOLEN_MAXSEG 4 +#define TCPOPT_WINDOW 3 +#define TCPOLEN_WINDOW 3 +#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ +#define TCPOLEN_SACK_PERMITTED 2 +#define TCPOPT_SACK 5 /* Experimental */ +#define TCPOPT_TIMESTAMP 8 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */ + +#define TCPOPT_TSTAMP_HDR \ + (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP) + +/* + * Default maximum segment size for TCP. + * With an IP MSS of 576, this is 536, + * but 512 is probably more convenient. + * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)). + * + * We make this 1460 because we only care about Ethernet in the qemu context. + * NOTE(review): the macro below actually evaluates to if_mtu - 80, so the + * value depends on if_mtu and is not a constant 1460 - confirm which is intended. + */ +#define TCP_MSS (if_mtu - 80) + +#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ + +#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ + +/* + * User-settable options (used with setsockopt).
+ * + * We don't use the system headers on unix because we have conflicting + * local structures. We can't avoid the system definitions on Windows, + * so we undefine them. + */ +#undef TCP_NODELAY +#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ +#undef TCP_MAXSEG +/* #define TCP_MAXSEG 0x02 */ /* set maximum segment size */ + +/* + * TCP FSM state definitions. + * Per RFC793, September, 1981. + */ + +#define TCP_NSTATES 11 + +#define TCPS_CLOSED 0 /* closed */ +#define TCPS_LISTEN 1 /* listening for connection */ +#define TCPS_SYN_SENT 2 /* active, have sent syn */ +#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */ +/* states < TCPS_ESTABLISHED are those where connections not established */ +#define TCPS_ESTABLISHED 4 /* established */ +#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ +/* states > TCPS_CLOSE_WAIT are those where user has closed */ +#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ +#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ +#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ +/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ +#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ +#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ + +#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) +#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) +#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) /* NOTE(review): true for TCPS_TIME_WAIT only, although CLOSE_WAIT, CLOSING and LAST_ACK are described above as having seen a FIN - confirm intent */ + +/* + * TCP sequence numbers are 32 bit integers operated on with modular arithmetic. + * These macros can be used to compare such integers; the difference is cast to + * int so that the comparison keeps working across sequence wraparound. + */ +#define SEQ_LT(a,b) ((int)((a)-(b)) < 0) +#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0) +#define SEQ_GT(a,b) ((int)((a)-(b)) > 0) +#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Macros to initialize tcp sequence numbers for + * send and receive from initial send and receive + * sequence numbers.
+ */ +#define tcp_rcvseqinit(tp) \ + (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 + +#define tcp_sendseqinit(tp) \ + (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss + +#define TCP_ISSINCR (125*1024) /* increment for tcp_iss each second */ + + +extern const char * const tcpstates[]; + +#endif /* !_TCP_H_ */ diff --git a/src/VBox/Devices/Network/slirp/tcp_input.c b/src/VBox/Devices/Network/slirp/tcp_input.c new file mode 100644 index 00000000..30abb131 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tcp_input.c @@ -0,0 +1,2060 @@ +/* $Id: tcp_input.c $ */ +/** @file + * NAT - TCP input. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 + * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#include <slirp.h> +#include "ip_icmp.h" + + +#if 0 /* code using these macros is commented out */ +# define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) + +/* for modulo comparisons of timestamps */ +# define TSTMP_LT(a, b) ((int)((a)-(b)) < 0) +# define TSTMP_GEQ(a, b) ((int)((a)-(b)) >= 0) +#endif +/* DELAY_ACK: request an ACK for the segment ti on tp, immediately (TF_ACKNOW) if TH_PUSH is set and TCP_ACK_HACK is not defined, delayed (TF_DELACK) otherwise. Both variants expand to bare statements ending in a semicolon (the first one to an unbraced if/else), so use it only as a complete statement. */ +#ifndef TCP_ACK_HACK +#define DELAY_ACK(tp, ti) \ + if (ti->ti_flags & TH_PUSH) \ + tp->t_flags |= TF_ACKNOW; \ + else \ + tp->t_flags |= TF_DELACK; +#else /* TCP_ACK_HACK */ +#define DELAY_ACK(tp, ign) \ + tp->t_flags |= TF_DELACK; +#endif /* TCP_ACK_HACK */ + + +/* + * Insert the segment (th, m, *tlenp bytes) into the reassembly queue of tp + * and hand every segment that is now in sequence to the socket. + * + * The mbuf m is consumed: it is either queued or freed here. *tlenp is set + * to 0 when the segment is dropped because of the queue limits or an + * allocation failure, and is reduced when leading data duplicates an + * already queued segment. Returns the TH_FIN bit of the last segment + * handed to the socket, or 0 if nothing was handed over. + * + * deps: netinet/tcp_reass.c + * tcp_reass_maxqlen = 48 (default) + * tcp_reass_maxseg = nmbclusters/16 (nmbclusters = 1024 + maxusers * 64 from kern/kern_mbuf.c let's say 256) + */ +int +tcp_reass(PNATState pData, struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m) +{ + struct tseg_qent *q; + struct tseg_qent *p = NULL; + struct tseg_qent *nq; + struct tseg_qent *te = NULL; + struct socket *so = tp->t_socket; + int flags; + STAM_PROFILE_START(&pData->StatTCP_reassamble, tcp_reassamble); + LogFlowFunc(("ENTER: pData:%p, tp:%R[tcpcb793], th:%p, tlenp:%p, m:%p\n", pData, tp, th, tlenp, m)); + + /* + * XXX: tcp_reass() is rather inefficient with its data structures + * and should be rewritten (see NetBSD for optimizations). While + * doing that it should move to its own file tcp_reass.c. + */ + + /* + * Call with th==NULL after becoming established to + * force pre-ESTABLISHED data up to user socket. + */ + if (th == NULL) + { + LogFlowFunc(("%d -> present\n", __LINE__)); + goto present; + } + + /* + * Limit the number of segments in the reassembly queue to prevent + * holding on to too many segments (and thus running out of mbufs). + * Make sure to let the missing segment through which caused this + * queue. Always keep one global queue entry spare to be able to + * process the missing segment.
+ */ + if ( th->th_seq != tp->rcv_nxt + && ( tcp_reass_qsize + 1 >= tcp_reass_maxseg + || tp->t_segqlen >= tcp_reass_maxqlen)) + { + tcp_reass_overflows++; + tcpstat.tcps_rcvmemdrop++; + m_freem(pData, m); + *tlenp = 0; + STAM_PROFILE_STOP(&pData->StatTCP_reassamble, tcp_reassamble); + LogFlowFuncLeave(); + return (0); + } + + /* + * Allocate a new queue entry. If we can't, or hit the zone limit + * just drop the pkt. + */ + te = RTMemAlloc(sizeof(struct tseg_qent)); + if (te == NULL) + { + tcpstat.tcps_rcvmemdrop++; + m_freem(pData, m); + *tlenp = 0; + STAM_PROFILE_STOP(&pData->StatTCP_reassamble, tcp_reassamble); + LogFlowFuncLeave(); + return (0); + } + tp->t_segqlen++; + tcp_reass_qsize++; + + /* + * Find a segment which begins after this one does; p is left pointing + * at the last queued segment that does not (NULL if there is none). + */ + LIST_FOREACH(q, &tp->t_segq, tqe_q) + { + if (SEQ_GT(q->tqe_th->th_seq, th->th_seq)) + break; + p = q; + } + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if (p != NULL) + { + int i; + /* conversion to int (in i) handles seq wraparound */ + i = p->tqe_th->th_seq + p->tqe_len - th->th_seq; + if (i > 0) + { + if (i >= *tlenp) + { + tcpstat.tcps_rcvduppack++; + tcpstat.tcps_rcvdupbyte += *tlenp; + m_freem(pData, m); + RTMemFree(te); + tp->t_segqlen--; + tcp_reass_qsize--; + /* + * Try to present any queued data + * at the left window edge to the user. + * This is needed after the 3-WHS + * completes. + */ + LogFlowFunc(("%d -> present\n", __LINE__)); + goto present; /* ??? */ + } + m_adj(m, i); + *tlenp -= i; + th->th_seq += i; + } + } + tcpstat.tcps_rcvoopack++; + tcpstat.tcps_rcvoobyte += *tlenp; + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them.
+ */ + while (q) + { + int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq; + if (i <= 0) + break; + if (i < q->tqe_len) + { + q->tqe_th->th_seq += i; + q->tqe_len -= i; + m_adj(q->tqe_m, i); + break; + } + + nq = LIST_NEXT(q, tqe_q); + LIST_REMOVE(q, tqe_q); + m_freem(pData, q->tqe_m); + RTMemFree(q); + tp->t_segqlen--; + tcp_reass_qsize--; + q = nq; + } + + /* Insert the new segment queue entry into place. */ + te->tqe_m = m; + te->tqe_th = th; + te->tqe_len = *tlenp; + + if (p == NULL) + { + LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q); + } + else + { + LIST_INSERT_AFTER(p, te, tqe_q); + } + +present: + /* + * Present data to user, advancing rcv_nxt through + * completed sequence space. + */ + if (!TCPS_HAVEESTABLISHED(tp->t_state)) + { + STAM_PROFILE_STOP(&pData->StatTCP_reassamble, tcp_reassamble); + return (0); + } + q = LIST_FIRST(&tp->t_segq); + if (!q || q->tqe_th->th_seq != tp->rcv_nxt) + { + STAM_PROFILE_STOP(&pData->StatTCP_reassamble, tcp_reassamble); + return (0); + } + do + { + tp->rcv_nxt += q->tqe_len; + flags = q->tqe_th->th_flags & TH_FIN; /* overwritten per segment: ends up as the FIN bit of the last one delivered */ + nq = LIST_NEXT(q, tqe_q); + LIST_REMOVE(q, tqe_q); + /* XXX: This place should be checked against the same code in + * original BSD code for Slirp; current BSD uses SS_FCANTRCVMORE here + */ + if (so->so_state & SS_FCANTSENDMORE) + m_freem(pData, q->tqe_m); + else + sbappend(pData, so, q->tqe_m); + RTMemFree(q); + tp->t_segqlen--; + tcp_reass_qsize--; + q = nq; + } + while (q && q->tqe_th->th_seq == tp->rcv_nxt); + + STAM_PROFILE_STOP(&pData->StatTCP_reassamble, tcp_reassamble); + return flags; +} + +/* + * TCP input routine, follows pages 65-76 of the + * protocol specification dated September, 1981 very closely. + */ +void +tcp_input(PNATState pData, register struct mbuf *m, int iphlen, struct socket *inso) +{ + struct ip *ip, *save_ip; + register struct tcpiphdr *ti; + caddr_t optp = NULL; + int optlen = 0; + int len, off; + int tlen = 0; /* Shut up MSC (didn't check whether MSC was right).
*/ + register struct tcpcb *tp = 0; + register int tiflags; + struct socket *so = 0; + int todrop, acked, ourfinisacked, needoutput = 0; +/* int dropsocket = 0; */ + int iss = 0; + u_long tiwin; +/* int ts_present = 0; */ + unsigned ohdrlen; + uint8_t ohdr[60 + 8]; /* max IP header plus 8 bytes of payload for icmp */ + + STAM_PROFILE_START(&pData->StatTCP_input, counter_input); + + LogFlow(("tcp_input: m = %p, iphlen = %2d, inso = %R[natsock]\n", m, iphlen, inso)); + + if (inso != NULL) + { + QSOCKET_LOCK(tcb); + SOCKET_LOCK(inso); + QSOCKET_UNLOCK(tcb); + } + /* + * If called with m == 0, then we're continuing the connect + */ + if (m == NULL) + { + so = inso; + Log4(("NAT: tcp_input: %R[natsock]\n", so)); + + /* Re-set a few variables */ + tp = sototcpcb(so); + + m = so->so_m; + optp = so->so_optp; /* points into m if set */ + optlen = so->so_optlen; + so->so_m = NULL; + so->so_optp = 0; + so->so_optlen = 0; + + if (RT_LIKELY(so->so_ohdr != NULL)) + { + RTMemFree(so->so_ohdr); + so->so_ohdr = NULL; + } + + ti = so->so_ti; + + /** @todo (vvl) clarify why it might happens */ + if (ti == NULL) + { + LogRel(("NAT: ti is null. can't do any reseting connection actions\n")); + /* mbuf should be cleared in sofree called from tcp_close */ + tcp_close(pData, tp); + STAM_PROFILE_STOP(&pData->StatTCP_input, counter_input); + LogFlowFuncLeave(); + return; + } + + tiwin = ti->ti_win; + tiflags = ti->ti_flags; + + LogFlowFunc(("%d -> cont_conn\n", __LINE__)); + goto cont_conn; + } + + tcpstat.tcps_rcvtotal++; + + ip = mtod(m, struct ip *); + + /* ip_input() subtracts iphlen from ip::ip_len */ + AssertStmt(ip->ip_len + iphlen == (ssize_t)m_length(m, NULL), goto drop); + if (RT_UNLIKELY(ip->ip_len < sizeof(struct tcphdr))) + { + /* tcps_rcvshort++; */ + goto drop; + } + + /* + * Save a copy of the IP header in case we want to restore it for + * sending an ICMP error message in response. 
+ * + * XXX: This function should really be fixed to not strip IP + * options, to not overwrite IP header and to use "tlen" local + * variable (instead of ti->ti_len), then "m" could be passed to + * icmp_error() directly. + */ + ohdrlen = iphlen + 8; + m_copydata(m, 0, ohdrlen, (caddr_t)ohdr); + save_ip = (struct ip *)ohdr; + save_ip->ip_len += iphlen; /* undo change by ip_input() */ + + + /* + * Get IP and TCP header together in first mbuf. + * Note: IP leaves IP header in first mbuf. + */ + ti = mtod(m, struct tcpiphdr *); + if (iphlen > sizeof(struct ip)) + { + ip_stripoptions(m, (struct mbuf *)0); + iphlen = sizeof(struct ip); + } + + /* + * Checksum extended TCP header and data. + */ + tlen = ((struct ip *)ti)->ip_len; + memset(ti->ti_x1, 0, 9); + ti->ti_len = RT_H2N_U16((u_int16_t)tlen); + len = sizeof(struct ip) + tlen; + /* keep checksum for ICMP reply + * ti->ti_sum = cksum(m, len); + * if (ti->ti_sum) { */ + if (cksum(m, len)) + { + tcpstat.tcps_rcvbadsum++; + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + + /* + * Check that TCP offset makes sense, + * pull out TCP options and adjust length. XXX + */ + off = ti->ti_off << 2; + if ( off < sizeof (struct tcphdr) + || off > tlen) + { + tcpstat.tcps_rcvbadoff++; + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + tlen -= off; + ti->ti_len = tlen; + if (off > sizeof (struct tcphdr)) + { + optlen = off - sizeof (struct tcphdr); + optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr); + + /* + * Do quick retrieval of timestamp options ("options + * prediction?"). If timestamp is the only option and it's + * formatted as recommended in RFC 1323 appendix A, we + * quickly get the values now and not bother calling + * tcp_dooptions(), etc. 
+ */ +#if 0 + if (( optlen == TCPOLEN_TSTAMP_APPA + || ( optlen > TCPOLEN_TSTAMP_APPA + && optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) && + *(u_int32_t *)optp == RT_H2N_U32_C(TCPOPT_TSTAMP_HDR) && + (ti->ti_flags & TH_SYN) == 0) + { + ts_present = 1; + ts_val = RT_N2H_U32(*(u_int32_t *)(optp + 4)); + ts_ecr = RT_N2H_U32(*(u_int32_t *)(optp + 8)); + optp = NULL; / * we have parsed the options * / + } +#endif + } + tiflags = ti->ti_flags; + + /* + * Convert TCP protocol specific fields to host format. + */ + NTOHL(ti->ti_seq); + NTOHL(ti->ti_ack); + NTOHS(ti->ti_win); + NTOHS(ti->ti_urp); + + /* + * Drop TCP, IP headers and TCP options. + */ + m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + + /* + * Locate pcb for segment. + */ +findso: + LogFlowFunc(("(enter) findso: %R[natsock]\n", so)); + if (so != NULL && so != &tcb) + SOCKET_UNLOCK(so); + QSOCKET_LOCK(tcb); + so = tcp_last_so; + if ( so->so_fport != ti->ti_dport + || so->so_lport != ti->ti_sport + || so->so_laddr.s_addr != ti->ti_src.s_addr + || so->so_faddr.s_addr != ti->ti_dst.s_addr) + { + QSOCKET_UNLOCK(tcb); + /** @todo fix SOLOOKUP macrodefinition to be usable here */ + so = solookup(&tcb, ti->ti_src, ti->ti_sport, + ti->ti_dst, ti->ti_dport); + if (so) + { + tcp_last_so = so; + } + ++tcpstat.tcps_socachemiss; + } + else + { + SOCKET_LOCK(so); + QSOCKET_UNLOCK(tcb); + } + LogFlowFunc(("(leave) findso: %R[natsock]\n", so)); + + /* + * Check whether the packet is targeting CTL_ALIAS and drop it if the connection wasn't + * initiated by localhost (so == NULL), see @bugref{9896}. + */ + if ( (CTL_CHECK(ti->ti_dst.s_addr, CTL_ALIAS)) + && !pData->fLocalhostReachable + && !so) + { + LogFlowFunc(("Packet for CTL_ALIAS and fLocalhostReachable=false so=NULL -> drop\n")); + goto drop; + } + + /* + * If the state is CLOSED (i.e., TCB does not exist) then + * all data in the incoming segment is discarded. 
+ * If the TCB exists but is in CLOSED state, it is embryonic, + * but should either do a listen or a connect soon. + * + * state == CLOSED means we've done socreate() but haven't + * attached it to a protocol yet... + * + * XXX If a TCB does not exist, and the TH_SYN flag is + * the only flag set, then create a session, mark it + * as if it was LISTENING, and continue... + */ + if (so == 0) + { + if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN) + { + LogFlowFunc(("%d -> dropwithreset\n", __LINE__)); + goto dropwithreset; + } + + if ((so = socreate()) == NULL) + { + LogFlowFunc(("%d -> dropwithreset\n", __LINE__)); + goto dropwithreset; + } + if (tcp_attach(pData, so) < 0) + { + RTMemFree(so); /* Not sofree (if it failed, it's not insqued) */ + LogFlowFunc(("%d -> dropwithreset\n", __LINE__)); + goto dropwithreset; + } + SOCKET_LOCK(so); + sbreserve(pData, &so->so_snd, tcp_sndspace); + sbreserve(pData, &so->so_rcv, tcp_rcvspace); + +/* tcp_last_so = so; */ /* XXX ? */ +/* tp = sototcpcb(so); */ + + so->so_laddr = ti->ti_src; + so->so_lport = ti->ti_sport; + so->so_faddr = ti->ti_dst; + so->so_fport = ti->ti_dport; + + so->so_iptos = ((struct ip *)ti)->ip_tos; + + tp = sototcpcb(so); + TCP_STATE_SWITCH_TO(tp, TCPS_LISTEN); + } + + /* + * If this is a still-connecting socket, this probably + * a retransmit of the SYN. Whether it's a retransmit SYN + * or something else, we nuke it. + */ + if (so->so_state & SS_ISFCONNECTING) + { + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + + tp = sototcpcb(so); + + /* XXX Should never fail */ + if (tp == 0) + { + LogFlowFunc(("%d -> dropwithreset\n", __LINE__)); + goto dropwithreset; + } + if (tp->t_state == TCPS_CLOSED) + { + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + + /* Unscale the window into a 32-bit value. */ +/* if ((tiflags & TH_SYN) == 0) + * tiwin = ti->ti_win << tp->snd_scale; + * else + */ + tiwin = ti->ti_win; + + /* + * Segment received on connection. 
+ * Reset idle time and keep-alive timer. + */ + tp->t_idle = 0; + if (so_options) + tp->t_timer[TCPT_KEEP] = tcp_keepintvl; + else + tp->t_timer[TCPT_KEEP] = tcp_keepidle; + + /* + * Process options if not in LISTEN state, + * else do it below (after getting remote address). + */ + if (optp && tp->t_state != TCPS_LISTEN) + tcp_dooptions(pData, tp, (u_char *)optp, optlen, ti); +/* , */ +/* &ts_present, &ts_val, &ts_ecr); */ + + /* + * Header prediction: check for the two common cases + * of a uni-directional data xfer. If the packet has + * no control flags, is in-sequence, the window didn't + * change and we're not retransmitting, it's a + * candidate. If the length is zero and the ack moved + * forward, we're the sender side of the xfer. Just + * free the data acked & wake any higher level process + * that was blocked waiting for space. If the length + * is non-zero and the ack didn't move, we're the + * receiver side. If we're getting packets in-order + * (the reassembly queue is empty), add the data to + * the socket buffer and note that we need a delayed ack. + * + * XXX Some of these tests are not needed + * eg: the tiwin == tp->snd_wnd prevents many more + * predictions.. with no *real* advantage.. + */ + if ( tp->t_state == TCPS_ESTABLISHED + && (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK +/* && (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) */ + && ti->ti_seq == tp->rcv_nxt + && tiwin && tiwin == tp->snd_wnd + && tp->snd_nxt == tp->snd_max) + { + /* + * If last ACK falls within this segment's sequence numbers, + * record the timestamp. + */ +#if 0 + if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) && + SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) + { + tp->ts_recent_age = tcp_now; + tp->ts_recent = ts_val; + } +#endif + + if (ti->ti_len == 0) + { + if ( SEQ_GT(ti->ti_ack, tp->snd_una) + && SEQ_LEQ(ti->ti_ack, tp->snd_max) + && tp->snd_cwnd >= tp->snd_wnd) + { + /* + * this is a pure ack for outstanding data. 
+ */ + ++tcpstat.tcps_predack; +#if 0 + if (ts_present) + tcp_xmit_timer(tp, tcp_now-ts_ecr+1); + else +#endif + if ( tp->t_rtt + && SEQ_GT(ti->ti_ack, tp->t_rtseq)) + tcp_xmit_timer(pData, tp, tp->t_rtt); + acked = ti->ti_ack - tp->snd_una; + tcpstat.tcps_rcvackpack++; + tcpstat.tcps_rcvackbyte += acked; + sbdrop(&so->so_snd, acked); + tp->snd_una = ti->ti_ack; + m_freem(pData, m); + + /* + * If all outstanding data are acked, stop + * retransmit timer, otherwise restart timer + * using current (possibly backed-off) value. + * If process is waiting for space, + * wakeup/selwakeup/signal. If data + * are ready to send, let tcp_output + * decide between more output or persist. + */ + if (tp->snd_una == tp->snd_max) + tp->t_timer[TCPT_REXMT] = 0; + else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + + /* + * There's room in so_snd, sowwakup will read() + * from the socket if we can + */ +#if 0 + if (so->so_snd.sb_flags & SB_NOTIFY) + sowwakeup(so); +#endif + /* + * This is called because sowwakeup might have + * put data into so_snd. Since we don't so sowwakeup, + * we don't need this.. XXX??? + */ + if (SBUF_LEN(&so->so_snd)) + (void) tcp_output(pData, tp); + + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatTCP_input, counter_input); + return; + } + } + else if ( ti->ti_ack == tp->snd_una + && LIST_EMPTY(&tp->t_segq) + && ti->ti_len <= sbspace(&so->so_rcv)) + { + /* + * this is a pure, in-sequence data packet + * with nothing on the reassembly queue and + * we have enough buffer space to take it. + */ + ++tcpstat.tcps_preddat; + tp->rcv_nxt += ti->ti_len; + tcpstat.tcps_rcvpack++; + tcpstat.tcps_rcvbyte += ti->ti_len; + /* + * Add data to socket buffer. + */ + sbappend(pData, so, m); + + /* + * XXX This is called when data arrives. Later, check + * if we can actually write() to the socket + * XXX Need to check? 
It's be NON_BLOCKING + */ +/* sorwakeup(so); */ + + /* + * If this is a short packet, then ACK now - with Nagle + * congestion avoidance sender won't send more until + * he gets an ACK. + * + * It is better to not delay acks at all to maximize + * TCP throughput. See RFC 2581. + */ + tp->t_flags |= TF_ACKNOW; + tcp_output(pData, tp); + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatTCP_input, counter_input); + return; + } + } /* header prediction */ + /* + * Calculate amount of space in receive window, + * and then do TCP input processing. + * Receive window is amount of space in rcv queue, + * but not less than advertised window. + */ + { + int win; + win = sbspace(&so->so_rcv); + if (win < 0) + win = 0; + tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt)); + } + + switch (tp->t_state) + { + /* + * If the state is LISTEN then ignore segment if it contains an RST. + * If the segment contains an ACK then it is bad and send a RST. + * If it does not contain a SYN then it is not interesting; drop it. + * Don't bother responding if the destination was a broadcast. + * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial + * tp->iss, and send a segment: + * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> + * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. + * Fill in remote peer address fields if not previously specified. + * Enter SYN_RECEIVED state, and process any other fields of this + * segment in this state. + */ + case TCPS_LISTEN: + { + if (tiflags & TH_RST) + { + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + if (tiflags & TH_ACK) + { + LogFlowFunc(("%d -> dropwithreset\n", __LINE__)); + goto dropwithreset; + } + if ((tiflags & TH_SYN) == 0) + { + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + + /* + * This has way too many gotos... 
+ * But a bit of spaghetti code never hurt anybody :) + */ + if ( (tcp_fconnect(pData, so) == -1) + && errno != EINPROGRESS + && errno != EWOULDBLOCK) + { + u_char code = ICMP_UNREACH_NET; + Log2((" tcp fconnect errno = %d (%s)\n", errno, strerror(errno))); + if (errno == ECONNREFUSED) + { + /* ACK the SYN, send RST to refuse the connection */ + tcp_respond(pData, tp, ti, m, ti->ti_seq+1, (tcp_seq)0, + TH_RST|TH_ACK); + } + else + { + if (errno == EHOSTUNREACH) + code = ICMP_UNREACH_HOST; + HTONL(ti->ti_seq); /* restore tcp header */ + HTONL(ti->ti_ack); + HTONS(ti->ti_win); + HTONS(ti->ti_urp); + m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + m->m_len += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + *ip = *save_ip; + icmp_error(pData, m, ICMP_UNREACH, code, 0, strerror(errno)); + tp->t_socket->so_m = NULL; + } + tp = tcp_close(pData, tp); + } + else + { + /* + * Haven't connected yet, save the current mbuf + * and ti, and return + * XXX Some OS's don't tell us whether the connect() + * succeeded or not. So we must time it out. 
+ */ + so->so_m = m; + so->so_ti = ti; + so->so_ohdr = RTMemDup(ohdr, ohdrlen); + so->so_optp = optp; + so->so_optlen = optlen; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + TCP_STATE_SWITCH_TO(tp, TCPS_SYN_RECEIVED); + } + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatTCP_input, counter_input); + LogFlowFuncLeave(); + return; + +cont_conn: + /* m==NULL + * Check if the connect succeeded + */ + LogFlowFunc(("cont_conn:\n")); + if (so->so_state & SS_NOFDREF) + { + tp = tcp_close(pData, tp); + LogFlowFunc(("%d -> dropwithreset\n", __LINE__)); + goto dropwithreset; + } + + tcp_template(tp); + + if (optp) + tcp_dooptions(pData, tp, (u_char *)optp, optlen, ti); + + if (iss) + tp->iss = iss; + else + tp->iss = tcp_iss; + tcp_iss += TCP_ISSINCR/2; + tp->irs = ti->ti_seq; + tcp_sendseqinit(tp); + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + TCP_STATE_SWITCH_TO(tp, TCPS_SYN_RECEIVED); + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tcpstat.tcps_accepts++; + LogFlowFunc(("%d -> trimthenstep6\n", __LINE__)); + goto trimthenstep6; + } /* case TCPS_LISTEN */ + + /* + * If the state is SYN_SENT: + * if seg contains an ACK, but not for our SYN, drop the input. + * if seg contains a RST, then drop the connection. + * if seg does not contain SYN, then drop it. + * Otherwise this is an acceptable SYN segment + * initialize tp->rcv_nxt and tp->irs + * if seg contains ack then advance tp->snd_una + * if SYN has been acked change to ESTABLISHED else SYN_RCVD state + * arrange for segment to be acked (eventually) + * continue processing rest of data/controls, beginning with URG + */ + case TCPS_SYN_SENT: + if ( (tiflags & TH_ACK) + && ( SEQ_LEQ(ti->ti_ack, tp->iss) + || SEQ_GT(ti->ti_ack, tp->snd_max))) + { + LogFlowFunc(("%d -> dropwithreset\n", __LINE__)); + goto dropwithreset; + } + + if (tiflags & TH_RST) + { + if (tiflags & TH_ACK) + tp = tcp_drop(pData, tp, 0); /* XXX Check t_softerror! 
*/ + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + + if ((tiflags & TH_SYN) == 0) + { + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + if (tiflags & TH_ACK) + { + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + } + + tp->t_timer[TCPT_REXMT] = 0; + tp->irs = ti->ti_seq; + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) + { + tcpstat.tcps_connects++; + soisfconnected(so); + TCP_STATE_SWITCH_TO(tp, TCPS_ESTABLISHED); + + /* Do window scaling on this connection? */ +#if 0 + if (( tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) + == (TF_RCVD_SCALE|TF_REQ_SCALE)) + { + tp->snd_scale = tp->requested_s_scale; + tp->rcv_scale = tp->request_r_scale; + } +#endif + (void) tcp_reass(pData, tp, (struct tcphdr *)0, NULL, (struct mbuf *)0); + /* + * if we didn't have to retransmit the SYN, + * use its rtt as our initial srtt & rtt var. + */ + if (tp->t_rtt) + tcp_xmit_timer(pData, tp, tp->t_rtt); + } + else + TCP_STATE_SWITCH_TO(tp, TCPS_SYN_RECEIVED); + +trimthenstep6: + LogFlowFunc(("trimthenstep6:\n")); + /* + * Advance ti->ti_seq to correspond to first data byte. + * If data, trim to stay within window, + * dropping FIN if necessary. + */ + ti->ti_seq++; + if (ti->ti_len > tp->rcv_wnd) + { + todrop = ti->ti_len - tp->rcv_wnd; + m_adj(m, -todrop); + ti->ti_len = tp->rcv_wnd; + tiflags &= ~TH_FIN; + tcpstat.tcps_rcvpackafterwin++; + tcpstat.tcps_rcvbyteafterwin += todrop; + } + tp->snd_wl1 = ti->ti_seq - 1; + tp->rcv_up = ti->ti_seq; + LogFlowFunc(("%d -> step6\n", __LINE__)); + goto step6; + } /* switch tp->t_state */ + /* + * States other than LISTEN or SYN_SENT. + * First check timestamp, if present. + * Then check that at least some bytes of segment are within + * receive window. If segment begins before rcv_nxt, + * drop leading data (and SYN); if nothing left, just ack. 
+ * + * RFC 1323 PAWS: If we have a timestamp reply on this segment + * and it's less than ts_recent, drop it. + */ +#if 0 + if ( ts_present + && (tiflags & TH_RST) == 0 + && tp->ts_recent + && TSTMP_LT(ts_val, tp->ts_recent)) + { + /* Check to see if ts_recent is over 24 days old. */ + if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) + { + /* + * Invalidate ts_recent. If this segment updates + * ts_recent, the age will be reset later and ts_recent + * will get a valid value. If it does not, setting + * ts_recent to zero will at least satisfy the + * requirement that zero be placed in the timestamp + * echo reply when ts_recent isn't valid. The + * age isn't reset until we get a valid ts_recent + * because we don't want out-of-order segments to be + * dropped when ts_recent is old. + */ + tp->ts_recent = 0; + } + else + { + tcpstat.tcps_rcvduppack++; + tcpstat.tcps_rcvdupbyte += ti->ti_len; + tcpstat.tcps_pawsdrop++; + goto dropafterack; + } + } +#endif + + todrop = tp->rcv_nxt - ti->ti_seq; + if (todrop > 0) + { + if (tiflags & TH_SYN) + { + tiflags &= ~TH_SYN; + ti->ti_seq++; + if (ti->ti_urp > 1) + ti->ti_urp--; + else + tiflags &= ~TH_URG; + todrop--; + } + /* + * Following if statement from Stevens, vol. 2, p. 960. + */ + if ( todrop > ti->ti_len + || ( todrop == ti->ti_len + && (tiflags & TH_FIN) == 0)) + { + /* + * Any valid FIN must be to the left of the window. + * At this point the FIN must be a duplicate or out + * of sequence; drop it. + */ + tiflags &= ~TH_FIN; + + /* + * Send an ACK to resynchronize and drop any data. + * But keep on processing for RST or ACK. 
+ */ + tp->t_flags |= TF_ACKNOW; + todrop = ti->ti_len; + tcpstat.tcps_rcvduppack++; + tcpstat.tcps_rcvdupbyte += todrop; + } + else + { + tcpstat.tcps_rcvpartduppack++; + tcpstat.tcps_rcvpartdupbyte += todrop; + } + m_adj(m, todrop); + ti->ti_seq += todrop; + ti->ti_len -= todrop; + if (ti->ti_urp > todrop) + ti->ti_urp -= todrop; + else + { + tiflags &= ~TH_URG; + ti->ti_urp = 0; + } + } + /* + * If new data are received on a connection after the + * user processes are gone, then RST the other end. + */ + if ( (so->so_state & SS_NOFDREF) + && tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) + { + tp = tcp_close(pData, tp); + tcpstat.tcps_rcvafterclose++; + LogFlowFunc(("%d -> dropwithreset\n", __LINE__)); + goto dropwithreset; + } + + /* + * If segment ends after window, drop trailing data + * (and PUSH and FIN); if nothing left, just ACK. + */ + todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); + if (todrop > 0) + { + tcpstat.tcps_rcvpackafterwin++; + if (todrop >= ti->ti_len) + { + tcpstat.tcps_rcvbyteafterwin += ti->ti_len; + /* + * If a new connection request is received + * while in TIME_WAIT, drop the old connection + * and start over if the sequence numbers + * are above the previous ones. + */ + if ( tiflags & TH_SYN + && tp->t_state == TCPS_TIME_WAIT + && SEQ_GT(ti->ti_seq, tp->rcv_nxt)) + { + iss = tp->rcv_nxt + TCP_ISSINCR; + tp = tcp_close(pData, tp); + SOCKET_UNLOCK(tp->t_socket); + LogFlowFunc(("%d -> findso\n", __LINE__)); + goto findso; + } + /* + * If window is closed can only take segments at + * window edge, and have to drop data and PUSH from + * incoming segments. Continue processing, but + * remember to ack. Otherwise, drop segment + * and ack. 
+ */ + if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) + { + tp->t_flags |= TF_ACKNOW; + tcpstat.tcps_rcvwinprobe++; + } + else + { + LogFlowFunc(("%d -> dropafterack\n", __LINE__)); + goto dropafterack; + } + } + else + tcpstat.tcps_rcvbyteafterwin += todrop; + m_adj(m, -todrop); + ti->ti_len -= todrop; + tiflags &= ~(TH_PUSH|TH_FIN); + } + + /* + * If last ACK falls within this segment's sequence numbers, + * record its timestamp. + */ +#if 0 + if ( ts_present + && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) + && SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len + ((tiflags & (TH_SYN|TH_FIN)) != 0))) + { + tp->ts_recent_age = tcp_now; + tp->ts_recent = ts_val; + } +#endif + + /* + * If the RST bit is set examine the state: + * SYN_RECEIVED STATE: + * If passive open, return to LISTEN state. + * If active open, inform user that connection was refused. + * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: + * Inform user that connection was reset, and close tcb. + * CLOSING, LAST_ACK, TIME_WAIT STATES + * Close the tcb. + */ + if (tiflags&TH_RST) + switch (tp->t_state) + { + case TCPS_SYN_RECEIVED: +/* so->so_error = ECONNREFUSED; */ + LogFlowFunc(("%d -> close\n", __LINE__)); + goto close; + + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: +/* so->so_error = ECONNRESET; */ +close: + LogFlowFunc(("close:\n")); + TCP_STATE_SWITCH_TO(tp, TCPS_CLOSED); + tcpstat.tcps_drops++; + tp = tcp_close(pData, tp); + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + tp = tcp_close(pData, tp); + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + + /* + * If a SYN is in the window, then this is an + * error and we send an RST and drop the connection. 
+ */ + if (tiflags & TH_SYN) + { + tp = tcp_drop(pData, tp, 0); + LogFlowFunc(("%d -> dropwithreset\n", __LINE__)); + goto dropwithreset; + } + + /* + * If the ACK bit is off we drop the segment and return. + */ + if ((tiflags & TH_ACK) == 0) + { + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + + /* + * Ack processing. + */ + switch (tp->t_state) + { + /* + * In SYN_RECEIVED state if the ack ACKs our SYN then enter + * ESTABLISHED state and continue processing, otherwise + * send an RST. una<=ack<=max + */ + case TCPS_SYN_RECEIVED: + LogFlowFunc(("%d -> TCPS_SYN_RECEIVED\n", __LINE__)); + if ( SEQ_GT(tp->snd_una, ti->ti_ack) + || SEQ_GT(ti->ti_ack, tp->snd_max)) + goto dropwithreset; + tcpstat.tcps_connects++; + TCP_STATE_SWITCH_TO(tp, TCPS_ESTABLISHED); + /* + * The sent SYN is ack'ed with our sequence number +1 + * The first data byte already in the buffer will get + * lost if no correction is made. This is only needed for + * SS_CTL since the buffer is empty otherwise. + * tp->snd_una++; or: + */ + tp->snd_una = ti->ti_ack; + soisfconnected(so); + + /* Do window scaling? */ +#if 0 + if ( (tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) + == (TF_RCVD_SCALE|TF_REQ_SCALE)) + { + tp->snd_scale = tp->requested_s_scale; + tp->rcv_scale = tp->request_r_scale; + } +#endif + (void) tcp_reass(pData, tp, (struct tcphdr *)0, (int *)0, (struct mbuf *)0); + tp->snd_wl1 = ti->ti_seq - 1; + /* Avoid ack processing; snd_una==ti_ack => dup ack */ + LogFlowFunc(("%d -> synrx_to_est\n", __LINE__)); + goto synrx_to_est; + /* fall into ... */ + + /* + * In ESTABLISHED state: drop duplicate ACKs; ACK out of range + * ACKs. If the ack is in the range + * tp->snd_una < ti->ti_ack <= tp->snd_max + * then advance tp->snd_una to ti->ti_ack and drop + * data from the retransmission queue. If this ACK reflects + * more up to date window information we update our window information. 
+ */ + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + LogFlowFunc(("%d -> TCPS_ESTABLISHED|TCPS_FIN_WAIT_1|TCPS_FIN_WAIT_2|TCPS_CLOSE_WAIT|" + "TCPS_CLOSING|TCPS_LAST_ACK|TCPS_TIME_WAIT\n", __LINE__)); + if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) + { + if (ti->ti_len == 0 && tiwin == tp->snd_wnd) + { + tcpstat.tcps_rcvdupack++; + Log2((" dup ack m = %p, so = %p\n", m, so)); + /* + * If we have outstanding data (other than + * a window probe), this is a completely + * duplicate ack (ie, window info didn't + * change), the ack is the biggest we've + * seen and we've seen exactly our rexmt + * threshold of them, assume a packet + * has been dropped and retransmit it. + * Kludge snd_nxt & the congestion + * window so we send only this one + * packet. + * + * We know we're losing at the current + * window size so do congestion avoidance + * (set ssthresh to half the current window + * and pull our congestion window back to + * the new ssthresh). + * + * Dup acks mean that packets have left the + * network (they're now cached at the receiver) + * so bump cwnd by the amount in the receiver + * to keep a constant cwnd packets in the + * network. 
+ */ + if ( tp->t_timer[TCPT_REXMT] == 0 + || ti->ti_ack != tp->snd_una) + tp->t_dupacks = 0; + else if (++tp->t_dupacks == tcprexmtthresh) + { + tcp_seq onxt = tp->snd_nxt; + u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; + if (win < 2) + win = 2; + tp->snd_ssthresh = win * tp->t_maxseg; + tp->t_timer[TCPT_REXMT] = 0; + tp->t_rtt = 0; + tp->snd_nxt = ti->ti_ack; + tp->snd_cwnd = tp->t_maxseg; + (void) tcp_output(pData, tp); + tp->snd_cwnd = tp->snd_ssthresh + + tp->t_maxseg * tp->t_dupacks; + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + else if (tp->t_dupacks > tcprexmtthresh) + { + tp->snd_cwnd += tp->t_maxseg; + (void) tcp_output(pData, tp); + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + } + else + tp->t_dupacks = 0; + break; + } +synrx_to_est: + LogFlowFunc(("synrx_to_est:\n")); + /* + * If the congestion window was inflated to account + * for the other side's cached packets, retract it. + */ + if ( tp->t_dupacks > tcprexmtthresh + && tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; + tp->t_dupacks = 0; + if (SEQ_GT(ti->ti_ack, tp->snd_max)) + { + tcpstat.tcps_rcvacktoomuch++; + LogFlowFunc(("%d -> dropafterack\n", __LINE__)); + goto dropafterack; + } + acked = ti->ti_ack - tp->snd_una; + tcpstat.tcps_rcvackpack++; + tcpstat.tcps_rcvackbyte += acked; + + /* + * If we have a timestamp reply, update smoothed + * round trip time. If no timestamp is present but + * transmit timer is running and timed sequence + * number was acked, update smoothed round trip time. + * Since we now have an rtt measurement, cancel the + * timer backoff (cf., Phil Karn's retransmit alg.). + * Recompute the initial retransmit timer. 
+ */ +#if 0 + if (ts_present) + tcp_xmit_timer(tp, tcp_now-ts_ecr+1); + else +#endif + if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) + tcp_xmit_timer(pData, tp, tp->t_rtt); + + /* + * If all outstanding data is acked, stop retransmit + * timer and remember to restart (more output or persist). + * If there is more data to be acked, restart retransmit + * timer, using current (possibly backed-off) value. + */ + if (ti->ti_ack == tp->snd_max) + { + tp->t_timer[TCPT_REXMT] = 0; + needoutput = 1; + } + else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + /* + * When new data is acked, open the congestion window. + * If the window gives us less than ssthresh packets + * in flight, open exponentially (maxseg per packet). + * Otherwise open linearly: maxseg per window + * (maxseg^2 / cwnd per packet). + */ + { + register u_int cw = tp->snd_cwnd; + register u_int incr = tp->t_maxseg; + + if (cw > tp->snd_ssthresh) + incr = incr * incr / cw; + tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale); + } + if (acked > SBUF_LEN(&so->so_snd)) + { + tp->snd_wnd -= SBUF_LEN(&so->so_snd); + sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); + ourfinisacked = 1; + } + else + { + sbdrop(&so->so_snd, acked); + tp->snd_wnd -= acked; + ourfinisacked = 0; + } + /* + * XXX sowwakup is called when data is acked and there's room for + * for more data... it should read() the socket + */ +#if 0 + if (so->so_snd.sb_flags & SB_NOTIFY) + sowwakeup(so); +#endif + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + + switch (tp->t_state) + { + /* + * In FIN_WAIT_1 STATE in addition to the processing + * for the ESTABLISHED state if our FIN is now acknowledged + * then enter FIN_WAIT_2. + */ + case TCPS_FIN_WAIT_1: + if (ourfinisacked) + { + /* + * If we can't receive any more + * data, then closing user can proceed. 
+ * Starting the timer is contrary to the + * specification, but if we don't get a FIN + * we'll hang forever. + */ + if (so->so_state & SS_FCANTRCVMORE) + { + soisfdisconnected(so); + tp->t_timer[TCPT_2MSL] = tcp_maxidle; + } + TCP_STATE_SWITCH_TO(tp, TCPS_FIN_WAIT_2); + } + break; + + /* + * In CLOSING STATE in addition to the processing for + * the ESTABLISHED state if the ACK acknowledges our FIN + * then enter the TIME-WAIT state, otherwise ignore + * the segment. + */ + case TCPS_CLOSING: + if (ourfinisacked) + { + TCP_STATE_SWITCH_TO(tp, TCPS_TIME_WAIT); + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + soisfdisconnected(so); + } + break; + + /* + * In LAST_ACK, we may still be waiting for data to drain + * and/or to be acked, as well as for the ack of our FIN. + * If our FIN is now acknowledged, delete the TCB, + * enter the closed state and return. + */ + case TCPS_LAST_ACK: + if (ourfinisacked) + { + tp = tcp_close(pData, tp); + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + break; + + /* + * In TIME_WAIT state the only thing that should arrive + * is a retransmission of the remote FIN. Acknowledge + * it and restart the finack timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + LogFlowFunc(("%d -> dropafterack\n", __LINE__)); + goto dropafterack; + } + } /* switch(tp->t_state) */ + +step6: + LogFlowFunc(("step6:\n")); + /* + * Update window information. + * Don't look at window if no ACK: TAC's send garbage on first SYN. 
+ */ + if ( (tiflags & TH_ACK) + && ( SEQ_LT(tp->snd_wl1, ti->ti_seq) + || ( tp->snd_wl1 == ti->ti_seq + && ( SEQ_LT(tp->snd_wl2, ti->ti_ack) + || ( tp->snd_wl2 == ti->ti_ack + && tiwin > tp->snd_wnd))))) + { + /* keep track of pure window updates */ + if ( ti->ti_len == 0 + && tp->snd_wl2 == ti->ti_ack + && tiwin > tp->snd_wnd) + tcpstat.tcps_rcvwinupd++; + tp->snd_wnd = tiwin; + tp->snd_wl1 = ti->ti_seq; + tp->snd_wl2 = ti->ti_ack; + if (tp->snd_wnd > tp->max_sndwnd) + tp->max_sndwnd = tp->snd_wnd; + needoutput = 1; + } + + /* + * Process segments with URG. + */ + if ((tiflags & TH_URG) && ti->ti_urp && + TCPS_HAVERCVDFIN(tp->t_state) == 0) + { + /* + * This is a kludge, but if we receive and accept + * random urgent pointers, we'll crash in + * soreceive. It's hard to imagine someone + * actually wanting to send this much urgent data. + */ + if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) + { + ti->ti_urp = 0; + tiflags &= ~TH_URG; + LogFlowFunc(("%d -> dodata\n", __LINE__)); + goto dodata; + } + + /* + * If this segment advances the known urgent pointer, + * then mark the data stream. This should not happen + * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since + * a FIN has been received from the remote side. + * In these states we ignore the URG. + * + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section as the original + * spec states (in one of two places). + */ + if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) + { + tp->rcv_up = ti->ti_seq + ti->ti_urp; + so->so_urgc = SBUF_LEN(&so->so_rcv) + + (tp->rcv_up - tp->rcv_nxt); /* -1; */ + tp->rcv_up = ti->ti_seq + ti->ti_urp; + } + } + else + /* + * If no out of band data is expected, + * pull receive urgent pointer along + * with the receive window. 
+ */ + if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) + tp->rcv_up = tp->rcv_nxt; +dodata: + LogFlowFunc(("dodata:\n")); + + /* + * If this is a small packet, then ACK now - with Nagel + * congestion avoidance sender won't send more until + * he gets an ACK. + * + * XXX: In case you wonder... The magic "27" below is ESC that + * presumably starts a terminal escape-sequence and that we want + * to ACK ASAP. [Original slirp code had three different + * heuristics to chose from here and in the header prediction case + * above, but the commented out alternatives were lost and the + * header prediction case that had an expanded comment about this + * has been modified to always send an ACK]. + */ + if ( ti->ti_len + && (unsigned)ti->ti_len <= 5 + && ((struct tcpiphdr_2 *)ti)->first_char == (char)27) + { + tp->t_flags |= TF_ACKNOW; + } + + /* + * Process the segment text, merging it into the TCP sequencing queue, + * and arranging for acknowledgment of receipt if necessary. + * This process logically involves adjusting tp->rcv_wnd as data + * is presented to the user (this happens in tcp_usrreq.c, + * case PRU_RCVD). If a FIN has already been received on this + * connection then we just ignore the text. + */ + if ( (ti->ti_len || (tiflags&TH_FIN)) + && TCPS_HAVERCVDFIN(tp->t_state) == 0) + { + if ( ti->ti_seq == tp->rcv_nxt + && LIST_EMPTY(&tp->t_segq) + && tp->t_state == TCPS_ESTABLISHED) + { + DELAY_ACK(tp, ti); /* little bit different from BSD declaration see netinet/tcp_input.c */ + tp->rcv_nxt += tlen; + tiflags = ti->ti_t.th_flags & TH_FIN; + tcpstat.tcps_rcvpack++; + tcpstat.tcps_rcvbyte += tlen; + if (so->so_state & SS_FCANTRCVMORE) + m_freem(pData, m); + else + sbappend(pData, so, m); + } + else + { + tiflags = tcp_reass(pData, tp, &ti->ti_t, &tlen, m); + tp->t_flags |= TF_ACKNOW; + } + /* + * Note the amount of data that peer has sent into + * our window, in order to estimate the sender's + * buffer size. 
+ */ + len = SBUF_SIZE(&so->so_rcv) - (tp->rcv_adv - tp->rcv_nxt); + } + else + { + m_freem(pData, m); + tiflags &= ~TH_FIN; + } + + /* + * If FIN is received ACK the FIN and let the user know + * that the connection is closing. + */ + if (tiflags & TH_FIN) + { + if (TCPS_HAVERCVDFIN(tp->t_state) == 0) + { + /* + * If we receive a FIN we can't send more data, + * set it SS_FDRAIN + * Shutdown the socket if there is no rx data in the + * buffer. + * soread() is called on completion of shutdown() and + * will got to TCPS_LAST_ACK, and use tcp_output() + * to send the FIN. + */ +/* sofcantrcvmore(so); */ + sofwdrain(so); + + tp->t_flags |= TF_ACKNOW; + tp->rcv_nxt++; + } + switch (tp->t_state) + { + /* + * In SYN_RECEIVED and ESTABLISHED STATES + * enter the CLOSE_WAIT state. + */ + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + TCP_STATE_SWITCH_TO(tp, TCPS_CLOSE_WAIT); + break; + + /* + * If still in FIN_WAIT_1 STATE FIN has not been acked so + * enter the CLOSING state. + */ + case TCPS_FIN_WAIT_1: + TCP_STATE_SWITCH_TO(tp, TCPS_CLOSING); + break; + + /* + * In FIN_WAIT_2 state enter the TIME_WAIT state, + * starting the time-wait timer, turning off the other + * standard timers. + */ + case TCPS_FIN_WAIT_2: + TCP_STATE_SWITCH_TO(tp, TCPS_TIME_WAIT); + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + soisfdisconnected(so); + break; + + /* + * In TIME_WAIT state restart the 2 MSL time_wait timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + break; + } + } + + /* + * Return any desired output. + */ + if (needoutput || (tp->t_flags & TF_ACKNOW)) + tcp_output(pData, tp); + + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatTCP_input, counter_input); + LogFlowFuncLeave(); + return; + +dropafterack: + LogFlowFunc(("dropafterack:\n")); + /* + * Generate an ACK dropping incoming segment if it occupies + * sequence space, where the ACK reflects our state. 
+ */ + if (tiflags & TH_RST) + { + LogFlowFunc(("%d -> drop\n", __LINE__)); + goto drop; + } + m_freem(pData, m); + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(pData, tp); + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatTCP_input, counter_input); + LogFlowFuncLeave(); + return; + +dropwithreset: + LogFlowFunc(("dropwithreset:\n")); + /* reuses m if m!=NULL, m_free() unnecessary */ + if (tiflags & TH_ACK) + tcp_respond(pData, tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); + else + { + if (tiflags & TH_SYN) + ti->ti_len++; + tcp_respond(pData, tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0, + TH_RST|TH_ACK); + } + + if (so != &tcb) + SOCKET_UNLOCK(so); + STAM_PROFILE_STOP(&pData->StatTCP_input, counter_input); + LogFlowFuncLeave(); + return; + +drop: + LogFlowFunc(("drop:\n")); + /* + * Drop space held by incoming segment and return. + */ + m_freem(pData, m); + +#ifdef VBOX_WITH_SLIRP_MT + if (RTCritSectIsOwned(&so->so_mutex)) + { + SOCKET_UNLOCK(so); + } +#endif + + STAM_PROFILE_STOP(&pData->StatTCP_input, counter_input); + LogFlowFuncLeave(); + return; +}


/*
 * React to a failed connect on socket 'so' (error code 'sockerr').
 *
 * The socket state is reset to SS_NOFDREF.  For ECONNREFUSED/ECONNRESET
 * the socket is handed to TCP_INPUT() so that a RST gets generated.  For
 * network/host unreachable errors an ICMP unreachable is built from the
 * header saved in so->so_ohdr (IP header plus 8 bytes) and the connection
 * is closed.  Any other error just closes the connection.
 */
void
tcp_fconnect_failed(PNATState pData, struct socket *so, int sockerr)
{
    struct tcpcb *tp;
    int code;

    Log2(("NAT: connect error %d %R[natsock]\n", sockerr, so));

    Assert(so->so_state & SS_ISFCONNECTING);
    so->so_state = SS_NOFDREF;

    if (sockerr == ECONNREFUSED || sockerr == ECONNRESET)
    {
        /* hand off to tcp_input():cont_conn to send RST */
        TCP_INPUT(pData, NULL, 0, so);
        return;
    }

    tp = sototcpcb(so);
    if (RT_UNLIKELY(tp == NULL)) /* should never happen */
    {
        LogRel(("NAT: tp == NULL %R[natsock]\n", so));
        sofree(pData, so);
        return;
    }

    if (sockerr == ENETUNREACH || sockerr == ENETDOWN)
        code = ICMP_UNREACH_NET;
    else if (sockerr == EHOSTUNREACH || sockerr == EHOSTDOWN)
        code = ICMP_UNREACH_HOST;
    else
        code = -1;

    if (code >= 0)
    {
        struct ip *oip;
        unsigned ohdrlen;
        struct mbuf *m;

        if (RT_UNLIKELY(so->so_ohdr == NULL))
            goto out;

        /* Saved header: the IP header followed by 8 more bytes. */
        oip = (struct ip *)so->so_ohdr;
        ohdrlen = oip->ip_hl * 4 + 8;

        m = m_gethdr(pData, M_NOWAIT, MT_HEADER);
        if (RT_UNLIKELY(m == NULL))
            goto out;

        m_copyback(pData, m, 0, ohdrlen, (caddr_t)so->so_ohdr);
        m->m_pkthdr.header = mtod(m, void *);

        icmp_error(pData, m, ICMP_UNREACH, code, 0, NULL);
    }

  out:
    tcp_close(pData, tp);
}


/*
 * Parse the TCP options of a received segment.
 *
 * cp points at the first option octet and cnt is the number of option
 * octets available; ti is the header of the segment carrying them.
 * Only the maximum segment size option is acted upon, and only when it
 * arrives in a SYN; everything else is skipped.
 *
 * Parsing stops at end-of-list or at the first malformed option: one
 * whose length octet is missing, is smaller than 2 (kind + length), or
 * claims more octets than remain.  Without these checks a truncated or
 * bogus option would make the loop, and the memcpy() of the MSS value,
 * read beyond the option area.
 */
void
tcp_dooptions(PNATState pData, struct tcpcb *tp, u_char *cp, int cnt, struct tcpiphdr *ti)
{
    u_int16_t mss;
    int opt, optlen;

    LogFlowFunc(("tcp_dooptions: tp = %R[tcpcb793], cnt=%i\n", tp, cnt));

    for (; cnt > 0; cnt -= optlen, cp += optlen)
    {
        opt = cp[0];
        if (opt == TCPOPT_EOL)
            break;
        if (opt == TCPOPT_NOP)
            optlen = 1;
        else
        {
            /* The length octet itself must lie inside the option area. */
            if (cnt < 2)
                break;
            optlen = cp[1];
            /* Length counts kind + length octets and must fit in what is left. */
            if (optlen < 2 || optlen > cnt)
                break;
        }
        switch (opt)
        {
            default:
                continue;

            case TCPOPT_MAXSEG:
                if (optlen != TCPOLEN_MAXSEG)
                    continue;
                if (!(ti->ti_flags & TH_SYN))
                    continue;
                memcpy((char *) &mss, (char *) cp + 2, sizeof(mss));
                NTOHS(mss);
                (void) tcp_mss(pData, tp, mss); /* sets t_maxseg */
                break;

#if 0
            case TCPOPT_WINDOW:
                if (optlen != TCPOLEN_WINDOW)
                    continue;
                if (!(ti->ti_flags & TH_SYN))
                    continue;
                tp->t_flags |= TF_RCVD_SCALE;
                tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
                break;

            case TCPOPT_TIMESTAMP:
                if (optlen != TCPOLEN_TIMESTAMP)
                    continue;
                *ts_present = 1;
                memcpy((char *) ts_val, (char *)cp + 2, sizeof(*ts_val));
                NTOHL(*ts_val);
                memcpy((char *) ts_ecr, (char *)cp + 6, sizeof(*ts_ecr));
                NTOHL(*ts_ecr);

                /*
                 * A timestamp received in a SYN makes
                 * it ok to send timestamp requests and replies.
                 */
                if (ti->ti_flags & TH_SYN)
                {
                    tp->t_flags |= TF_RCVD_TSTMP;
                    tp->ts_recent = *ts_val;
                    tp->ts_recent_age = tcp_now;
                }
                break;
#endif
        }
    }
}


/*
 * Pull out of band byte out of a segment so
 * it doesn't appear in the user's data queue.
 * It is still reflected in the segment length for
 * sequencing purposes.
+ */ + +#if 0 +void +tcp_pulloutofband(struct socket *so, struct tcpiphdr *ti, struct mbuf *m) +{ + int cnt = ti->ti_urp - 1; + + while (cnt >= 0) + { + if (m->m_len > cnt) + { + char *cp = mtod(m, caddr_t) + cnt; + struct tcpcb *tp = sototcpcb(so); + + tp->t_iobc = *cp; + tp->t_oobflags |= TCPOOB_HAVEDATA; + memcpy(sp, cp+1, (unsigned)(m->m_len - cnt - 1)); + m->m_len--; + return; + } + cnt -= m->m_len; + m = m->m_next; /* XXX WRONG! Fix it! */ + if (m == 0) + break; + } + panic("tcp_pulloutofband"); +} +#endif + +/* + * Collect new round-trip time estimate + * and update averages and current timeout. + */ + +void +tcp_xmit_timer(PNATState pData, register struct tcpcb *tp, int rtt) +{ + register short delta; + + LogFlowFunc(("ENTER: tcp_xmit_timer: tp = %R[tcpcb793] rtt = %d\n", tp, rtt)); + + tcpstat.tcps_rttupdated++; + if (tp->t_srtt != 0) + { + /* + * srtt is stored as fixed point with 3 bits after the + * binary point (i.e., scaled by 8). The following magic + * is equivalent to the smoothing algorithm in rfc793 with + * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed + * point). Adjust rtt to origin 0. + */ + delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); + if ((tp->t_srtt += delta) <= 0) + tp->t_srtt = 1; + /* + * We accumulate a smoothed rtt variance (actually, a + * smoothed mean difference), then set the retransmit + * timer to smoothed rtt + 4 times the smoothed variance. + * rttvar is stored as fixed point with 2 bits after the + * binary point (scaled by 4). The following is + * equivalent to rfc793 smoothing with an alpha of .75 + * (rttvar = rttvar*3/4 + |delta| / 4). This replaces + * rfc793's wired-in beta. + */ + if (delta < 0) + delta = -delta; + delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); + if ((tp->t_rttvar += delta) <= 0) + tp->t_rttvar = 1; + } + else + { + /* + * No rtt measurement yet - use the unsmoothed rtt. + * Set the variance to half the rtt (so our first + * retransmit happens at 3*rtt). 
+ */ + tp->t_srtt = rtt << TCP_RTT_SHIFT; + tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); + } + tp->t_rtt = 0; + tp->t_rxtshift = 0; + + /* + * the retransmit should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + */ + TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), + (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */ + + /* + * We received an ack for a packet that wasn't retransmitted; + * it is probably safe to discard any error indications we've + * received recently. This isn't quite right, but close enough + * for now (a route might have failed after we sent a segment, + * and the return path might not be symmetrical). + */ + tp->t_softerror = 0; +} + +/* + * Determine a reasonable value for maxseg size. + * If the route is known, check route for mtu. + * If none, use an mss that can be handled on the outgoing + * interface without forcing IP to fragment; if bigger than + * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES + * to utilize large mbufs. If no route is found, route has no mtu, + * or the destination isn't local, use a default, hopefully conservative + * size (usually 512 or the default IP max size, but no more than the mtu + * of the interface), as we can't discover anything about intervening + * gateways or networks. We also initialize the congestion/slow start + * window to be a single segment if the destination isn't local. + * While looking at the routing entry, we also initialize other path-dependent + * parameters from pre-set or cached values in the routing entry. 
+ */

/*
 * Compute the segment size for tp.  'offer' is the MSS announced by
 * the peer, or 0 when none was given.  The result is bounded by
 * min(if_mtu, if_mru) less the TCP/IP header size, by the offer when
 * there is one, and is never below 32.  t_maxseg is updated when an
 * offer was given or the result is smaller than the current value;
 * snd_cwnd is set to the result, and both socket buffers are reserved
 * with their configured size rounded up to a multiple of it.
 * Returns the computed mss.
 */
int
tcp_mss(PNATState pData, register struct tcpcb *tp, u_int offer)
{
    struct socket *so = tp->t_socket;
    int mss;

    LogFlowFunc(("ENTER: tcp_mss: offer=%u, t_maxseg=%u; tp=%R[natsock]\n",
                 offer, (unsigned int)tp->t_maxseg, so));

    mss = min(if_mtu, if_mru) - sizeof(struct tcpiphdr);
    if (offer != 0)
        mss = min(mss, offer);
    mss = max(mss, 32);

    if (offer != 0 || mss < tp->t_maxseg)
        tp->t_maxseg = mss;
    tp->snd_cwnd = mss;

    /* Buffer sizes are rounded up to the next multiple of mss. */
    sbreserve(pData, &so->so_snd,
              tcp_sndspace + ((tcp_sndspace % mss) ? (mss - (tcp_sndspace % mss)) : 0));
    sbreserve(pData, &so->so_rcv,
              tcp_rcvspace + ((tcp_rcvspace % mss) ? (mss - (tcp_rcvspace % mss)) : 0));

    LogFlowFunc(("LEAVE: mss=%d\n", mss));
    return mss;
}
diff --git a/src/VBox/Devices/Network/slirp/tcp_output.c b/src/VBox/Devices/Network/slirp/tcp_output.c new file mode 100644 index 00000000..9e61e77b --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tcp_output.c @@ -0,0 +1,739 @@ +/* $Id: tcp_output.c $ */ +/** @file + * NAT - TCP output. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 + * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#include <slirp.h> + +/* + * Since this is only used in "stats socket", we give meaningful + * names instead of the REAL names + */ +const char * const tcpstates[] = +{ +/* "CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD", */ + "REDIRECT", "LISTEN", "SYN_SENT", "SYN_RCVD", + "ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING", + "LAST_ACK", "FIN_WAIT_2", "TIME_WAIT", +}; + +static const u_char tcp_outflags[TCP_NSTATES] = +{ + TH_RST|TH_ACK, 0, TH_SYN, TH_SYN|TH_ACK, + TH_ACK, TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK, + TH_FIN|TH_ACK, TH_ACK, TH_ACK, +}; + + +#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ + +/* + * Tcp output routine: figure out what should be sent and send it. + * + * pData is the NAT instance state; tp is the control block of the + * connection to service. Returns 0 when there was nothing to send or + * every segment was passed to ip_output() without error. Returns 1 when + * no mbuf could be obtained, otherwise the non-zero result of ip_output(); + * on both failure paths the mbuf (if any) is freed here. More than one + * segment may be emitted per call (see sendalot and the "again" label). + */ +int +tcp_output(PNATState pData, register struct tcpcb *tp) +{ + register struct socket *so = tp->t_socket; + register long len, win; + int off, flags, error; + register struct mbuf *m = NULL; + register struct tcpiphdr *ti; + u_char opt[MAX_TCPOPTLEN]; + unsigned optlen, hdrlen; + int idle, sendalot; + int size = 0; + + LogFlowFunc(("ENTER: tcp_output: tp = %R[tcpcb793]\n", tp)); + + /* + * Determine length of data that should be transmitted, + * and flags that will be used. + * If there is some data or critical controls (SYN, RST) + * to send, then transmit; otherwise, investigate further. + */ + idle = (tp->snd_max == tp->snd_una); + if (idle && tp->t_idle >= tp->t_rxtcur) + /* + * We have been idle for "a while" and no acks are + * expected to clock out any data we send -- + * slow start to get ack "clock" running again. + */ + tp->snd_cwnd = tp->t_maxseg; + +again: + sendalot = 0; + off = tp->snd_nxt - tp->snd_una; + win = min(tp->snd_wnd, tp->snd_cwnd); + + flags = tcp_outflags[tp->t_state]; + + Log2((" --- tcp_output flags = 0x%x\n", flags)); + + /* + * If in persist timeout with window of 0, send 1 byte. + * Otherwise, if window is small but nonzero + * and timer expired, we will send what we can + * and go to transmit state. 
+ */ + if (tp->t_force) + { + if (win == 0) + { + /* + * If we still have some data to send, then + * clear the FIN bit. Usually this would + * happen below when it realizes that we + * aren't sending all the data. However, + * if we have exactly 1 byte of unsent data, + * then it won't clear the FIN bit below, + * and if we are in persist state, we wind + * up sending the packet without recording + * that we sent the FIN bit. + * + * We can't just blindly clear the FIN bit, + * because if we don't have any more data + * to send then the probe will be the FIN + * itself. + */ + if (off < SBUF_LEN(&so->so_snd)) + flags &= ~TH_FIN; + win = 1; + } + else + { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + + len = min(SBUF_LEN(&so->so_snd), win) - off; + if (len < 0) + { + /* + * If FIN has been sent but not acked, + * but we haven't been called to retransmit, + * len will be -1. Otherwise, window shrank + * after we sent into it. If window shrank to 0, + * cancel pending retransmit and pull snd_nxt + * back to (closed) window. We will enter persist + * state below. If the window didn't close completely, + * just wait for an ACK. + */ + len = 0; + if (win == 0) + { + tp->t_timer[TCPT_REXMT] = 0; + tp->snd_nxt = tp->snd_una; + } + } + if (len > tp->t_maxseg) + { + len = tp->t_maxseg; + sendalot = 1; + } + if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + SBUF_LEN(&so->so_snd))) + flags &= ~TH_FIN; + + win = sbspace(&so->so_rcv); + + /* + * Sender silly window avoidance. If connection is idle + * and can send all data, a maximum segment, + * at least a maximum default-size segment do it, + * or are forced, do it; otherwise don't bother. + * If peer's buffer is tiny, then send + * when window is at least half open. + * If retransmitting (possibly after persist timer forced us + * to send into a small window), then must resend. 
+ */ + if (len) + { + if (len == tp->t_maxseg) + goto send; + if ((1 || idle || tp->t_flags & TF_NODELAY) && + len + off >= SBUF_LEN(&so->so_snd)) + goto send; + if (tp->t_force) + goto send; + if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) + goto send; + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) + goto send; + } + + /* + * Compare available window to amount of window + * known to peer (as advertised window less + * next expected input). If the difference is at least two + * max size segments, or at least 50% of the maximum possible + * window, then want to send a window update to peer. + */ + if (win > 0) + { + /* + * "adv" is the amount we can increase the window, + * taking into account that we are limited by + * TCP_MAXWIN << tp->rcv_scale. + */ + long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale); + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) + adv -= tp->rcv_adv - tp->rcv_nxt; + + if (adv >= (long) (2 * tp->t_maxseg)) + goto send; + if (2 * adv >= (long) SBUF_SIZE(&so->so_rcv)) + goto send; + } + + /* + * Send if we owe peer an ACK. + */ + if (tp->t_flags & TF_ACKNOW) + goto send; + if (flags & (TH_SYN|TH_RST)) + goto send; + if (SEQ_GT(tp->snd_up, tp->snd_una)) + goto send; + /* + * If our state indicates that FIN should be sent + * and we have not yet done so, or we're retransmitting the FIN, + * then we need to send. + */ + if ( flags & TH_FIN + && ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) + goto send; + + /* + * TCP window updates are not reliable, rather a polling protocol + * using ``persist'' packets is used to insure receipt of window + * updates. The three ``states'' for the output side are: + * idle not doing retransmits or persists + * persisting to move a small or zero window + * (re)transmitting and thereby not persisting + * + * tp->t_timer[TCPT_PERSIST] + * is set when we are in persist state. + * tp->t_force + * is set when we are called to send a persist packet. 
+ * tp->t_timer[TCPT_REXMT] + * is set when we are retransmitting + * The output side is idle when both timers are zero. + * + * If send window is too small, there is data to transmit, and no + * retransmit or persist is pending, then go to persist state. + * If nothing happens soon, send when timer expires: + * if window is nonzero, transmit what we can, + * otherwise force out a byte. + */ + if ( SBUF_LEN(&so->so_snd) + && tp->t_timer[TCPT_REXMT] == 0 + && tp->t_timer[TCPT_PERSIST] == 0) + { + tp->t_rxtshift = 0; + tcp_setpersist(tp); + } + + /* + * No reason to send a segment, just return. + */ + tcpstat.tcps_didnuttin++; + + LogFlowFuncLeave(); + return (0); + +send: + LogFlowFunc(("send\n")); + /* + * Before ESTABLISHED, force sending of initial options + * unless TCP set not to do any options. + * NOTE: we assume that the IP/TCP header plus TCP options + * always fit in a single mbuf, leaving room for a maximum + * link header, i.e. + * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN + */ + optlen = 0; + hdrlen = sizeof (struct tcpiphdr); + if (flags & TH_SYN) + { + tp->snd_nxt = tp->iss; + if ((tp->t_flags & TF_NOOPT) == 0) + { + u_int16_t mss; + + opt[0] = TCPOPT_MAXSEG; + opt[1] = 4; + mss = RT_H2N_U16((u_int16_t) tcp_mss(pData, tp, 0)); + memcpy((caddr_t)(opt + 2), (caddr_t)&mss, sizeof(mss)); + optlen = 4; + +#if 0 + if ( (tp->t_flags & TF_REQ_SCALE) + && ( (flags & TH_ACK) == 0 + || (tp->t_flags & TF_RCVD_SCALE))) + { + *((u_int32_t *) (opt + optlen)) = RT_H2N_U32( TCPOPT_NOP << 24 + | TCPOPT_WINDOW << 16 + | TCPOLEN_WINDOW << 8 + | tp->request_r_scale); + optlen += 4; + } +#endif + } + } + + /* + * Send a timestamp and echo-reply if this is a SYN and our side + * wants to use timestamps (TF_REQ_TSTMP is set) or both our side + * and our peer have sent timestamps in our SYN's. 
+ */ +#if 0 + if ( (tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP + && (flags & TH_RST) == 0 + && ( (flags & (TH_SYN|TH_ACK)) == TH_SYN + || (tp->t_flags & TF_RCVD_TSTMP))) + { + u_int32_t *lp = (u_int32_t *)(opt + optlen); + + /* Form timestamp option as shown in appendix A of RFC 1323. */ + *lp++ = RT_H2N_U32_C(TCPOPT_TSTAMP_HDR); + *lp++ = RT_H2N_U32(tcp_now); + *lp = RT_H2N_U32(tp->ts_recent); + optlen += TCPOLEN_TSTAMP_APPA; + } +#endif + hdrlen += optlen; + + /* + * Adjust data length if insertion of options will + * bump the packet length beyond the t_maxseg length. + */ + if (len > tp->t_maxseg - optlen) + { + len = tp->t_maxseg - optlen; + sendalot = 1; + } + + /* + * Grab a header mbuf, attaching a copy of data to + * be transmitted, and initialize the header from + * the template for sends on this connection. + */ + if (len) + { + if (tp->t_force && len == 1) + tcpstat.tcps_sndprobe++; + else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) + { + tcpstat.tcps_sndrexmitpack++; + tcpstat.tcps_sndrexmitbyte += len; + } + else + { + tcpstat.tcps_sndpack++; + tcpstat.tcps_sndbyte += len; + } + + size = MCLBYTES; + if ((len + hdrlen + ETH_HLEN) < MSIZE) + size = MCLBYTES; + else if ((len + hdrlen + ETH_HLEN) < MCLBYTES) + size = MCLBYTES; + else if((len + hdrlen + ETH_HLEN) < MJUM9BYTES) + size = MJUM9BYTES; + else if ((len + hdrlen + ETH_HLEN) < MJUM16BYTES) + size = MJUM16BYTES; + else + AssertMsgFailed(("Unsupported size")); + m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size); + if (m == NULL) + { +/* error = ENOBUFS; */ + error = 1; + goto out; + } + m->m_data += if_maxlinkhdr; + m->m_pkthdr.header = mtod(m, void *); + m->m_len = hdrlen; + + /* + * This will always succeed, since we make sure our mbufs + * are big enough to hold one MSS packet + header + ... etc. 
+ */ +#if 0 + if (len <= MHLEN - hdrlen - max_linkhdr) + { +#endif + sbcopy(&so->so_snd, off, (int) len, mtod(m, caddr_t) + hdrlen); + m->m_len += len; +#if 0 + } + else + { + m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len); + if (m->m_next == 0) + len = 0; + } +#endif + /* + * If we're sending everything we've got, set PUSH. + * (This will keep happy those implementations which only + * give data to the user when a buffer fills or + * a PUSH comes in.) + */ + if (off + len == (ssize_t)SBUF_LEN(&so->so_snd)) + flags |= TH_PUSH; + } + else + { + bool fUninitializedTemplate = false; + if (tp->t_flags & TF_ACKNOW) + tcpstat.tcps_sndacks++; + else if (flags & (TH_SYN|TH_FIN|TH_RST)) + tcpstat.tcps_sndctrl++; + else if (SEQ_GT(tp->snd_up, tp->snd_una)) + tcpstat.tcps_sndurg++; + else + tcpstat.tcps_sndwinup++; + + if ((hdrlen + ETH_HLEN) < MSIZE) + { + size = MCLBYTES; + } + else if ((hdrlen + ETH_HLEN) < MCLBYTES) + { + size = MCLBYTES; + } + else if((hdrlen + ETH_HLEN) < MJUM9BYTES) + { + size = MJUM9BYTES; + } + else if ((hdrlen + ETH_HLEN) < MJUM16BYTES) + { + size = MJUM16BYTES; + } + else + { + AssertMsgFailed(("Unsupported size")); + } + m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size); + if (m == NULL) + { +/* error = ENOBUFS; */ + error = 1; + goto out; + } + m->m_data += if_maxlinkhdr; + m->m_pkthdr.header = mtod(m, void *); + m->m_len = hdrlen; + /* + * Uninitialized TCP template looks very suspicious at this processing state, thus why we have + * to workaround the problem till right fix. Warning appears once at release log. 
+ */ + fUninitializedTemplate = RT_BOOL(( tp->t_template.ti_src.s_addr == INADDR_ANY + || tp->t_template.ti_dst.s_addr == INADDR_ANY)); +#ifndef DEBUG_vvl + if (fUninitializedTemplate) + { + static bool fWarn; + tcp_template(tp); + if(!fWarn) + { + LogRel(("NAT: TCP: TCP template was created forcely from socket information\n")); + fWarn = true; + } + } +#else + Assert((!fUninitializedTemplate)); +#endif + } + + ti = mtod(m, struct tcpiphdr *); + + memcpy((caddr_t)ti, &tp->t_template, sizeof (struct tcpiphdr)); + + /* + * Fill in fields, remembering maximum advertised + * window for use in delaying messages about window sizes. + * If resending a FIN, be sure not to use a new sequence number. + */ + if ( flags & TH_FIN + && tp->t_flags & TF_SENTFIN + && tp->snd_nxt == tp->snd_max) + tp->snd_nxt--; + /* + * If we are doing retransmissions, then snd_nxt will + * not reflect the first unsent octet. For ACK only + * packets, we do not want the sequence number of the + * retransmitted packet, we want the sequence number + * of the next unsent octet. So, if there is no data + * (and no SYN or FIN), use snd_max instead of snd_nxt + * when filling in ti_seq. But if we are in persist + * state, snd_max might reflect one byte beyond the + * right edge of the window, so use snd_nxt in that + * case, since we know we aren't doing a retransmission. + * (retransmit and persist are mutually exclusive...) + */ + if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST]) + ti->ti_seq = RT_H2N_U32(tp->snd_nxt); + else + ti->ti_seq = RT_H2N_U32(tp->snd_max); + ti->ti_ack = RT_H2N_U32(tp->rcv_nxt); + if (optlen) + { + memcpy((caddr_t)(ti + 1), (caddr_t)opt, optlen); + ti->ti_off = (uint8_t)((sizeof (struct tcphdr) + optlen) >> 2); + } + ti->ti_flags = flags; + /* + * Calculate receive window. Don't shrink window, + * but avoid silly window syndrome. 
+ */ + if (win < (long)(SBUF_SIZE(&so->so_rcv) / 4) && win < (long)tp->t_maxseg) + win = 0; + if (win > (long)TCP_MAXWIN << tp->rcv_scale) + win = (long)TCP_MAXWIN << tp->rcv_scale; + if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt)) + win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt); + ti->ti_win = RT_H2N_U16((u_int16_t) (win>>tp->rcv_scale)); + +#if 0 + if (SEQ_GT(tp->snd_up, tp->snd_nxt)) + { + ti->ti_urp = RT_H2N_U16((u_int16_t)(tp->snd_up - tp->snd_nxt)); +#else + if (SEQ_GT(tp->snd_up, tp->snd_una)) + { + ti->ti_urp = RT_H2N_U16((u_int16_t)(tp->snd_up - RT_N2H_U32(ti->ti_seq))); +#endif + ti->ti_flags |= TH_URG; + } + else + /* + * If no urgent pointer to send, then we pull + * the urgent pointer to the left edge of the send window + * so that it doesn't drift into the send window on sequence + * number wraparound. + */ + tp->snd_up = tp->snd_una; /* drag it along */ + + /* + * Put TCP length in extended header, and then + * checksum extended header and data. + */ + if (len + optlen) + ti->ti_len = RT_H2N_U16((u_int16_t)(sizeof (struct tcphdr) + + optlen + len)); + ti->ti_sum = cksum(m, (int)(hdrlen + len)); + + /* + * In transmit state, time the transmission and arrange for + * the retransmit. In persist state, just set snd_max. + */ + if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) + { + tcp_seq startseq = tp->snd_nxt; + + /* + * Advance snd_nxt over sequence space of this segment. + */ + if (flags & (TH_SYN|TH_FIN)) + { + if (flags & TH_SYN) + tp->snd_nxt++; + if (flags & TH_FIN) + { + tp->snd_nxt++; + tp->t_flags |= TF_SENTFIN; + } + } + tp->snd_nxt += len; + if (SEQ_GT(tp->snd_nxt, tp->snd_max)) + { + tp->snd_max = tp->snd_nxt; + /* + * Time this transmission if not a retransmission and + * not currently timing anything. + */ + if (tp->t_rtt == 0) + { + tp->t_rtt = 1; + tp->t_rtseq = startseq; + tcpstat.tcps_segstimed++; + } + } + + /* + * Set retransmit timer if not currently set, + * and not doing an ack or a keep-alive probe. 
+ * Initial value for retransmit timer is smoothed + * round-trip time + 2 * round-trip time variance. + * Initialize shift counter which is used for backoff + * of retransmit time. + */ + if ( tp->t_timer[TCPT_REXMT] == 0 + && tp->snd_nxt != tp->snd_una) + { + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + if (tp->t_timer[TCPT_PERSIST]) + { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + } + else + if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) + tp->snd_max = tp->snd_nxt + len; + + /* + * Fill in IP length and desired time to live and + * send to IP level. There should be a better way + * to handle ttl and tos; we could keep them in + * the template, but need a way to checksum without them. + */ + M_ASSERTPKTHDR(m); + m->m_pkthdr.header = mtod(m, void *); + m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ + + { + ((struct ip *)ti)->ip_len = m->m_len; + ((struct ip *)ti)->ip_ttl = ip_defttl; + ((struct ip *)ti)->ip_tos = so->so_iptos; + + /* #if BSD >= 43 */ + /* Don't do IP options... */ +#if 0 + error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, + so->so_options & SO_DONTROUTE, 0); +#endif + error = ip_output(pData, so, m); + +#if 0 +/* #else */ + error = ip_output(m, (struct mbuf *)0, &tp->t_inpcb->inp_route, + so->so_options & SO_DONTROUTE); +/* #endif */ +#endif + } + if (error) + { +out: +#if 0 + if (error == ENOBUFS) + { + tcp_quench(tp->t_inpcb, 0); + return (0); + } + + if ( ( error == EHOSTUNREACH + || error == ENETDOWN) + && TCPS_HAVERCVDSYN(tp->t_state)) + { + tp->t_softerror = error; + return (0); + } +#endif + if (m != NULL) + m_freem(pData, m); + return (error); + } + tcpstat.tcps_sndtotal++; + + /* + * Data sent (as far as we can tell). + * If this advertises a larger window than any other segment, + * then remember the size of the advertised window. + * Any pending ACK has now been sent. 
+ */ + if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) + tp->rcv_adv = tp->rcv_nxt + win; + tp->last_ack_sent = tp->rcv_nxt; + tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); + if (sendalot) + goto again; + + return (0); +} +/* + * Start or restart the persist timer of tp. The interval is derived + * from t_srtt and t_rttvar, multiplied by tcp_backoff[t_rxtshift] and + * passed through TCPT_RANGESET with TCPTV_PERSMIN and TCPTV_PERSMAX as + * bounds. t_rxtshift is then incremented while below TCP_MAXRXTSHIFT. + */ +void +tcp_setpersist(struct tcpcb *tp) +{ + int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; + +#if 0 + if (tp->t_timer[TCPT_REXMT]) + panic("tcp_output REXMT"); +#endif + /* + * Start/restart persistence timer. + */ + TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], + t * tcp_backoff[tp->t_rxtshift], + TCPTV_PERSMIN, TCPTV_PERSMAX); + if (tp->t_rxtshift < TCP_MAXRXTSHIFT) + tp->t_rxtshift++; +} diff --git a/src/VBox/Devices/Network/slirp/tcp_subr.c b/src/VBox/Devices/Network/slirp/tcp_subr.c new file mode 100644 index 00000000..c7820488 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tcp_subr.c @@ -0,0 +1,654 @@ +/* $Id: tcp_subr.c $ */ +/** @file + * NAT - TCP support. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 + * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#include <slirp.h> + + +/* + * Tcp initialization + */ +void +tcp_init(PNATState pData) +{ + tcp_iss = 1; /* wrong */ + tcb.so_next = tcb.so_prev = &tcb; + tcp_last_so = &tcb; + tcp_reass_maxqlen = 48; + tcp_reass_maxseg = 256; +} + +/* + * Create template to be used to send tcp packets on a connection. + * Call after host entry created, fills + * in a skeletal tcp/ip header, minimizing the amount of work + * necessary when the connection is used. + */ +/* struct tcpiphdr * */ +void +tcp_template(struct tcpcb *tp) +{ + struct socket *so = tp->t_socket; + register struct tcpiphdr *n = &tp->t_template; + + memset(n->ti_x1, 0, 9); + n->ti_pr = IPPROTO_TCP; + n->ti_len = RT_H2N_U16(sizeof (struct tcpiphdr) - sizeof (struct ip)); + n->ti_src = so->so_faddr; + n->ti_dst = so->so_laddr; + n->ti_sport = so->so_fport; + n->ti_dport = so->so_lport; + + n->ti_seq = 0; + n->ti_ack = 0; + n->ti_x2 = 0; + n->ti_off = 5; + n->ti_flags = 0; + n->ti_win = 0; + n->ti_sum = 0; + n->ti_urp = 0; +} + +/* + * Send a single message to the TCP at address specified by + * the given TCP/IP header. If m == 0, then we make a copy + * of the tcpiphdr at ti and send directly to the addressed host. + * This is used to force keep alive messages out using the TCP + * template for a connection tp->t_template. If flags are given + * then we send a message back to the TCP which originated the + * segment ti, and discard the mbuf containing it and any other + * attached mbufs. + * + * In any case the ack and sequence number of the transmitted + * segment are as specified by the parameters. 
+ */ +void +tcp_respond(PNATState pData, struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags) +{ + register int tlen; + + LogFlowFunc(("ENTER: tp = %R[tcpcb793], ti = %p, m = %p, ack = %u, seq = %u, flags = %x\n", tp, ti, m, ack, seq, flags)); + + if (m == 0) + { + if ((m = m_gethdr(pData, M_DONTWAIT, MT_HEADER)) == NULL) + return; +#ifdef TCP_COMPAT_42 + tlen = 1; +#else + tlen = 0; +#endif + m->m_data += if_maxlinkhdr; + m->m_pkthdr.header = mtod(m, void *); + *mtod(m, struct tcpiphdr *) = *ti; + ti = mtod(m, struct tcpiphdr *); + flags = TH_ACK; + } + else + { + /* + * ti points into m so the next line is just making + * the mbuf point to ti + */ + m->m_data = (caddr_t)ti; + + m->m_len = sizeof (struct tcpiphdr); + tlen = 0; +#define xchg(a,b,type) { type t; t = a; a = b; b = t; } + xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_int32_t); + xchg(ti->ti_dport, ti->ti_sport, u_int16_t); +#undef xchg + } + ti->ti_len = RT_H2N_U16((u_short)(sizeof (struct tcphdr) + tlen)); + tlen += sizeof (struct tcpiphdr); + m->m_len = tlen; + + memset(ti->ti_x1, 0, 9); + ti->ti_seq = RT_H2N_U32(seq); + ti->ti_ack = RT_H2N_U32(ack); + ti->ti_x2 = 0; + ti->ti_off = sizeof (struct tcphdr) >> 2; + ti->ti_flags = flags; + if (tp) + { + int win = sbspace(&tp->t_socket->so_rcv); + ti->ti_win = RT_H2N_U16((u_int16_t) (win >> tp->rcv_scale)); + } + else + ti->ti_win = 0; + ti->ti_urp = 0; + ti->ti_sum = 0; + ti->ti_sum = cksum(m, tlen); + ((struct ip *)ti)->ip_len = tlen; + + if(flags & TH_RST) + ((struct ip *)ti)->ip_ttl = MAXTTL; + else + ((struct ip *)ti)->ip_ttl = ip_defttl; + + (void) ip_output(pData, (struct socket *)0, m); +} + +/* + * Create a new TCP control block, making an + * empty reassembly queue and hooking it to the argument + * protocol control block. 
+ */ +struct tcpcb * +tcp_newtcpcb(PNATState pData, struct socket *so) +{ + register struct tcpcb *tp; + + tp = (struct tcpcb *)RTMemAllocZ(sizeof(*tp)); + if (tp == NULL) + return ((struct tcpcb *)0); + + tp->t_maxseg = tcp_mssdflt; + + tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0; + tp->t_socket = so; + + /* + * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no + * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives + * reasonable initial retransmit time. + */ + tp->t_srtt = TCPTV_SRTTBASE; + tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2; + tp->t_rttmin = TCPTV_MIN; + + TCPT_RANGESET(tp->t_rxtcur, + ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, + TCPTV_MIN, TCPTV_REXMTMAX); + + tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; + tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; + TCP_STATE_SWITCH_TO(tp, TCPS_CLOSED); + + so->so_tcpcb = tp; + so->so_type = IPPROTO_TCP; + + return (tp); +} + +/* + * Drop a TCP connection, reporting + * the specified error. If connection is synchronized, + * then send a RST to peer. 
+ */ +struct tcpcb *tcp_drop(PNATState pData, struct tcpcb *tp, int err) +{ +/* tcp_drop(tp, errno) + register struct tcpcb *tp; + int errno; +{ +*/ + int fUninitializedTemplate = 0; +#ifndef LOG_ENABLED + NOREF(err); +#endif + LogFlowFunc(("ENTER: tp = %R[tcpcb793], errno = %d\n", tp, err)); + fUninitializedTemplate = RT_BOOL(( tp + && ( tp->t_template.ti_src.s_addr == INADDR_ANY + || tp->t_template.ti_dst.s_addr == INADDR_ANY))); + + if ( TCPS_HAVERCVDSYN(tp->t_state) + && !fUninitializedTemplate) + { + TCP_STATE_SWITCH_TO(tp, TCPS_CLOSED); + (void) tcp_output(pData, tp); + tcpstat.tcps_drops++; + } + else + tcpstat.tcps_conndrops++; +#if 0 + if (errno == ETIMEDOUT && tp->t_softerror) + errno = tp->t_softerror; + + so->so_error = errno; +#endif + return (tcp_close(pData, tp)); +} + +/* + * Close a TCP control block: + * discard all space held by the tcp + * discard internet protocol block + * wake up any sleepers + */ +struct tcpcb * +tcp_close(PNATState pData, register struct tcpcb *tp) +{ + struct socket *so = tp->t_socket; + + struct tseg_qent *te = NULL; + LogFlowFunc(("ENTER: tp = %R[tcpcb793]\n", tp)); + /*XXX: freeing the reassembly queue */ + while (!LIST_EMPTY(&tp->t_segq)) + { + te = LIST_FIRST(&tp->t_segq); + LIST_REMOVE(te, tqe_q); + m_freem(pData, te->tqe_m); + RTMemFree(te); + tcp_reass_qsize--; + } + RTMemFree(tp); + so->so_tcpcb = 0; + soisfdisconnected(so); + /* clobber input socket cache if we're closing the cached connection */ + if (so == tcp_last_so) + tcp_last_so = &tcb; + if (so->s != -1) + closesocket(so->s); + /* Avoid double free if the socket is listening and therefore doesn't have + * any sbufs reserved. */ + if (!(so->so_state & SS_FACCEPTCONN)) + { + sbfree(&so->so_rcv); + sbfree(&so->so_snd); + } + sofree(pData, so); + SOCKET_UNLOCK(so); + tcpstat.tcps_closed++; + return ((struct tcpcb *)0); +} + +void +tcp_drain(void) +{ + /* XXX */ +} + +/* + * When a source quench is received, close congestion window + * to one segment. 
We will gradually open it again as we proceed. + */ + +#if 0 + +void +tcp_quench(i, int errno) +{ + struct tcpcb *tp = intotcpcb(inp); + + if (tp) + tp->snd_cwnd = tp->t_maxseg; +} + +#endif + +/* + * TCP protocol interface to socket abstraction. + */ + +/* + * User issued close, and wish to trail through shutdown states: + * if never received SYN, just forget it. If got a SYN from peer, + * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. + * If already got a FIN from peer, then almost done; go to LAST_ACK + * state. In all other cases, have already sent FIN to peer (e.g. + * after PRU_SHUTDOWN), and just have to play tedious game waiting + * for peer to send FIN or not respond to keep-alives, etc. + * We can let the user exit from the close as soon as the FIN is acked. + */ +void +tcp_sockclosed(PNATState pData, struct tcpcb *tp) +{ + LogFlowFunc(("ENTER: tp = %R[tcpcb793]\n", tp)); + LogFunc(("tp->t_socket:%R[natsock]\n",tp->t_socket)); + + switch (tp->t_state) + { + case TCPS_CLOSED: + case TCPS_LISTEN: + case TCPS_SYN_SENT: + TCP_STATE_SWITCH_TO(tp, TCPS_CLOSED); + tp = tcp_close(pData, tp); + break; + + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + TCP_STATE_SWITCH_TO(tp, TCPS_FIN_WAIT_1); + break; + + case TCPS_CLOSE_WAIT: + TCP_STATE_SWITCH_TO(tp, TCPS_LAST_ACK); + break; + } +/* soisfdisconnecting(tp->t_socket); */ + if ( tp + && tp->t_state >= TCPS_FIN_WAIT_2) + soisfdisconnected(tp->t_socket); + /* + * (vasily) there're situations when the FIN or FIN,ACK are lost (Windows host) + * and retransmitting keeps VBox busy on sending closing sequences *very* frequent, + * easting a lot of CPU. To avoid this we don't sent on sockets marked as closed + * (see slirp.c for details about setting so_close member). 
+ */ + if ( tp + && tp->t_socket + && !tp->t_socket->so_close) + tcp_output(pData, tp); +} + +/* + * Connect to a host on the Internet + * Called by tcp_input + * Only do a connect, the tcp fields will be set in tcp_input + * return 0 if there's a result of the connect, + * else return -1 means we're still connecting + * The return value is almost always -1 since the socket is + * nonblocking. Connect returns after the SYN is sent, and does + * not wait for ACK+SYN. + */ +int tcp_fconnect(PNATState pData, struct socket *so) +{ + int ret = 0; + + LogFlowFunc(("ENTER: so = %R[natsock]\n", so)); + + if ((ret = so->s = socket(AF_INET, SOCK_STREAM, 0)) >= 0) + { + int opt, s = so->s; + struct sockaddr_in addr; + + fd_nonblock(s); + + opt = 1; + setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (char *)&opt, sizeof(opt)); + opt = 1; + setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt)); + + ret = sobind(pData, so); + if (ret != 0) + return ret; + + addr.sin_family = AF_INET; + if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr) + { + /* It's an alias */ + switch(RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask) + { + case CTL_DNS: + /* + * TCP DNS proxy. We only support "forwarding" to + * single server. We don't have infrastructure in + * place to re-try connections to other servers. 
+ */ + if ( pData->fUseDnsProxy + && so->so_fport == RT_H2N_U16_C(53)) + { + struct dns_entry *ns = TAILQ_LAST(&pData->pDnsList, dns_list_head); + if (ns != NULL) + { + addr.sin_addr = ns->de_addr; + break; + } + } + RT_FALL_THRU(); + case CTL_ALIAS: + default: + addr.sin_addr = loopback_addr; + break; + } + } + else + addr.sin_addr = so->so_faddr; + addr.sin_port = so->so_fport; + + Log2(("NAT: tcp connect to %RTnaipv4:%d\n", + addr.sin_addr.s_addr, RT_N2H_U16(addr.sin_port))); + + ret = connect(s,(struct sockaddr *)&addr,sizeof (addr)); + + /* + * If it's not in progress, it failed, so we just return 0, + * without clearing SS_NOFDREF + */ + soisfconnecting(so); + } + + return(ret); +} + +/* + * Accept the socket and connect to the local-host + * + * We have a problem. The correct thing to do would be + * to first connect to the local-host, and only if the + * connection is accepted, then do an accept() here. + * But, a) we need to know who's trying to connect + * to the socket to be able to SYN the local-host, and + * b) we are already connected to the foreign host by + * the time it gets to accept(), so... We simply accept + * here and SYN the local-host. + */ +void +tcp_connect(PNATState pData, struct socket *inso) +{ + struct socket *so; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(struct sockaddr_in); + struct tcpcb *tp; + int s, opt; + int status; + socklen_t optlen; + static int cVerbose = 1; + + LogFlowFunc(("ENTER: inso = %R[natsock]\n", inso)); + + if ( inso->so_laddr.s_addr == INADDR_ANY /* delayed port-forwarding? */ + && pData->guest_addr_guess.s_addr == INADDR_ANY) + { + LogRel2(("NAT: Port-forward: guest address unknown for %R[natsock]\n", inso)); + closesocket(accept(inso->s, NULL, NULL)); + if (inso->so_state & SS_FACCEPTONCE) + tcp_close(pData, sototcpcb(inso)); + return; + } + + /* + * If it's an SS_ACCEPTONCE socket, no need to socreate() + * another socket, just use the accept() socket. 
+ */ + if (inso->so_state & SS_FACCEPTONCE) + { + /* FACCEPTONCE already have a tcpcb */ + so = inso; + } + else + { + if ((so = socreate()) == NULL) + { + /* If it failed, get rid of the pending connection */ + closesocket(accept(inso->s,(struct sockaddr *)&addr,&addrlen)); + return; + } + if (tcp_attach(pData, so) < 0) + { + RTMemFree(so); /* NOT sofree */ + return; + } + so->so_laddr = inso->so_laddr; + so->so_lport = inso->so_lport; + } + + if (so->so_laddr.s_addr == INADDR_ANY) + { + LogRel2(("NAT: Port-forward: using %RTnaipv4 for %R[natsock]\n", + pData->guest_addr_guess.s_addr, inso)); + so->so_laddr = pData->guest_addr_guess; + } + + (void) tcp_mss(pData, sototcpcb(so), 0); + + fd_nonblock(inso->s); + if ((s = accept(inso->s,(struct sockaddr *)&addr,&addrlen)) < 0) + { + tcp_close(pData, sototcpcb(so)); /* This will sofree() as well */ + return; + } + fd_nonblock(s); + opt = 1; + setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)); + opt = 1; + setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int)); + opt = 1; + setsockopt(s, IPPROTO_TCP, TCP_NODELAY,(char *)&opt, sizeof(int)); + + optlen = sizeof(int); + status = getsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, &optlen); + if (status < 0) + { + LogRel(("NAT: Error(%d) while getting RCV capacity\n", errno)); + goto no_sockopt; + } + if (cVerbose > 0) + LogRel(("NAT: Old socket recv size: %dKB\n", opt / 1024)); + /** @todo (r-vvl) make it configurable (via extra data) */ + opt = pData->socket_rcv; + status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int)); + if (status < 0) + { + LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt)); + goto no_sockopt; + } + optlen = sizeof(int); + status = getsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, &optlen); + if (status < 0) + { + LogRel(("NAT: Error(%d) while getting SND capacity\n", errno)); + goto no_sockopt; + } + if (cVerbose > 0) + LogRel(("NAT: Old socket send size: %dKB\n", opt / 1024)); + 
opt = pData->socket_rcv; + status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int)); + if (status < 0) + { + LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt)); + goto no_sockopt; + } + if (cVerbose > 0) + cVerbose--; + + no_sockopt: + so->so_fport = addr.sin_port; + so->so_faddr = addr.sin_addr; + /* Translate connections from localhost to the real hostname */ + if (so->so_faddr.s_addr == 0 || so->so_faddr.s_addr == loopback_addr.s_addr) + so->so_faddr = alias_addr; + + /* Close the accept() socket, set right state */ + if (inso->so_state & SS_FACCEPTONCE) + { + closesocket(so->s); /* If we only accept once, close the accept() socket */ + so->so_state = SS_NOFDREF; /* Don't select it yet, even though we have an FD */ + /* if it's not FACCEPTONCE, it's already NOFDREF */ + } + so->s = s; + + tp = sototcpcb(so); + + tcp_template(tp); + + /* Compute window scaling to request. */ +/* while (tp->request_r_scale < TCP_MAX_WINSHIFT + * && (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) + * tp->request_r_scale++; + */ + +/* soisconnecting(so); */ /* NOFDREF used instead */ + tcpstat.tcps_connattempt++; + + TCP_STATE_SWITCH_TO(tp, TCPS_SYN_SENT); + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tp->iss = tcp_iss; + tcp_iss += TCP_ISSINCR/2; + tcp_sendseqinit(tp); + tcp_output(pData, tp); +} + +/* + * Attach a TCPCB to a socket. + */ +int +tcp_attach(PNATState pData, struct socket *so) +{ + /* We're attaching already attached socket??? 
*/ + Assert(so->so_type == 0); + if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL) + return -1; + + SOCKET_LOCK_CREATE(so); + QSOCKET_LOCK(tcb); + insque(pData, so, &tcb); + NSOCK_INC(); + QSOCKET_UNLOCK(tcb); + return 0; +} diff --git a/src/VBox/Devices/Network/slirp/tcp_timer.c b/src/VBox/Devices/Network/slirp/tcp_timer.c new file mode 100644 index 00000000..5ee67c9e --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tcp_timer.c @@ -0,0 +1,361 @@ +/* $Id: tcp_timer.c $ */ +/** @file + * NAT - TCP timers. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 + * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp + */ + +#include <slirp.h> + + +static struct tcpcb *tcp_timers(PNATState pData, register struct tcpcb *tp, int timer); +/* + * Fast timeout routine for processing delayed acks + */ +void +tcp_fasttimo(PNATState pData) +{ + register struct socket *so, *so_next; + register struct tcpcb *tp; + + LogFlowFuncEnter(); + + so = tcb.so_next; + if (so) + QSOCKET_FOREACH (so, so_next, tcp) + /* { */ + if ( (tp = (struct tcpcb *)so->so_tcpcb) + && (tp->t_flags & TF_DELACK)) + { + tp->t_flags &= ~TF_DELACK; + tp->t_flags |= TF_ACKNOW; + tcpstat.tcps_delack++; + TCP_OUTPUT(pData, tp); + } + LOOP_LABEL(tcp, so, so_next); + } +} + +/* + * Tcp protocol timeout routine called every 500 ms. 
+ * Updates the timers in all active tcb's and + * causes finite state machine actions if timers expire. + */ +void +tcp_slowtimo(PNATState pData) +{ + register struct socket *ip, *ipnxt; + register struct tcpcb *tp; + register int i; + + LogFlowFuncEnter(); + + /* + * Search through tcb's and update active timers. + */ + ip = tcb.so_next; + if (ip == 0) + return; + QSOCKET_FOREACH(ip, ipnxt, tcp) + /* { */ + ipnxt = ip->so_next; + tp = sototcpcb(ip); + if (tp == 0) + CONTINUE(tcp); + for (i = 0; i < TCPT_NTIMERS; i++) + { + if (tp->t_timer[i] && --tp->t_timer[i] == 0) + { + tcp_timers(pData, tp, i); + if (ipnxt->so_prev != ip) + goto tpgone; + } + } + tp->t_idle++; + if (tp->t_rtt) + tp->t_rtt++; +tpgone: + ; + LOOP_LABEL(tcp, ip, ipnxt); + } + tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */ +#ifdef TCP_COMPAT_42 + if ((int)tcp_iss < 0) + tcp_iss = 0; /* XXX */ +#endif + tcp_now++; /* for timestamps */ +} + +/* + * Cancel all timers for TCP tp. + */ +void +tcp_canceltimers(struct tcpcb *tp) +{ + register int i; + + for (i = 0; i < TCPT_NTIMERS; i++) + tp->t_timer[i] = 0; +} + +const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = +{ + 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 +}; + +/* + * TCP timer processing. + */ +static struct tcpcb * +tcp_timers(PNATState pData, register struct tcpcb *tp, int timer) +{ + register int rexmt; + int fUninitializedTemplate = 0; + + LogFlowFunc(("ENTER: tp:%R[tcpcb793], timer:%d\n", tp, timer)); + fUninitializedTemplate = RT_BOOL(( tp->t_template.ti_src.s_addr == INADDR_ANY + || tp->t_template.ti_dst.s_addr == INADDR_ANY)); + if (fUninitializedTemplate) + { + tp = tcp_drop(pData, tp, 0); + return tp; + } + + switch (timer) + { + /* + * 2 MSL timeout in shutdown went off. If we're closed but + * still waiting for peer to close and connection has been idle + * too long, or if 2MSL time is up from TIME_WAIT, delete connection + * control block. Otherwise, check again in a bit. 
+ */ + case TCPT_2MSL: + if (tp->t_state != TCPS_TIME_WAIT && + tp->t_idle <= tcp_maxidle) + tp->t_timer[TCPT_2MSL] = tcp_keepintvl; + else + tp = tcp_close(pData, tp); + break; + + /* + * Retransmission timer went off. Message has not + * been acked within retransmit interval. Back off + * to a longer retransmit interval and retransmit one segment. + */ + case TCPT_REXMT: + STAM_COUNTER_INC(&pData->StatTCP_retransmit); + /* + * XXX If a packet has timed out, then remove all the queued + * packets for that session. + */ + if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) + { + /* + * This is a hack to suit our terminal server here at the uni of canberra + * since they have trouble with zeroes... It usually lets them through + * unharmed, but under some conditions, it'll eat the zeros. If we + * keep retransmitting it, it'll keep eating the zeroes, so we keep + * retransmitting, and eventually the connection dies... + * (this only happens on incoming data) + * + * So, if we were gonna drop the connection from too many retransmits, + * don't... instead halve the t_maxseg, which might break up the NULLs and + * let them through + * + * *sigh* + */ + tp->t_maxseg >>= 1; + if (tp->t_maxseg < 32) + { + /* + * We tried our best, now the connection must die! + */ + tp->t_rxtshift = TCP_MAXRXTSHIFT; + tcpstat.tcps_timeoutdrop++; + tp = tcp_drop(pData, tp, tp->t_softerror); + /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ + return (tp); /* XXX */ + } + + /* + * Set rxtshift to 6, which is still at the maximum + * backoff time + */ + tp->t_rxtshift = 6; + } + tcpstat.tcps_rexmttimeo++; + rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; + TCPT_RANGESET(tp->t_rxtcur, rexmt, + (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */ + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + /* + * If losing, let the lower level know and try for + * a better route. Also, if we backed off this far, + * our srtt estimate is probably bogus. 
Clobber it + * so we'll take the next rtt measurement as our srtt; + * move the current srtt into rttvar to keep the current + * retransmit times until then. + */ + if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) + { +/* in_losing(tp->t_inpcb); */ + tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); + tp->t_srtt = 0; + } + tp->snd_nxt = tp->snd_una; + /* + * If timing a segment in this window, stop the timer. + */ + tp->t_rtt = 0; + /* + * Close the congestion window down to one segment + * (we'll open it by one segment for each ack we get). + * Since we probably have a window's worth of unacked + * data accumulated, this "slow start" keeps us from + * dumping all that data as back-to-back packets (which + * might overwhelm an intermediate gateway). + * + * There are two phases to the opening: Initially we + * open by one mss on each ack. This makes the window + * size increase exponentially with time. If the + * window is larger than the path can handle, this + * exponential growth results in dropped packet(s) + * almost immediately. To get more time between + * drops but still "push" the network to take advantage + * of improving conditions, we switch from exponential + * to linear window opening at some threshold size. + * For a threshold, we use half the current window + * size, truncated to a multiple of the mss. + * + * (the minimum cwnd that will give us exponential + * growth is 2 mss. We don't allow the threshold + * to go below this.) + */ + { + u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; + if (win < 2) + win = 2; + tp->snd_cwnd = tp->t_maxseg; + tp->snd_ssthresh = win * tp->t_maxseg; + tp->t_dupacks = 0; + } + (void) tcp_output(pData, tp); + break; + + /* + * Persistence timer into zero window. + * Force a byte to be output, if possible. 
+ */ + case TCPT_PERSIST: + tcpstat.tcps_persisttimeo++; + tcp_setpersist(tp); + tp->t_force = 1; + (void) tcp_output(pData, tp); + tp->t_force = 0; + break; + + /* + * Keep-alive timer went off; send something + * or drop connection if idle for too long. + */ + case TCPT_KEEP: + tcpstat.tcps_keeptimeo++; + if (tp->t_state < TCPS_ESTABLISHED) + goto dropit; +/* if (tp->t_socket->so_options & SO_KEEPALIVE && */ + if ((so_options) && tp->t_state <= TCPS_CLOSE_WAIT) + { + if (tp->t_idle >= tcp_keepidle + tcp_maxidle) + goto dropit; + /* + * Send a packet designed to force a response + * if the peer is up and reachable: + * either an ACK if the connection is still alive, + * or an RST if the peer has closed the connection + * due to timeout or reboot. + * Using sequence number tp->snd_una-1 + * causes the transmitted zero-length segment + * to lie outside the receive window; + * by the protocol spec, this requires the + * correspondent TCP to respond. + */ + tcpstat.tcps_keepprobe++; +#ifdef TCP_COMPAT_42 + /* + * The keepalive packet must have nonzero length + * to get a 4.2 host to respond. + */ + tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, + tp->rcv_nxt - 1, tp->snd_una - 1, 0); +#else + tcp_respond(pData, tp, &tp->t_template, (struct mbuf *)NULL, + tp->rcv_nxt, tp->snd_una - 1, 0); +#endif + tp->t_timer[TCPT_KEEP] = tcp_keepintvl; + } + else + tp->t_timer[TCPT_KEEP] = tcp_keepidle; + break; + + dropit: + tcpstat.tcps_keepdrops++; + tp = tcp_drop(pData, tp, 0); /* ETIMEDOUT); */ + break; + } + + return tp; +} diff --git a/src/VBox/Devices/Network/slirp/tcp_timer.h b/src/VBox/Devices/Network/slirp/tcp_timer.h new file mode 100644 index 00000000..c2c03b77 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tcp_timer.h @@ -0,0 +1,160 @@ +/* $Id: tcp_timer.h $ */ +/** @file + * NAT - TCP timer (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. 
+ * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 + * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp + */ + +#ifndef _TCP_TIMER_H_ +#define _TCP_TIMER_H_ + +/* + * Definitions of the TCP timers. These timers are counted + * down PR_SLOWHZ times a second. + */ +#define TCPT_NTIMERS 4 + +#define TCPT_REXMT 0 /* retransmit */ +#define TCPT_PERSIST 1 /* retransmit persistence */ +#define TCPT_KEEP 2 /* keep alive */ +#define TCPT_2MSL 3 /* 2*msl quiet time timer */ + +/* + * The TCPT_REXMT timer is used to force retransmissions. + * The TCP has the TCPT_REXMT timer set whenever segments + * have been sent for which ACKs are expected but not yet + * received. If an ACK is received which advances tp->snd_una, + * then the retransmit timer is cleared (if there are no more + * outstanding segments) or reset to the base value (if there + * are more ACKs expected). Whenever the retransmit timer goes off, + * we retransmit one unacknowledged segment, and do a backoff + * on the retransmit timer. + * + * The TCPT_PERSIST timer is used to keep window size information + * flowing even if the window goes shut. If all previous transmissions + * have been acknowledged (so that there are no retransmissions in progress), + * and the window is too small to bother sending anything, then we start + * the TCPT_PERSIST timer. When it expires, if the window is nonzero, + * we go to transmit state. 
Otherwise, at intervals send a single byte + * into the peer's window to force him to update our window information. + * We do this at most as often as TCPT_PERSMIN time intervals, + * but no more frequently than the current estimate of round-trip + * packet time. The TCPT_PERSIST timer is cleared whenever we receive + * a window update from the peer. + * + * The TCPT_KEEP timer is used to keep connections alive. If a + * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, + * but not yet established, then we drop the connection. Once the connection + * is established, if the connection is idle for TCPTV_KEEP_IDLE time + * (and keepalives have been enabled on the socket), we begin to probe + * the connection. We force the peer to send us a segment by sending: + * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK> + * This segment is (deliberately) outside the window, and should elicit + * an ack segment in response from the peer. If, despite the TCPT_KEEP + * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE + * amount of time probing, then we drop the connection. + */ + +/* + * Time constants. + */ +#define TCPTV_MSL ( 5*PR_SLOWHZ) /* max seg lifetime (hah!)
*/ + +#define TCPTV_SRTTBASE 0 /* base roundtrip time; + if 0, no idea yet */ +#define TCPTV_SRTTDFLT ( 3*PR_SLOWHZ) /* assumed RTT if no info */ + +#define TCPTV_PERSMIN ( 5*PR_SLOWHZ) /* retransmit persistence */ +#define TCPTV_PERSMAX ( 60*PR_SLOWHZ) /* maximum persist interval */ + +#define TCPTV_KEEP_INIT ( 75*PR_SLOWHZ) /* initial connect keep alive */ +#define TCPTV_KEEP_IDLE (120*60*PR_SLOWHZ) /* dflt time before probing */ +#define TCPTV_KEEPINTVL ( 75*PR_SLOWHZ) /* default probe interval */ +#define TCPTV_KEEPCNT 8 /* max probes before drop */ + +#define TCPTV_MIN ( 1*PR_SLOWHZ) /* minimum allowable value */ +#define TCPTV_REXMTMAX ( 12*PR_SLOWHZ) /* max allowable REXMT value */ + +#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ + +#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ + + +#ifdef TCPTIMERS +char *tcptimers[] = + { "REXMT", "PERSIST", "KEEP", "2MSL" }; +#endif + +/* + * Force a time value to be in a certain range. + */ +#define TCPT_RANGESET(tv, value, tvmin, tvmax) { \ + (tv) = (value); \ + if ((tv) < (tvmin)) \ + (tv) = (tvmin); \ + else if ((tv) > (tvmax)) \ + (tv) = (tvmax); \ +} + +extern const int tcp_backoff[]; + +struct tcpcb; + +void tcp_fasttimo (PNATState); +void tcp_slowtimo (PNATState); +void tcp_canceltimers (struct tcpcb *); +#endif diff --git a/src/VBox/Devices/Network/slirp/tcp_var.h b/src/VBox/Devices/Network/slirp/tcp_var.h new file mode 100644 index 00000000..ae183f6c --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tcp_var.h @@ -0,0 +1,269 @@ +/* $Id: tcp_var.h $ */ +/** @file + * NAT - TCP (declarations). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 + * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp + */ + +#ifndef _TCP_VAR_H_ +#define _TCP_VAR_H_ + +#include "queue.h" +#include "tcpip.h" +#include "tcp_timer.h" + +/* TCP segment queue entry */ +struct tseg_qent +{ + LIST_ENTRY(tseg_qent) tqe_q; + int tqe_len; /* TCP segment data length */ + struct tcphdr *tqe_th; /* a pointer to tcp header */ + struct mbuf *tqe_m; /* mbuf contains packet */ +}; +LIST_HEAD(tsegqe_head, tseg_qent); + +/* + * Tcp control block, one per tcp; fields: + */ +struct tcpcb +{ + LIST_ENTRY(tcpcb) t_list; + struct tsegqe_head t_segq; /* segment reassembly queue */ + int t_segqlen; /* segment reassembly queue length */ + int16_t t_state; /* state of this connection */ + int16_t t_timer[TCPT_NTIMERS]; /* tcp timers */ + int16_t t_rxtshift; /* log(2) of rexmt exp. 
backoff */ + int16_t t_rxtcur; /* current retransmit value */ + int16_t t_dupacks; /* consecutive dup acks recd */ + uint16_t t_maxseg; /* maximum segment size */ + char t_force; /* 1 if forcing out a byte */ + uint16_t t_flags; +#define TF_ACKNOW 0x0001 /* ack peer immediately */ +#define TF_DELACK 0x0002 /* ack, but try to delay it */ +#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ +#define TF_NOOPT 0x0008 /* don't use tcp options */ +#define TF_SENTFIN 0x0010 /* have sent FIN */ +#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ +#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ +#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ +#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ +#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ + + /* Make it static for now */ +/* struct tcpiphdr *t_template; / * skeletal packet for transmit */ + struct tcpiphdr t_template; + + struct socket *t_socket; /* back pointer to socket */ +/* + * The following fields are used as in the protocol specification. + * See RFC783, Dec. 1981, page 21. + */ + /* send sequence variables */ + tcp_seq snd_una; /* send unacknowledged */ + tcp_seq snd_nxt; /* send next */ + tcp_seq snd_up; /* send urgent pointer */ + tcp_seq snd_wl1; /* window update seg seq number */ + tcp_seq snd_wl2; /* window update seg ack number */ + tcp_seq iss; /* initial send sequence number */ + uint32_t snd_wnd; /* send window */ + /* receive sequence variables */ + uint32_t rcv_wnd; /* receive window */ + tcp_seq rcv_nxt; /* receive next */ + tcp_seq rcv_up; /* receive urgent pointer */ + tcp_seq irs; /* initial receive sequence number */ +/* + * Additional variables for this implementation. 
+ */ + /* receive variables */ + tcp_seq rcv_adv; /* advertised window */ + /* retransmit variables */ + tcp_seq snd_max; /* highest sequence number sent; + * used to recognize retransmits + */ + /* congestion control (for slow start, source quench, retransmit after loss) */ + uint32_t snd_cwnd; /* congestion-controlled window */ + uint32_t snd_ssthresh; /* snd_cwnd size threshold for + * slow start exponential to + * linear switch + */ +/* + * transmit timing stuff. See below for scale of srtt and rttvar. + * "Variance" is actually smoothed difference. + */ + int16_t t_idle; /* inactivity time */ + int16_t t_rtt; /* round trip time */ + tcp_seq t_rtseq; /* sequence number being timed */ + int16_t t_srtt; /* smoothed round-trip time */ + int16_t t_rttvar; /* variance in round-trip time */ + uint16_t t_rttmin; /* minimum rtt allowed */ + uint32_t max_sndwnd; /* largest window peer has offered */ + +/* out-of-band data */ + char t_oobflags; /* have some */ + char t_iobc; /* input character */ +#define TCPOOB_HAVEDATA 0x01 +#define TCPOOB_HADDATA 0x02 + short t_softerror; /* possible error not yet reported */ + +/* RFC 1323 variables */ + uint8_t snd_scale; /* window scaling for send window */ + uint8_t rcv_scale; /* window scaling for recv window */ + uint8_t request_r_scale; /* pending window scaling */ + uint8_t requested_s_scale; + uint32_t ts_recent; /* timestamp echo data */ + uint32_t ts_recent_age; /* when last updated */ + tcp_seq last_ack_sent; +}; + +LIST_HEAD(tcpcbhead, tcpcb); + +#define sototcpcb(so) ((so)->so_tcpcb) + +/* + * The smoothed round-trip time and estimated variance + * are stored as fixed point numbers scaled by the values below. + * For convenience, these scales are also used in smoothing the average + * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). + * With these scales, srtt has 3 bits to the right of the binary point, + * and thus an "ALPHA" of 0.875. 
rttvar has 2 bits to the right of the + * binary point, and is smoothed with an ALPHA of 0.75. + */ +#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ +#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ +#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ +#define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 2 bits */ + +/* + * The initial retransmission should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + * This macro assumes that the value of TCP_RTTVAR_SCALE + * is the same as the multiplier for rttvar. + */ +#define TCP_REXMTVAL(tp) \ + (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) + +/* + * TCP statistics. + * Many of these should be kept per connection, + * but that's inconvenient at the moment. + */ +struct tcpstat_t +{ + u_long tcps_connattempt; /* connections initiated */ + u_long tcps_accepts; /* connections accepted */ + u_long tcps_connects; /* connections established */ + u_long tcps_drops; /* connections dropped */ + u_long tcps_conndrops; /* embryonic connections dropped */ + u_long tcps_closed; /* conn. closed (includes drops) */ + u_long tcps_segstimed; /* segs where we tried to get rtt */ + u_long tcps_rttupdated; /* times we succeeded */ + u_long tcps_delack; /* delayed acks sent */ + u_long tcps_timeoutdrop; /* conn. 
dropped in rxmt timeout */ + u_long tcps_rexmttimeo; /* retransmit timeouts */ + u_long tcps_persisttimeo; /* persist timeouts */ + u_long tcps_keeptimeo; /* keepalive timeouts */ + u_long tcps_keepprobe; /* keepalive probes sent */ + u_long tcps_keepdrops; /* connections dropped in keepalive */ + + u_long tcps_sndtotal; /* total packets sent */ + u_long tcps_sndpack; /* data packets sent */ + u_long tcps_sndbyte; /* data bytes sent */ + u_long tcps_sndrexmitpack; /* data packets retransmitted */ + u_long tcps_sndrexmitbyte; /* data bytes retransmitted */ + u_long tcps_sndacks; /* ack-only packets sent */ + u_long tcps_sndprobe; /* window probes sent */ + u_long tcps_sndurg; /* packets sent with URG only */ + u_long tcps_sndwinup; /* window update-only packets sent */ + u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ + + u_long tcps_rcvtotal; /* total packets received */ + u_long tcps_rcvpack; /* packets received in sequence */ + u_long tcps_rcvbyte; /* bytes received in sequence */ + u_long tcps_rcvbadsum; /* packets received with checksum errs */ + u_long tcps_rcvbadoff; /* packets received with bad offset */ +/* u_long tcps_rcvshort; */ /* packets received too short */ + u_long tcps_rcvduppack; /* duplicate-only packets received */ + u_long tcps_rcvdupbyte; /* duplicate-only bytes received */ + u_long tcps_rcvpartduppack; /* packets with some duplicate data */ + u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. 
packets */ + u_long tcps_rcvoopack; /* out-of-order packets received */ + u_long tcps_rcvoobyte; /* out-of-order bytes received */ + u_long tcps_rcvpackafterwin; /* packets with data after window */ + u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */ + u_long tcps_rcvafterclose; /* packets rcvd after "close" */ + u_long tcps_rcvwinprobe; /* rcvd window probe packets */ + u_long tcps_rcvdupack; /* rcvd duplicate acks */ + u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */ + u_long tcps_rcvackpack; /* rcvd ack packets */ + u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */ + u_long tcps_rcvwinupd; /* rcvd window update packets */ +/* u_long tcps_pawsdrop; */ /* segments dropped due to PAWS */ + u_long tcps_predack; /* times hdr predict ok for acks */ + u_long tcps_preddat; /* times hdr predict ok for data pkts */ + u_long tcps_socachemiss; /* tcp_last_so misses */ + u_long tcps_didnuttin; /* Times tcp_output didn't do anything XXX */ + u_long tcps_rcvmemdrop; +}; + +#endif diff --git a/src/VBox/Devices/Network/slirp/tcpip.h b/src/VBox/Devices/Network/slirp/tcpip.h new file mode 100644 index 00000000..fc6c3b08 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tcpip.h @@ -0,0 +1,102 @@ +/* $Id: tcpip.h $ */ +/** @file + * NAT - TCP/IP (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 + * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp + */ + +#ifndef _TCPIP_H_ +#define _TCPIP_H_ + +/* + * Tcp+ip header, after ip options removed. + */ +struct tcpiphdr +{ + struct ipovly ti_i; /* overlaid ip structure */ + struct tcphdr ti_t; /* tcp header */ +}; +AssertCompileSize(struct tcpiphdr, 40); +#define ti_next ti_i.ih_next +#define ti_prev ti_i.ih_prev +#define ti_x1 ti_i.ih_x1 +#define ti_pr ti_i.ih_pr +#define ti_len ti_i.ih_len +#define ti_src ti_i.ih_src +#define ti_dst ti_i.ih_dst +#define ti_sport ti_t.th_sport +#define ti_dport ti_t.th_dport +#define ti_seq ti_t.th_seq +#define ti_ack ti_t.th_ack +#define ti_x2 ti_t.th_x2 +#define ti_off ti_t.th_off +#define ti_flags ti_t.th_flags +#define ti_win ti_t.th_win +#define ti_sum ti_t.th_sum +#define ti_urp ti_t.th_urp + +/* + * Just a clean way to get to the first byte + * of the packet + */ +struct tcpiphdr_2 +{ + struct tcpiphdr dummy; + char first_char; +}; + +#endif diff --git a/src/VBox/Devices/Network/slirp/tftp.c b/src/VBox/Devices/Network/slirp/tftp.c new file mode 100644 index 00000000..6bf4c5b7 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tftp.c @@ -0,0 +1,931 @@ +/* $Id: tftp.c $ */ +/** @file + * NAT - TFTP server. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * tftp.c - a simple, read-only tftp server for qemu + * + * Copyright (c) 2004 Magnus Damm <damm@opensource.se> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <slirp.h> +#include <iprt/file.h> +#include <iprt/err.h> +#include <iprt/path.h> + +typedef enum ENMTFTPSESSIONFMT +{ + TFTPFMT_NONE = 0, + TFTPFMT_OCTET, + TFTPFMT_NETASCII, + TFTPFMT_MAIL, + TFTPFMT_NOT_FMT = 0xffff +} ENMTFTPSESSIONFMT; + +typedef struct TFPTPSESSIONOPTDESC +{ + int fRequested; + uint64_t u64Value; +} TFPTPSESSIONOPTDESC, *PTFPTPSESSIONOPTDESC; + +typedef struct TFTPSESSION +{ + int fInUse; + struct in_addr IpClientAddress; + uint16_t u16ClientPort; + int iTimestamp; + uint64_t cbTransfered; + uint16_t cTftpAck; + ENMTFTPSESSIONFMT enmTftpFmt; + TFPTPSESSIONOPTDESC OptionBlkSize; + TFPTPSESSIONOPTDESC OptionTSize; + TFPTPSESSIONOPTDESC OptionTimeout; + + const char *pcszFilenameHost; + char szFilename[TFTP_FILENAME_MAX]; +} TFTPSESSION, *PTFTPSESSION, **PPTFTPSESSION; + +#pragma pack(1) +typedef struct TFTPCOREHDR +{ + uint16_t u16TftpOpCode; + /* Data lays here (might be raw uint8_t* or header of payload ) */ +} TFTPCOREHDR, *PTFTPCOREHDR; + +typedef struct TFTPIPHDR +{ + struct ip IPv4Hdr; + struct udphdr UdpHdr; + uint16_t u16TftpOpType; + TFTPCOREHDR Core; + /* Data lays here */ +} TFTPIPHDR, *PTFTPIPHDR; +#pragma pack() + +typedef const PTFTPIPHDR PCTFTPIPHDR; + +typedef const PTFTPSESSION PCTFTPSESSION; + + +typedef struct TFTPOPTIONDESC +{ + const char *pszName; + ENMTFTPSESSIONFMT enmType; + int cbName; + bool fHasValue; +} TFTPOPTIONDESC, *PTFTPOPTIONDESC; + +typedef const PTFTPOPTIONDESC PCTFTPOPTIONDESC; +static TFTPOPTIONDESC g_TftpTransferFmtDesc[] = +{ + {"octet", TFTPFMT_OCTET, 5, false}, /* RFC1350 */ + {"netascii", TFTPFMT_NETASCII, 8, false}, /* RFC1350 */ + {"mail", TFTPFMT_MAIL, 4, false}, /* RFC1350 */ +}; + +static TFTPOPTIONDESC g_TftpDesc[] = +{ + {"blksize", TFTPFMT_NOT_FMT, 7, true}, /* RFC2348 */ + {"timeout", TFTPFMT_NOT_FMT, 7, true}, /* RFC2349 */ + {"tsize", TFTPFMT_NOT_FMT, 5, true}, /* RFC2349 */ + {"size", TFTPFMT_NOT_FMT, 4, true}, /* RFC2349 */ +}; + + +DECLINLINE(struct mbuf *) 
slirpTftpMbufAlloc(PNATState pData) +{ + struct mbuf *m = slirpServiceMbufAlloc(pData, CTL_TFTP); + if (RT_UNLIKELY(m == NULL)) + LogFlowFunc(("LEAVE: Can't allocate mbuf\n")); + return m; +} + + +/** + * This function resolves file name relative to tftp prefix. + * @param pData + * @param pTftpSession + */ +DECLINLINE(int) tftpSecurityFilenameCheck(PNATState pData, PTFTPSESSION pTftpSession) +{ + int rc = VERR_FILE_NOT_FOUND; /* guilty until proved innocent */ + + AssertPtrReturn(pTftpSession, VERR_INVALID_PARAMETER); + AssertReturn(pTftpSession->pcszFilenameHost == NULL, VERR_INVALID_PARAMETER); + + /* prefix must be set to an absolute pathname. assert? */ + if (tftp_prefix == NULL || RTPathSkipRootSpec(tftp_prefix) == tftp_prefix) + goto done; + + /* replace backslashes with forward slashes */ + char *s = pTftpSession->szFilename; + while ((s = strchr(s, '\\')) != NULL) + *s++ = '/'; + + /* deny dot-dot by itself or at the beginning */ + if ( pTftpSession->szFilename[0] == '.' + && pTftpSession->szFilename[1] == '.' 
+ && ( pTftpSession->szFilename[2] == '\0' + || pTftpSession->szFilename[2] == '/')) + goto done; + + /* deny dot-dot in the middle */ + if (RTStrStr(pTftpSession->szFilename, "/../") != NULL) + goto done; + + /* deny dot-dot at the end (there's no RTStrEndsWith) */ + const char *dotdot = RTStrStr(pTftpSession->szFilename, "/.."); + if (dotdot != NULL && dotdot[3] == '\0') + goto done; + + char *pszPathHostAbs; + int cbLen = RTStrAPrintf(&pszPathHostAbs, "%s/%s", + tftp_prefix, pTftpSession->szFilename); + if (cbLen == -1) + goto done; + + LogRel2(("NAT: TFTP: %s\n", pszPathHostAbs)); + pTftpSession->pcszFilenameHost = pszPathHostAbs; + rc = VINF_SUCCESS; + + done: + LogFlowFuncLeaveRC(rc); + return rc; +} + +/* + * This function returns index of option descriptor in passed descriptor array + * @param piIdxOpt returned index value + * @param paTftpDesc array of known Tftp descriptors + * @param caTftpDesc size of array of tftp descriptors + * @param pszOpt name of option + */ +DECLINLINE(int) tftpFindDesciptorIndexByName(int *piIdxOpt, PCTFTPOPTIONDESC paTftpDesc, int caTftpDesc, const char *pszOptName) +{ + int rc = VINF_SUCCESS; + int idxOption = 0; + AssertReturn(piIdxOpt, VERR_INVALID_PARAMETER); + AssertReturn(paTftpDesc, VERR_INVALID_PARAMETER); + AssertReturn(pszOptName, VERR_INVALID_PARAMETER); + for (idxOption = 0; idxOption < caTftpDesc; ++idxOption) + { + if (!RTStrNICmp(pszOptName, paTftpDesc[idxOption].pszName, 10)) + { + *piIdxOpt = idxOption; + return rc; + } + } + rc = VERR_NOT_FOUND; + return rc; +} + +/** + * Helper function to look for index of descriptor in transfer format descriptors + * @param piIdxOpt returned value of index + * @param pszOpt name of option + */ +DECLINLINE(int) tftpFindTransferFormatIdxbyName(int *piIdxOpt, const char *pszOpt) +{ + return tftpFindDesciptorIndexByName(piIdxOpt, &g_TftpTransferFmtDesc[0], RT_ELEMENTS(g_TftpTransferFmtDesc), pszOpt); +} + +/** + * Helper function to look for index of descriptor in options 
descriptors + * @param piIdxOpt returned value of index + * @param pszOpt name of option + */ +DECLINLINE(int) tftpFindOptionIdxbyName(int *piIdxOpt, const char *pszOpt) +{ + return tftpFindDesciptorIndexByName(piIdxOpt, &g_TftpDesc[0], RT_ELEMENTS(g_TftpDesc), pszOpt); +} + + +#if 0 /* unused */ +DECLINLINE(bool) tftpIsAcceptableOption(const char *pszOptionName) +{ + int idxOptDesc = 0; + AssertPtrReturn(pszOptionName, false); + AssertReturn(RTStrNLen(pszOptionName,10) >= 4, false); + AssertReturn(RTStrNLen(pszOptionName,10) < 8, false); + for(idxOptDesc = 0; idxOptDesc < RT_ELEMENTS(g_TftpTransferFmtDesc); ++idxOptDesc) + { + if (!RTStrNICmp(pszOptionName, g_TftpTransferFmtDesc[idxOptDesc].pszName, 10)) + return true; + } + for(idxOptDesc = 0; idxOptDesc < RT_ELEMENTS(g_TftpDesc); ++idxOptDesc) + { + if (!RTStrNICmp(pszOptionName, g_TftpDesc[idxOptDesc].pszName, 10)) + return true; + } + return false; +} +#endif /* unused */ + + +/** + * This helper function that validate if client want to operate in supported by server mode. 
+ * @param pcTftpHeader comulative header (IP, UDP, TFTP) + * @param pcu8Options pointer to the options supposing that pointer points at the mode option + * @param cbOptions size of the options buffer + */ +DECLINLINE(int) tftpIsSupportedTransferMode(PCTFTPSESSION pcTftpSession) +{ + AssertPtrReturn(pcTftpSession, 0); + return (pcTftpSession->enmTftpFmt == TFTPFMT_OCTET); +} + + +DECLINLINE(void) tftpSessionUpdate(PNATState pData, PTFTPSESSION pTftpSession) +{ + pTftpSession->iTimestamp = curtime; + pTftpSession->fInUse = 1; +} + +DECLINLINE(void) tftpSessionTerminate(PTFTPSESSION pTftpSession) +{ + if (pTftpSession->pcszFilenameHost != NULL) + { + RTStrFree((char *)pTftpSession->pcszFilenameHost); + pTftpSession->pcszFilenameHost = NULL; + } + + pTftpSession->fInUse = 0; +} + +DECLINLINE(int) tftpSessionParseAndMarkOption(const char *pcszRawOption, PTFPTPSESSIONOPTDESC pTftpSessionOption) +{ + int rc = VINF_SUCCESS; + rc = RTStrToInt64Full(pcszRawOption, 0, (int64_t *)&pTftpSessionOption->u64Value); + AssertRCReturn(rc, rc); + pTftpSessionOption->fRequested = 1; + return rc; +} + +DECLINLINE(int) tftpSessionOptionParse(PTFTPSESSION pTftpSession, PCTFTPIPHDR pcTftpIpHeader) +{ + int rc = VINF_SUCCESS; + char *pszTftpRRQRaw; + size_t idxTftpRRQRaw = 0; + ssize_t cbTftpRRQRaw = 0; + int fWithArg = 0; + int idxOptionArg = 0; + + AssertPtrReturn(pTftpSession, VERR_INVALID_PARAMETER); + AssertPtrReturn(pcTftpIpHeader, VERR_INVALID_PARAMETER); + AssertReturn(RT_N2H_U16(pcTftpIpHeader->u16TftpOpType) == TFTP_RRQ, VERR_INVALID_PARAMETER); + LogFlowFunc(("pTftpSession:%p, pcTftpIpHeader:%p\n", pTftpSession, pcTftpIpHeader)); + + pszTftpRRQRaw = (char *)&pcTftpIpHeader->Core; + cbTftpRRQRaw = RT_H2N_U16(pcTftpIpHeader->UdpHdr.uh_ulen) + sizeof(struct ip) - RT_UOFFSETOF(TFTPIPHDR, Core); + while (cbTftpRRQRaw) + { + rc = RTStrNLenEx(pszTftpRRQRaw, cbTftpRRQRaw, &idxTftpRRQRaw); + if (RT_SUCCESS(rc)) + ++idxTftpRRQRaw; /* count the NUL too */ + else + break; + + if 
(RTStrNLen(pTftpSession->szFilename, TFTP_FILENAME_MAX) == 0) + { + rc = RTStrCopy(pTftpSession->szFilename, TFTP_FILENAME_MAX, pszTftpRRQRaw); + if (RT_FAILURE(rc)) + { + LogFlowFuncLeaveRC(rc); + AssertRCReturn(rc,rc); + } + } + else if (pTftpSession->enmTftpFmt == TFTPFMT_NONE) + { + int idxFmt = 0; + rc = tftpFindTransferFormatIdxbyName(&idxFmt, pszTftpRRQRaw); + if (RT_FAILURE(rc)) + { + LogFlowFuncLeaveRC(VERR_INTERNAL_ERROR); + return VERR_INTERNAL_ERROR; + } + AssertReturn( g_TftpTransferFmtDesc[idxFmt].enmType != TFTPFMT_NONE + && g_TftpTransferFmtDesc[idxFmt].enmType != TFTPFMT_NOT_FMT, VERR_INTERNAL_ERROR); + pTftpSession->enmTftpFmt = g_TftpTransferFmtDesc[idxFmt].enmType; + } + else if (fWithArg) + { + if (!RTStrICmp("blksize", g_TftpDesc[idxOptionArg].pszName)) + rc = tftpSessionParseAndMarkOption(pszTftpRRQRaw, &pTftpSession->OptionBlkSize); + + if ( RT_SUCCESS(rc) + && !RTStrICmp("tsize", g_TftpDesc[idxOptionArg].pszName)) + rc = tftpSessionParseAndMarkOption(pszTftpRRQRaw, &pTftpSession->OptionTSize); + + /** @todo we don't use timeout, but its value in the range 0-255 */ + if ( RT_SUCCESS(rc) + && !RTStrICmp("timeout", g_TftpDesc[idxOptionArg].pszName)) + rc = tftpSessionParseAndMarkOption(pszTftpRRQRaw, &pTftpSession->OptionTimeout); + + /** @todo unknown option detection */ + if (RT_FAILURE(rc)) + { + LogFlowFuncLeaveRC(rc); + AssertRCReturn(rc,rc); + } + fWithArg = 0; + idxOptionArg = 0; + } + else + { + rc = tftpFindOptionIdxbyName(&idxOptionArg, pszTftpRRQRaw); + if (RT_SUCCESS(rc)) + fWithArg = 1; + else + { + LogFlowFuncLeaveRC(rc); + AssertRCReturn(rc,rc); + } + } + pszTftpRRQRaw += idxTftpRRQRaw; + cbTftpRRQRaw -= idxTftpRRQRaw; + } + + LogFlowFuncLeaveRC(rc); + return rc; +} + +static int tftpAllocateSession(PNATState pData, PCTFTPIPHDR pcTftpIpHeader, PPTFTPSESSION ppTftpSession) +{ + PTFTPSESSION pTftpSession = NULL; + int rc = VINF_SUCCESS; + int idxSession; + AssertPtrReturn(pData, VERR_INVALID_PARAMETER); + 
AssertPtrReturn(pcTftpIpHeader, VERR_INVALID_PARAMETER); + AssertPtrReturn(ppTftpSession, VERR_INVALID_PARAMETER); + + for (idxSession = 0; idxSession < TFTP_SESSIONS_MAX; idxSession++) + { + pTftpSession = &((PTFTPSESSION)pData->pvTftpSessions)[idxSession]; + + if (!pTftpSession->fInUse) + goto found; + + /* sessions time out after 5 inactive seconds */ + if ((int)(curtime - pTftpSession->iTimestamp) > 5000) + goto found; + } + + return VERR_NOT_FOUND; + + found: + if (pTftpSession->pcszFilenameHost != NULL) + { + RTStrFree((char *)pTftpSession->pcszFilenameHost); + // pTftpSession->pcszFilenameHost = NULL; /* will be zeroed out below */ + } + RT_ZERO(*pTftpSession); + + memcpy(&pTftpSession->IpClientAddress, &pcTftpIpHeader->IPv4Hdr.ip_src, sizeof(pTftpSession->IpClientAddress)); + pTftpSession->u16ClientPort = pcTftpIpHeader->UdpHdr.uh_sport; + rc = tftpSessionOptionParse(pTftpSession, pcTftpIpHeader); + AssertRCReturn(rc, VERR_INTERNAL_ERROR); + *ppTftpSession = pTftpSession; + + LogRel(("NAT: TFTP RRQ %s", pTftpSession->szFilename)); + const char *pszPrefix = " "; + if (pTftpSession->OptionBlkSize.fRequested) + { + LogRel(("%s" "blksize=%RU64", pszPrefix, pTftpSession->OptionBlkSize.u64Value)); + pszPrefix = ", "; + } + if (pTftpSession->OptionTSize.fRequested) + { + LogRel(("%s" "tsize=%RU64", pszPrefix, pTftpSession->OptionTSize.u64Value)); + pszPrefix = ", "; + } + if (pTftpSession->OptionTimeout.fRequested) + { + LogRel(("%s" "timeout=%RU64", pszPrefix, pTftpSession->OptionTimeout.u64Value)); + pszPrefix = ", "; + } + LogRel(("\n")); + + tftpSessionUpdate(pData, pTftpSession); + + return VINF_SUCCESS; +} + +static int tftpSessionFind(PNATState pData, PCTFTPIPHDR pcTftpIpHeader, PPTFTPSESSION ppTftpSessions) +{ + PTFTPSESSION pTftpSession; + int idxTftpSession; + AssertPtrReturn(pData, VERR_INVALID_PARAMETER); + AssertPtrReturn(pcTftpIpHeader, VERR_INVALID_PARAMETER); + AssertPtrReturn(ppTftpSessions, VERR_INVALID_PARAMETER); + + for (idxTftpSession = 0; 
idxTftpSession < TFTP_SESSIONS_MAX; idxTftpSession++) + { + pTftpSession = &((PTFTPSESSION)pData->pvTftpSessions)[idxTftpSession]; + + if (pTftpSession->fInUse) + { + if (!memcmp(&pTftpSession->IpClientAddress, &pcTftpIpHeader->IPv4Hdr.ip_src, sizeof(pTftpSession->IpClientAddress))) + { + if (pTftpSession->u16ClientPort == pcTftpIpHeader->UdpHdr.uh_sport) + { + *ppTftpSessions = pTftpSession; + return VINF_SUCCESS; + } + } + } + } + + return VERR_NOT_FOUND; +} + +DECLINLINE(int) pftpSessionOpenFile(PTFTPSESSION pTftpSession, PRTFILE pSessionFile) +{ + int rc; + LogFlowFuncEnter(); + + if (pTftpSession->pcszFilenameHost == NULL) + { + rc = VERR_FILE_NOT_FOUND; + } + else + { + rc = RTFileOpen(pSessionFile, pTftpSession->pcszFilenameHost, + RTFILE_O_OPEN | RTFILE_O_READ | RTFILE_O_DENY_WRITE); + if (RT_FAILURE(rc)) + rc = VERR_FILE_NOT_FOUND; + } + + LogFlowFuncLeaveRC(rc); + return rc; +} + +DECLINLINE(int) tftpSessionEvaluateOptions(PTFTPSESSION pTftpSession) +{ + int rc; + RTFILE hSessionFile; + uint64_t cbSessionFile = 0; + int cOptions; + LogFlowFunc(("pTftpSession:%p\n", pTftpSession)); + + rc = pftpSessionOpenFile(pTftpSession, &hSessionFile); + if (RT_FAILURE(rc)) + { + LogFlowFuncLeaveRC(rc); + return rc; + } + + rc = RTFileQuerySize(hSessionFile, &cbSessionFile); + RTFileClose(hSessionFile); + if (RT_FAILURE(rc)) + { + LogFlowFuncLeaveRC(rc); + return rc; + } + + cOptions = 0; + + if (pTftpSession->OptionTSize.fRequested) + { + pTftpSession->OptionTSize.u64Value = cbSessionFile; + ++cOptions; + } + + if (pTftpSession->OptionBlkSize.fRequested) + { + if (pTftpSession->OptionBlkSize.u64Value < 8) + { + /* + * we cannot make a counter-offer larger than the client's + * value, so just pretend we didn't recognize it and use + * default block size + */ + pTftpSession->OptionBlkSize.fRequested = 0; + pTftpSession->OptionBlkSize.u64Value = 512; + } + else if (pTftpSession->OptionBlkSize.u64Value > 1428) + { + pTftpSession->OptionBlkSize.u64Value = 1428; + 
++cOptions; + } + } + else + { + pTftpSession->OptionBlkSize.u64Value = 512; + } + + rc = cOptions > 0 ? VINF_SUCCESS : VWRN_NOT_FOUND; + LogFlowFuncLeaveRC(rc); + return rc; +} + +DECLINLINE(int) tftpSend(PNATState pData, + PTFTPSESSION pTftpSession, + struct mbuf *pMBuf, + PCTFTPIPHDR pcTftpIpHeaderRecv) +{ + struct sockaddr_in saddr, daddr; + int error, rc; + + LogFlowFunc(("pMBuf:%p, pcTftpIpHeaderRecv:%p\n", pMBuf, pcTftpIpHeaderRecv)); + saddr.sin_addr = pcTftpIpHeaderRecv->IPv4Hdr.ip_dst; + saddr.sin_port = pcTftpIpHeaderRecv->UdpHdr.uh_dport; + + daddr.sin_addr = pTftpSession->IpClientAddress; + daddr.sin_port = pTftpSession->u16ClientPort; + + + pMBuf->m_data += sizeof(struct udpiphdr); + pMBuf->m_len -= sizeof(struct udpiphdr); + + error = udp_output2(pData, NULL, pMBuf, &saddr, &daddr, IPTOS_LOWDELAY); + rc = error ? VERR_GENERAL_FAILURE : VINF_SUCCESS; + + LogFlowFuncLeaveRC(rc); + return rc; +} + + +DECLINLINE(int) tftpSendError(PNATState pData, PTFTPSESSION pTftpSession, uint16_t errorcode, + const char *msg, PCTFTPIPHDR pcTftpIpHeaderRecv); /* gee wiz */ + +DECLINLINE(int) tftpReadDataBlock(PNATState pData, + PTFTPSESSION pcTftpSession, + uint8_t *pu8Data, + int *pcbReadData) +{ + RTFILE hSessionFile; + int rc = VINF_SUCCESS; + uint16_t u16BlkSize = 0; + AssertPtrReturn(pData, VERR_INVALID_PARAMETER); + AssertPtrReturn(pcTftpSession, VERR_INVALID_PARAMETER); + AssertPtrReturn(pu8Data, VERR_INVALID_PARAMETER); + AssertPtrReturn(pcbReadData, VERR_INVALID_PARAMETER); + LogFlowFunc(("pcTftpSession:%p, pu8Data:%p, pcbReadData:%p\n", + pcTftpSession, + pu8Data, + pcbReadData)); + + u16BlkSize = (uint16_t)pcTftpSession->OptionBlkSize.u64Value; + rc = pftpSessionOpenFile(pcTftpSession, &hSessionFile); + if (RT_FAILURE(rc)) + { + LogFlowFuncLeaveRC(rc); + return rc; + } + + if (pcbReadData) + { + size_t cbRead; + + rc = RTFileSeek(hSessionFile, + pcTftpSession->cbTransfered, + RTFILE_SEEK_BEGIN, + NULL); + if (RT_FAILURE(rc)) + { + RTFileClose(hSessionFile); 
+ LogFlowFuncLeaveRC(rc); + return rc; + } + rc = RTFileRead(hSessionFile, pu8Data, u16BlkSize, &cbRead); + if (RT_FAILURE(rc)) + { + RTFileClose(hSessionFile); + LogFlowFuncLeaveRC(rc); + return rc; + } + *pcbReadData = (int)cbRead; + } + + rc = RTFileClose(hSessionFile); + + LogFlowFuncLeaveRC(rc); + return rc; +} + +DECLINLINE(int) tftpAddOptionToOACK(PNATState pData, struct mbuf *pMBuf, const char *pszOptName, uint64_t u64OptValue) +{ + char szOptionBuffer[256]; + size_t iOptLength; + int rc = VINF_SUCCESS; + int cbMBufCurrent = pMBuf->m_len; + LogFlowFunc(("pMBuf:%p, pszOptName:%s, u16OptValue:%ld\n", pMBuf, pszOptName, u64OptValue)); + AssertPtrReturn(pMBuf, VERR_INVALID_PARAMETER); + AssertPtrReturn(pszOptName, VERR_INVALID_PARAMETER); + + RT_ZERO(szOptionBuffer); + iOptLength = RTStrPrintf(szOptionBuffer, 256 , "%s", pszOptName) + 1; + iOptLength += RTStrPrintf(szOptionBuffer + iOptLength, 256 - iOptLength , "%llu", u64OptValue) + 1; + if (iOptLength > M_TRAILINGSPACE(pMBuf)) + rc = VERR_BUFFER_OVERFLOW; /* buffer too small */ + else + { + pMBuf->m_len += (int)iOptLength; + m_copyback(pData, pMBuf, cbMBufCurrent, (int)iOptLength, szOptionBuffer); + } + LogFlowFuncLeaveRC(rc); + return rc; +} + + +DECLINLINE(int) tftpSendOACK(PNATState pData, + PTFTPSESSION pTftpSession, + PCTFTPIPHDR pcTftpIpHeaderRecv) +{ + struct mbuf *m; + PTFTPIPHDR pTftpIpHeader; + int rc; + + rc = tftpSessionEvaluateOptions(pTftpSession); + if (RT_FAILURE(rc)) + { + tftpSendError(pData, pTftpSession, TFTP_EACCESS, "Option negotiation failure (file not found or inaccessible?)", pcTftpIpHeaderRecv); + LogFlowFuncLeave(); + return rc; + } + + if (rc == VWRN_NOT_FOUND) + return rc; + + m = slirpTftpMbufAlloc(pData); + if (m == NULL) + { + tftpSessionTerminate(pTftpSession); + return VERR_NO_MEMORY; + } + + m->m_data += if_maxlinkhdr; + m->m_pkthdr.header = mtod(m, void *); + pTftpIpHeader = mtod(m, PTFTPIPHDR); + m->m_len = sizeof(TFTPIPHDR) - sizeof(uint16_t); /* no u16TftpOpCode */ + + 
pTftpIpHeader->u16TftpOpType = RT_H2N_U16_C(TFTP_OACK); + + if (pTftpSession->OptionBlkSize.fRequested) + rc = tftpAddOptionToOACK(pData, m, "blksize", pTftpSession->OptionBlkSize.u64Value); + + if ( RT_SUCCESS(rc) + && pTftpSession->OptionTSize.fRequested) + rc = tftpAddOptionToOACK(pData, m, "tsize", pTftpSession->OptionTSize.u64Value); + + rc = tftpSend(pData, pTftpSession, m, pcTftpIpHeaderRecv); + if (RT_FAILURE(rc)) + tftpSessionTerminate(pTftpSession); + + return rc; +} + + +DECLINLINE(int) tftpSendError(PNATState pData, + PTFTPSESSION pTftpSession, + uint16_t errorcode, + const char *msg, + PCTFTPIPHDR pcTftpIpHeaderRecv) +{ + struct mbuf *m = NULL; + + LogFlowFunc(("ENTER: errorcode: %RX16, msg: %s\n", errorcode, msg)); + m = slirpTftpMbufAlloc(pData); + if (m != NULL) + { + u_int cbMsg = (u_int)strlen(msg) + 1; /* ending zero */ + PTFTPIPHDR pTftpIpHeader; + + m->m_data += if_maxlinkhdr; + m->m_len = sizeof(TFTPIPHDR) + cbMsg; + m->m_pkthdr.header = mtod(m, void *); + pTftpIpHeader = mtod(m, PTFTPIPHDR); + + pTftpIpHeader->u16TftpOpType = RT_H2N_U16_C(TFTP_ERROR); + pTftpIpHeader->Core.u16TftpOpCode = RT_H2N_U16(errorcode); + + m_copyback(pData, m, sizeof(TFTPIPHDR), cbMsg, (c_caddr_t)msg); + + tftpSend(pData, pTftpSession, m, pcTftpIpHeaderRecv); + } + + tftpSessionTerminate(pTftpSession); + + LogFlowFuncLeave(); + return 0; +} + + +static int tftpSendData(PNATState pData, + PTFTPSESSION pTftpSession, + uint16_t u16Block, + PCTFTPIPHDR pcTftpIpHeaderRecv) +{ + struct mbuf *m; + PTFTPIPHDR pTftpIpHeader; + int cbRead = 0; + int rc = VINF_SUCCESS; + + if (u16Block == pTftpSession->cTftpAck) + pTftpSession->cTftpAck++; + else + { + tftpSendError(pData, pTftpSession, TFTP_EEXIST, "ACK is wrong", pcTftpIpHeaderRecv); + return -1; + } + + m = slirpTftpMbufAlloc(pData); + if (!m) + return -1; + + m->m_data += if_maxlinkhdr; + m->m_pkthdr.header = mtod(m, void *); + pTftpIpHeader = mtod(m, PTFTPIPHDR); + m->m_len = sizeof(TFTPIPHDR); + + 
pTftpIpHeader->u16TftpOpType = RT_H2N_U16_C(TFTP_DATA); + pTftpIpHeader->Core.u16TftpOpCode = RT_H2N_U16(pTftpSession->cTftpAck); + + if (RT_LIKELY(M_TRAILINGSPACE(m) >= pTftpSession->OptionBlkSize.u64Value)) + { + uint8_t *pu8Data = (uint8_t *)&pTftpIpHeader->Core.u16TftpOpCode + sizeof(uint16_t); + rc = tftpReadDataBlock(pData, pTftpSession, pu8Data, &cbRead); + } + else + rc = VERR_BUFFER_OVERFLOW; + + if (RT_SUCCESS(rc)) + { + pTftpSession->cbTransfered += cbRead; + m->m_len += cbRead; + tftpSend(pData, pTftpSession, m, pcTftpIpHeaderRecv); + if (cbRead > 0) + tftpSessionUpdate(pData, pTftpSession); + else + tftpSessionTerminate(pTftpSession); + } + else + { + m_freem(pData, m); + tftpSendError(pData, pTftpSession, TFTP_ENOENT, "File not found", pcTftpIpHeaderRecv); + /* send "file not found" error back */ + return -1; + } + + return 0; +} + +DECLINLINE(void) tftpProcessRRQ(PNATState pData, PCTFTPIPHDR pTftpIpHeader, int pktlen) +{ + PTFTPSESSION pTftpSession = NULL; + uint8_t *pu8Payload = NULL; + int cbPayload = 0; + size_t cbFileName = 0; + int rc = VINF_SUCCESS; + + AssertPtrReturnVoid(pTftpIpHeader); + AssertPtrReturnVoid(pData); + AssertReturnVoid(pktlen > sizeof(TFTPIPHDR)); + LogFlowFunc(("ENTER: pTftpIpHeader:%p, pktlen:%d\n", pTftpIpHeader, pktlen)); + + rc = tftpAllocateSession(pData, pTftpIpHeader, &pTftpSession); + if ( RT_FAILURE(rc) + || pTftpSession == NULL) + { + LogFlowFuncLeave(); + return; + } + + pu8Payload = (uint8_t *)&pTftpIpHeader->Core; + cbPayload = pktlen - sizeof(TFTPIPHDR); + + cbFileName = RTStrNLen((char *)pu8Payload, cbPayload); + /* We assume that file name should finish with '\0' and shouldn't bigger + * than buffer for name storage. 
+ */ + AssertReturnVoid( (ssize_t)cbFileName < cbPayload + && cbFileName < TFTP_FILENAME_MAX /* current limit in tftp session handle */ + && cbFileName); + + /* Dont't bother with rest processing in case of invalid access */ + if (RT_FAILURE(tftpSecurityFilenameCheck(pData, pTftpSession))) + { + tftpSendError(pData, pTftpSession, TFTP_EACCESS, "Access violation", pTftpIpHeader); + LogFlowFuncLeave(); + return; + } + + + + if (RT_UNLIKELY(!tftpIsSupportedTransferMode(pTftpSession))) + { + tftpSendError(pData, pTftpSession, TFTP_ENOSYS, "Unsupported transfer mode", pTftpIpHeader); + LogFlowFuncLeave(); + return; + } + + + rc = tftpSendOACK(pData, pTftpSession, pTftpIpHeader); + if (rc == VWRN_NOT_FOUND) + rc = tftpSendData(pData, pTftpSession, 0, pTftpIpHeader); + + LogFlowFuncLeave(); + return; +} + +static void tftpProcessACK(PNATState pData, PTFTPIPHDR pTftpIpHeader) +{ + int rc; + PTFTPSESSION pTftpSession = NULL; + + rc = tftpSessionFind(pData, pTftpIpHeader, &pTftpSession); + if (RT_FAILURE(rc)) + return; + + if (tftpSendData(pData, pTftpSession, + RT_N2H_U16(pTftpIpHeader->Core.u16TftpOpCode), + pTftpIpHeader)) + LogRel(("NAT: TFTP send failed\n")); +} + +int slirpTftpInit(PNATState pData) +{ + AssertPtrReturn(pData, VERR_INVALID_PARAMETER); + pData->pvTftpSessions = RTMemAllocZ(sizeof(TFTPSESSION) * TFTP_SESSIONS_MAX); + AssertPtrReturn(pData->pvTftpSessions, VERR_NO_MEMORY); + return VINF_SUCCESS; +} + +void slirpTftpTerm(PNATState pData) +{ + RTMemFree(pData->pvTftpSessions); +} + +int slirpTftpInput(PNATState pData, struct mbuf *pMbuf) +{ + PTFTPIPHDR pTftpIpHeader = NULL; + AssertPtr(pData); + AssertPtr(pMbuf); + pTftpIpHeader = mtod(pMbuf, PTFTPIPHDR); + + switch(RT_N2H_U16(pTftpIpHeader->u16TftpOpType)) + { + case TFTP_RRQ: + tftpProcessRRQ(pData, pTftpIpHeader, m_length(pMbuf, NULL)); + break; + + case TFTP_ACK: + tftpProcessACK(pData, pTftpIpHeader); + break; + + case TFTP_ERROR: + { + PTFTPSESSION pTftpSession; + int rc = tftpSessionFind(pData, 
pTftpIpHeader, &pTftpSession); + if (RT_SUCCESS(rc)) + tftpSessionTerminate(pTftpSession); + } + + default:; + } + LogFlowFuncLeaveRC(VINF_SUCCESS); + return VINF_SUCCESS; +} diff --git a/src/VBox/Devices/Network/slirp/tftp.h b/src/VBox/Devices/Network/slirp/tftp.h new file mode 100644 index 00000000..5c84bd58 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/tftp.h @@ -0,0 +1,63 @@ +/* $Id: tftp.h $ */ +/** @file + * NAT - TFTP server (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* tftp defines */ + +#ifndef _SLIRP_TFTP_H_ +#define _SLIRP_TFTP_H_ + +#define TFTP_SESSIONS_MAX 3 + +#define TFTP_SERVER 69 + +#define TFTP_RRQ 1 +#define TFTP_WRQ 2 +#define TFTP_DATA 3 +#define TFTP_ACK 4 +#define TFTP_ERROR 5 +#define TFTP_OACK 6 + +/* error codes */ +#define TFTP_EUNDEF 0 /* Not defined, see error message (if any). */ +#define TFTP_ENOENT 1 /* File not found. */ +#define TFTP_EACCESS 2 /* Access violation. */ +#define TFTP_EFBIG 3 /* Disk full or allocation exceeded. */ +#define TFTP_ENOSYS 4 /* Illegal TFTP operation. */ +#define TFTP_ESRCH 5 /* Unknown transfer ID. */ +#define TFTP_EEXIST 6 /* File already exists. 
*/ +#define TFTP_EUSER 7 /* No such user. */ +/* RFC 2347 */ +#define TFTP_EONAK 8 /* Option refused. */ + + +#define TFTP_FILENAME_MAX 512 + + +int slirpTftpInput(PNATState pData, struct mbuf *m); +int slirpTftpInit(PNATState pData); +void slirpTftpTerm(PNATState pData); +#endif diff --git a/src/VBox/Devices/Network/slirp/udp.c b/src/VBox/Devices/Network/slirp/udp.c new file mode 100644 index 00000000..ebd9c20e --- /dev/null +++ b/src/VBox/Devices/Network/slirp/udp.c @@ -0,0 +1,685 @@ +/* $Id: udp.c $ */ +/** @file + * NAT - UDP protocol. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 + * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> +#include "ip_icmp.h" + + +/* + * UDP protocol implementation. + * Per RFC 768, August, 1980. 
+ */ +#define udpcksum 1 + +void +udp_init(PNATState pData) +{ + udp_last_so = &udb; + udb.so_next = udb.so_prev = &udb; +} + +/* m->m_data points at ip packet header + * m->m_len length ip packet + * ip->ip_len length data (IPDU) + */ +void +udp_input(PNATState pData, register struct mbuf *m, int iphlen) +{ + register struct ip *ip; + register struct udphdr *uh; + int len; + struct ip save_ip; + struct socket *so; + int ret; + int ttl, tos; + + LogFlowFunc(("ENTER: m = %p, iphlen = %d\n", m, iphlen)); + ip = mtod(m, struct ip *); + Log2(("%RTnaipv4 iphlen = %d\n", ip->ip_dst, iphlen)); + + udpstat.udps_ipackets++; + + /* + * Strip IP options, if any; should skip this, + * make available to user, and use on returned packets, + * but we don't yet have a way to check the checksum + * with options still present. + */ + if (iphlen > sizeof(struct ip)) + { + ip_stripoptions(m, (struct mbuf *)0); + iphlen = sizeof(struct ip); + } + + /* + * Get IP and UDP header together in first mbuf. + */ + ip = mtod(m, struct ip *); + uh = (struct udphdr *)((caddr_t)ip + iphlen); + + /* + * Make mbuf data length reflect UDP length. + * If not enough data to reflect UDP length, drop. + */ + len = RT_N2H_U16((u_int16_t)uh->uh_ulen); + Assert(ip->ip_len + iphlen == (ssize_t)m_length(m, NULL)); + + if (ip->ip_len != len) + { + if (len > ip->ip_len) + { + udpstat.udps_badlen++; + Log3(("NAT: IP(id: %hd) has bad size\n", ip->ip_id)); + goto bad_free_mbuf; + } + m_adj(m, len - ip->ip_len); + ip->ip_len = len; + } + + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + save_ip = *ip; + save_ip.ip_len+= iphlen; /* tcp_input subtracts this */ + + /* + * Checksum extended UDP header and data. 
+ */ + if (udpcksum && uh->uh_sum) + { + memset(((struct ipovly *)ip)->ih_x1, 0, 9); + ((struct ipovly *)ip)->ih_len = uh->uh_ulen; +#if 0 + /* keep uh_sum for ICMP reply */ + uh->uh_sum = cksum(m, len + sizeof (struct ip)); + if (uh->uh_sum) + { + +#endif + if (cksum(m, len + iphlen)) + { + udpstat.udps_badsum++; + Log3(("NAT: IP(id: %hd) has bad (udp) cksum\n", ip->ip_id)); + goto bad_free_mbuf; + } + } +#if 0 + } +#endif + + /* + * handle DHCP/BOOTP + */ + if (uh->uh_dport == RT_H2N_U16_C(BOOTP_SERVER)) + { + bootp_input(pData, m); + goto done_free_mbuf; + } + + LogFunc(("uh src: %RTnaipv4:%d, dst: %RTnaipv4:%d\n", + ip->ip_src.s_addr, RT_N2H_U16(uh->uh_sport), + ip->ip_dst.s_addr, RT_N2H_U16(uh->uh_dport))); + + /* + * handle DNS host resolver without creating a socket + */ + if ( pData->fUseHostResolver + && uh->uh_dport == RT_H2N_U16_C(53) + && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS)) + { + struct sockaddr_in dst, src; + + src.sin_addr.s_addr = ip->ip_dst.s_addr; + src.sin_port = uh->uh_dport; + dst.sin_addr.s_addr = ip->ip_src.s_addr; + dst.sin_port = uh->uh_sport; + + m_adj(m, sizeof(struct udpiphdr)); + + m = hostresolver(pData, m, ip->ip_src.s_addr, uh->uh_sport); + if (m == NULL) + goto done_free_mbuf; + + slirpMbufTagService(pData, m, CTL_DNS); + + udp_output2(pData, NULL, m, &src, &dst, IPTOS_LOWDELAY); + LogFlowFuncLeave(); + return; + } + + /* + * handle TFTP + */ + if ( uh->uh_dport == RT_H2N_U16_C(TFTP_SERVER) + && CTL_CHECK(ip->ip_dst.s_addr, CTL_TFTP)) + { + if (pData->pvTftpSessions) + slirpTftpInput(pData, m); + goto done_free_mbuf; + } + + /* + * XXX: DNS proxy currently relies on the fact that each socket + * only serves one request. + */ + if ( pData->fUseDnsProxy + && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS) + && (uh->uh_dport == RT_H2N_U16_C(53))) + { + so = NULL; + goto new_socket; + } + + /* + * Drop UDP packets destind for CTL_ALIAS (i.e. the hosts loopback interface) + * if it is disabled. 
+ */ + if ( CTL_CHECK(ip->ip_dst.s_addr, CTL_ALIAS) + && !pData->fLocalhostReachable) + goto done_free_mbuf; + + /* + * Locate pcb for datagram. + */ + so = udp_last_so; + if ( so->so_lport != uh->uh_sport + || so->so_laddr.s_addr != ip->ip_src.s_addr) + { + struct socket *tmp; + + for (tmp = udb.so_next; tmp != &udb; tmp = tmp->so_next) + { + if ( tmp->so_lport == uh->uh_sport + && tmp->so_laddr.s_addr == ip->ip_src.s_addr) + { + so = tmp; + break; + } + } + if (tmp == &udb) + so = NULL; + else + { + udpstat.udpps_pcbcachemiss++; + udp_last_so = so; + } + } + + new_socket: + if (so == NULL) + { + /* + * If there's no socket for this packet, + * create one + */ + if ((so = socreate()) == NULL) + { + Log2(("NAT: IP(id: %hd) failed to create socket\n", ip->ip_id)); + goto bad_free_mbuf; + } + + /* + * Setup fields + */ + so->so_laddr = ip->ip_src; + so->so_lport = uh->uh_sport; + so->so_iptos = ip->ip_tos; + + if (udp_attach(pData, so) <= 0) + { + Log2(("NAT: IP(id: %hd) udp_attach errno = %d (%s)\n", + ip->ip_id, errno, strerror(errno))); + sofree(pData, so); + goto bad_free_mbuf; + } + + /* udp_last_so = so; */ + /* + * XXXXX Here, check if it's in udpexec_list, + * and if it is, do the fork_exec() etc. 
+ */ + } + + so->so_faddr = ip->ip_dst; /* XXX */ + so->so_fport = uh->uh_dport; /* XXX */ + Assert(so->so_type == IPPROTO_UDP); + + /* + * DNS proxy + */ + if ( pData->fUseDnsProxy + && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS) + && (uh->uh_dport == RT_H2N_U16_C(53))) + { + dnsproxy_query(pData, so, m, iphlen); + goto done_free_mbuf; + } + + iphlen += sizeof(struct udphdr); + m->m_len -= iphlen; + m->m_data += iphlen; + + ttl = ip->ip_ttl = save_ip.ip_ttl; + if (ttl != so->so_sottl) { + ret = setsockopt(so->s, IPPROTO_IP, IP_TTL, + (char *)&ttl, sizeof(ttl)); + if (RT_LIKELY(ret == 0)) + so->so_sottl = ttl; + } + + tos = save_ip.ip_tos; + if (tos != so->so_sotos) { + ret = setsockopt(so->s, IPPROTO_IP, IP_TOS, + (char *)&tos, sizeof(tos)); + if (RT_LIKELY(ret == 0)) + so->so_sotos = tos; + } + + { + /* + * Different OSes have different socket options for DF. We + * can't use IP_HDRINCL here as it's only valid for SOCK_RAW. + */ +# define USE_DF_OPTION(_Optname) \ + const int dfopt = _Optname +#if defined(IP_MTU_DISCOVER) + USE_DF_OPTION(IP_MTU_DISCOVER); +#elif defined(IP_DONTFRAG) /* Solaris 11+, FreeBSD */ + USE_DF_OPTION(IP_DONTFRAG); +#elif defined(IP_DONTFRAGMENT) /* Windows */ + USE_DF_OPTION(IP_DONTFRAGMENT); +#else + USE_DF_OPTION(0); +#endif + if (dfopt) { + int df = (save_ip.ip_off & IP_DF) != 0; +#if defined(IP_MTU_DISCOVER) + df = df ? 
IP_PMTUDISC_DO : IP_PMTUDISC_DONT; +#endif + if (df != so->so_sodf) { + ret = setsockopt(so->s, IPPROTO_IP, dfopt, + (char *)&df, sizeof(df)); + if (RT_LIKELY(ret == 0)) + so->so_sodf = df; + } + } + } + + if ( sosendto(pData, so, m) == -1 + && ( !soIgnorableErrorCode(errno) + && errno != ENOTCONN)) + { + m->m_len += iphlen; + m->m_data -= iphlen; + *ip = save_ip; + Log2(("NAT: UDP tx errno = %d (%s) on sent to %RTnaipv4\n", + errno, strerror(errno), ip->ip_dst)); + icmp_error(pData, m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); + so->so_m = NULL; + LogFlowFuncLeave(); + return; + } + + if (so->so_m) + m_freem(pData, so->so_m); /* used for ICMP if error on sorecvfrom */ + + /* restore the orig mbuf packet */ + m->m_len += iphlen; + m->m_data -= iphlen; + *ip = save_ip; + so->so_m = m; /* ICMP backup */ + LogFlowFuncLeave(); + return; + +bad_free_mbuf: + Log2(("NAT: UDP(id: %hd) datagram to %RTnaipv4 with size(%d) claimed as bad\n", + ip->ip_id, &ip->ip_dst, ip->ip_len)); + +done_free_mbuf: + /* some services like bootp(built-in), dns(buildt-in) and dhcp don't need sockets + * and create new m'buffers to send them to guest, so we'll free their incomming + * buffers here. + */ + if (m != NULL) + m_freem(pData, m); + LogFlowFuncLeave(); + return; +} + +/** + * Output a UDP packet. + * + * @note This function will finally free m! 
+ */ +int udp_output2(PNATState pData, struct socket *so, struct mbuf *m, + struct sockaddr_in *saddr, struct sockaddr_in *daddr, + int iptos) +{ + register struct udpiphdr *ui; + int error; + int mlen = 0; + + LogFlowFunc(("ENTER: so = %R[natsock], m = %p, saddr = %RTnaipv4, daddr = %RTnaipv4\n", + so, m, saddr->sin_addr.s_addr, daddr->sin_addr.s_addr)); + + /* in case of built-in service so might be NULL */ + if (so) Assert(so->so_type == IPPROTO_UDP); + + /* + * Adjust for header + */ + m->m_data -= sizeof(struct udpiphdr); + m->m_len += sizeof(struct udpiphdr); + mlen = m_length(m, NULL); + + /* + * Fill in mbuf with extended UDP header + * and addresses and length put into network format. + */ + ui = mtod(m, struct udpiphdr *); + memset(ui->ui_x1, 0, 9); + ui->ui_pr = IPPROTO_UDP; + ui->ui_len = RT_H2N_U16((uint16_t)(mlen - sizeof(struct ip))); + /* XXXXX Check for from-one-location sockets, or from-any-location sockets */ + ui->ui_src = saddr->sin_addr; + ui->ui_dst = daddr->sin_addr; + ui->ui_sport = saddr->sin_port; + ui->ui_dport = daddr->sin_port; + ui->ui_ulen = ui->ui_len; + + /* + * Stuff checksum and output datagram. + */ + ui->ui_sum = 0; + if (udpcksum) + { + if ((ui->ui_sum = cksum(m, /* sizeof (struct udpiphdr) + */ mlen)) == 0) + ui->ui_sum = 0xffff; + } + ((struct ip *)ui)->ip_len = mlen; + ((struct ip *)ui)->ip_ttl = ip_defttl; + ((struct ip *)ui)->ip_tos = iptos; + + udpstat.udps_opackets++; + + error = ip_output(pData, so, m); + + return error; +} + +/** + * @note This function will free m! 
+ */ +int udp_output(PNATState pData, struct socket *so, struct mbuf *m, + struct sockaddr_in *addr) +{ + struct sockaddr_in saddr, daddr; + + Assert(so->so_type == IPPROTO_UDP); + LogFlowFunc(("ENTER: so = %R[natsock], m = %p, saddr = %RTnaipv4\n", so, m, addr->sin_addr.s_addr)); + + if (so->so_laddr.s_addr == INADDR_ANY) + { + if (pData->guest_addr_guess.s_addr != INADDR_ANY) + { + LogRel2(("NAT: port-forward: using %RTnaipv4 for %R[natsock]\n", + pData->guest_addr_guess.s_addr, so)); + so->so_laddr = pData->guest_addr_guess; + } + else + { + LogRel2(("NAT: port-forward: guest address unknown for %R[natsock]\n", so)); + m_freem(pData, m); + return 0; + } + } + + saddr = *addr; + if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr) + { + saddr.sin_addr.s_addr = so->so_faddr.s_addr; + if (slirpIsWideCasting(pData, so->so_faddr.s_addr)) + { + /** + * We haven't got real firewall but have got its submodule libalias. + */ + m->m_flags |= M_SKIP_FIREWALL; + /** + * udp/137 port is Name Service in NetBIOS protocol. for some reasons Windows guest rejects + * accept data from non-aliased server. + */ + if ( (so->so_fport == so->so_lport) + && (so->so_fport == RT_H2N_U16(137))) + saddr.sin_addr.s_addr = alias_addr.s_addr; + else + saddr.sin_addr.s_addr = addr->sin_addr.s_addr; + so->so_faddr.s_addr = addr->sin_addr.s_addr; + } + } + + /* Any UDP packet to the loopback address must be translated to be from + * the forwarding address, i.e. 10.0.2.2. 
*/ + if ( (saddr.sin_addr.s_addr & RT_H2N_U32_C(IN_CLASSA_NET)) + == RT_H2N_U32_C(INADDR_LOOPBACK & IN_CLASSA_NET)) + saddr.sin_addr.s_addr = alias_addr.s_addr; + + daddr.sin_addr = so->so_laddr; + daddr.sin_port = so->so_lport; + + return udp_output2(pData, so, m, &saddr, &daddr, so->so_iptos); +} + +int +udp_attach(PNATState pData, struct socket *so) +{ + struct sockaddr sa_addr; + socklen_t socklen = sizeof(struct sockaddr); + int status; + int opt = 1; + + AssertReturn(so->so_type == 0, -1); + so->so_type = IPPROTO_UDP; + + so->s = socket(AF_INET, SOCK_DGRAM, 0); + if (so->s == -1) + goto error; + fd_nonblock(so->s); + + so->so_sottl = 0; + so->so_sotos = 0; + so->so_sodf = -1; + + status = sobind(pData, so); + if (status != 0) + return status; + + /* success, insert in queue */ + so->so_expire = curtime + SO_EXPIRE; + + /* enable broadcast for later use */ + setsockopt(so->s, SOL_SOCKET, SO_BROADCAST, (const char *)&opt, sizeof(opt)); + + status = getsockname(so->s, &sa_addr, &socklen); + if (status == 0) + { + Assert(sa_addr.sa_family == AF_INET); + so->so_hlport = ((struct sockaddr_in *)&sa_addr)->sin_port; + so->so_hladdr.s_addr = ((struct sockaddr_in *)&sa_addr)->sin_addr.s_addr; + } + + SOCKET_LOCK_CREATE(so); + QSOCKET_LOCK(udb); + insque(pData, so, &udb); + NSOCK_INC(); + QSOCKET_UNLOCK(udb); + return so->s; +error: + Log2(("NAT: can't create datagram socket\n")); + return -1; +} + +void +udp_detach(PNATState pData, struct socket *so) +{ + if (so != &pData->icmp_socket) + { + Assert(so->so_type == IPPROTO_UDP); + QSOCKET_LOCK(udb); + SOCKET_LOCK(so); + QSOCKET_UNLOCK(udb); + closesocket(so->s); + sofree(pData, so); + SOCKET_UNLOCK(so); + } +} + +struct socket * +udp_listen(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags) +{ + struct sockaddr_in addr; + struct socket *so; + socklen_t addrlen = sizeof(struct sockaddr_in); + int opt = 1; + LogFlowFunc(("ENTER: bind_addr:%RTnaipv4, port:%d, laddr:%RTnaipv4, 
lport:%d, flags:%x\n", + bind_addr, RT_N2H_U16(port), laddr, RT_N2H_U16(lport), flags)); + + if ((so = socreate()) == NULL) + { + LogFlowFunc(("LEAVE: NULL\n")); + return NULL; + } + + so->s = socket(AF_INET, SOCK_DGRAM, 0); + if (so->s == -1) + { + LogRel(("NAT: can't create datagram socket\n")); + RTMemFree(so); + LogFlowFunc(("LEAVE: NULL\n")); + return NULL; + } + so->so_expire = curtime + SO_EXPIRE; + so->so_type = IPPROTO_UDP; + fd_nonblock(so->s); + so->so_sottl = 0; + so->so_sotos = 0; + so->so_sodf = -1; + SOCKET_LOCK_CREATE(so); + QSOCKET_LOCK(udb); + insque(pData, so, &udb); + NSOCK_INC(); + QSOCKET_UNLOCK(udb); + + memset(&addr, 0, sizeof(addr)); +#ifdef RT_OS_DARWIN + addr.sin_len = sizeof(addr); +#endif + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = bind_addr; + addr.sin_port = port; + + if (bind(so->s,(struct sockaddr *)&addr, addrlen) < 0) + { + LogRel(("NAT: udp bind to %RTnaipv4:%d failed, error %d\n", + addr.sin_addr, RT_N2H_U16(port), errno)); + udp_detach(pData, so); + LogFlowFunc(("LEAVE: NULL\n")); + return NULL; + } + setsockopt(so->s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)); +/* setsockopt(so->s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int)); */ + + getsockname(so->s,(struct sockaddr *)&addr,&addrlen); + so->so_hladdr = addr.sin_addr; + so->so_hlport = addr.sin_port; + + /* XXX: wtf are we setting so_faddr/so_fport here? */ + so->so_fport = addr.sin_port; +#if 0 + /* The original check was completely broken, as the commented out + * if statement was always true (INADDR_ANY=0). */ + /** @todo vvl - alias_addr should be set (if required) + * later by liabalias module. 
+ */ + if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr) + so->so_faddr = alias_addr; + else +#endif + so->so_faddr = addr.sin_addr; + + so->so_lport = lport; + so->so_laddr.s_addr = laddr; + if (flags != SS_FACCEPTONCE) + so->so_expire = 0; + + so->so_state = SS_ISFCONNECTED; + + LogFlowFunc(("LEAVE: %R[natsock]\n", so)); + return so; +} diff --git a/src/VBox/Devices/Network/slirp/udp.h b/src/VBox/Devices/Network/slirp/udp.h new file mode 100644 index 00000000..6e11f0c5 --- /dev/null +++ b/src/VBox/Devices/Network/slirp/udp.h @@ -0,0 +1,142 @@ +/* $Id: udp.h $ */ +/** @file + * NAT - UDP protocol (declarations/defines). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/* + * This code is based on: + * + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp.h 8.1 (Berkeley) 6/10/93 + * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp + */ + +#ifndef _UDP_H_ +#define _UDP_H_ + +#define UDP_TTL 0x60 +#define UDP_UDPDATALEN 16192 + +extern struct socket *udp_last_so; + +/* + * Udp protocol header. + * Per RFC 768, September, 1981. + */ +struct udphdr +{ + uint16_t uh_sport; /* source port */ + uint16_t uh_dport; /* destination port */ + int16_t uh_ulen; /* udp length */ + uint16_t uh_sum; /* udp checksum */ +}; +AssertCompileSize(struct udphdr, 8); + +/* + * UDP kernel structures and variables. 
*/
/* IP overlay header immediately followed by the UDP header (28 bytes in total). */
struct udpiphdr
{
    struct ipovly ui_i; /* overlaid ip structure */
    struct udphdr ui_u; /* udp header */
};
AssertCompileSize(struct udpiphdr, 28);
/* Shorthand accessors for the members of the two embedded headers. */
#define ui_next ui_i.ih_next
#define ui_prev ui_i.ih_prev
#define ui_x1 ui_i.ih_x1
#define ui_pr ui_i.ih_pr
#define ui_len ui_i.ih_len
#define ui_src ui_i.ih_src
#define ui_dst ui_i.ih_dst
#define ui_sport ui_u.uh_sport
#define ui_dport ui_u.uh_dport
#define ui_ulen ui_u.uh_ulen
#define ui_sum ui_u.uh_sum

/* UDP statistics counters. */
struct udpstat_t
{
    /* input statistics: */
    u_long udps_ipackets; /* total input packets */
    u_long udps_hdrops; /* packet shorter than header */
    u_long udps_badsum; /* checksum error */
    u_long udps_badlen; /* data length larger than packet */
    u_long udps_noport; /* no socket on port */
    u_long udps_noportbcast; /* of above, arrived as broadcast */
    u_long udps_fullsock; /* not delivered, input socket full */
    u_long udpps_pcbcachemiss; /* input packets missing pcb cache */
    /* output statistics: */
    u_long udps_opackets; /* total output packets */
};

/*
 * Names for UDP sysctl objects
 */
#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */
#define UDPCTL_MAXID 2

/* NOTE(review): the tag here is 'udpstat' while the structure above is 'udpstat_t' - confirm which is intended. */
extern struct udpstat udpstat;
extern struct socket udb;
struct mbuf;

/* NOTE(review): udp.c as shown defines no udp_tos() or udp_emu(); confirm these prototypes are still needed. */
void udp_init (PNATState);
void udp_input (PNATState, register struct mbuf *, int);
int udp_output (PNATState, struct socket *, struct mbuf *, struct sockaddr_in *);
int udp_attach (PNATState, struct socket *);
void udp_detach (PNATState, struct socket *);
u_int8_t udp_tos (struct socket *);
void udp_emu (PNATState, struct socket *, struct mbuf *);
struct socket * udp_listen (PNATState, u_int32_t, u_int, u_int32_t, u_int, int);
int udp_output2(PNATState pData, struct socket *so, struct mbuf *m,
                struct sockaddr_in *saddr, struct sockaddr_in *daddr,
                int iptos);

#endif
diff --git a/src/VBox/Devices/Network/slirp/zone.h b/src/VBox/Devices/Network/slirp/zone.h
new file mode 100644
index 00000000..4fabc082
--- /dev/null
+++
b/src/VBox/Devices/Network/slirp/zone.h
@@ -0,0 +1,64 @@
/* $Id: zone.h $ */
/** @file
 * NAT - this file is for sharing zone declaration with UMA emulation and logging routines.
 */

/*
 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef __ZONE_H__
# define __ZONE_H__

/* Value stored in item::magic. */
# define ITEM_MAGIC 0xdead0001
/* Bookkeeping record for one zone item. */
struct item
{
    uint32_t magic; /* presumably ITEM_MAGIC for a valid item - TODO confirm */
    uma_zone_t zone; /* zone associated with this item */
    uint32_t ref_count;
    LIST_ENTRY(item) list; /* linkage for the zone's item lists */
};

/* Value stored in uma_zone::magic. */
# define ZONE_MAGIC 0xdead0002
/* Zone descriptor: item size, callbacks, limits and the used/free item lists. */
struct uma_zone
{
    uint32_t magic; /* presumably ZONE_MAGIC for a valid zone - TODO confirm */
    PNATState pData; /* to minimize changes in the rest of UMA emulation code */
    RTCRITSECT csZone; /* critical section belonging to the zone */
    const char *name;
    size_t size; /* item size */
    ctor_t pfCtor;
    dtor_t pfDtor;
    zinit_t pfInit;
    zfini_t pfFini;
    uma_alloc_t pfAlloc;
    uma_free_t pfFree;
    int max_items;
    int cur_items;
    LIST_HEAD(RT_NOTHING, item) used_items;
    LIST_HEAD(RT_NOTHING, item) free_items;
    uma_zone_t master_zone; /* NULL for the master zone itself, per the remark below */
    void *area;
    /** Needs call pfnXmitPending when memory becomes available if @c true.
     * @remarks Only applies to the master zone (master_zone == NULL) */
    bool fDoXmitPending;
};
#endif |