diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:54:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:54:18 +0000 |
commit | 8e67fbf68ffeb9eb5f026dd482d73b021660bf9b (patch) | |
tree | bb573facd5d02096f9956b2617a722b88acaa8af /debian/grub-extras/disabled/gpxe/src/net | |
parent | Adding upstream version 2.06. (diff) | |
download | grub2-debian.tar.xz grub2-debian.zip |
Adding debian version 2.06-3~deb11u6.debian/2.06-3_deb11u6debian
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'debian/grub-extras/disabled/gpxe/src/net')
37 files changed, 20787 insertions, 0 deletions
diff --git a/debian/grub-extras/disabled/gpxe/src/net/80211/net80211.c b/debian/grub-extras/disabled/gpxe/src/net/80211/net80211.c new file mode 100644 index 0000000..1fc983a --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/80211/net80211.c @@ -0,0 +1,2645 @@ +/* + * The gPXE 802.11 MAC layer. + * + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <byteswap.h> +#include <stdlib.h> +#include <gpxe/settings.h> +#include <gpxe/if_arp.h> +#include <gpxe/ethernet.h> +#include <gpxe/ieee80211.h> +#include <gpxe/netdevice.h> +#include <gpxe/net80211.h> +#include <gpxe/timer.h> +#include <gpxe/nap.h> +#include <unistd.h> +#include <errno.h> + +/** @file + * + * 802.11 device management + */ + +/* Disambiguate the EINVAL's a bit */ +#define EINVAL_PKT_TOO_SHORT ( EINVAL | EUNIQ_01 ) +#define EINVAL_PKT_VERSION ( EINVAL | EUNIQ_02 ) +#define EINVAL_PKT_NOT_DATA ( EINVAL | EUNIQ_03 ) +#define EINVAL_PKT_NOT_FROMDS ( EINVAL | EUNIQ_04 ) +#define EINVAL_PKT_LLC_HEADER ( EINVAL | EUNIQ_05 ) +#define EINVAL_CRYPTO_REQUEST ( EINVAL | EUNIQ_06 ) +#define EINVAL_ACTIVE_SCAN ( EINVAL | EUNIQ_07 ) + +/* + * 802.11 error codes: The AP can give us a status code explaining why + * authentication failed, or a reason code explaining why we were + * deauthenticated/disassociated. These codes range from 0-63 (the + * field is 16 bits wide, but only up to 45 or so are defined yet; we + * allow up to 63 for extensibility). This is encoded into an error + * code as such: + * + * status & 0x1f goes here --vv-- + * Status code 0-31: ECONNREFUSED | EUNIQ_(status & 0x1f) (0e1a6038) + * Status code 32-63: EHOSTUNREACH | EUNIQ_(status & 0x1f) (171a6011) + * Reason code 0-31: ECONNRESET | EUNIQ_(reason & 0x1f) (0f1a6039) + * Reason code 32-63: ENETRESET | EUNIQ_(reason & 0x1f) (271a6001) + * + * The POSIX error codes more or less convey the appropriate message + * (status codes occur when we can't associate at all, reason codes + * when we lose association unexpectedly) and let us extract the + * complete 802.11 error code from the rc value. + */ + +/** Make return status code from 802.11 status code */ +#define E80211_STATUS( stat ) ( ((stat & 0x20)? EHOSTUNREACH : ECONNREFUSED) \ + | ((stat & 0x1f) << 8) ) + +/** Make return status code from 802.11 reason code */ +#define E80211_REASON( reas ) ( ((reas & 0x20)? ENETRESET : ECONNRESET) \ + | ((reas & 0x1f) << 8) ) + + +/** List of 802.11 devices */ +static struct list_head net80211_devices = LIST_HEAD_INIT ( net80211_devices ); + +/** Set of device operations that does nothing */ +static struct net80211_device_operations net80211_null_ops; + +/** Information associated with a received management packet + * + * This is used to keep beacon signal strengths in a parallel queue to + * the beacons themselves. + */ +struct net80211_rx_info { + int signal; + struct list_head list; +}; + +/** Context for a probe operation */ +struct net80211_probe_ctx { + /** 802.11 device to probe on */ + struct net80211_device *dev; + + /** Value of keep_mgmt before probe was started */ + int old_keep_mgmt; + + /** If scanning actively, pointer to probe packet to send */ + struct io_buffer *probe; + + /** If non-"", the ESSID to limit ourselves to */ + const char *essid; + + /** Time probe was started */ + u32 ticks_start; + + /** Time last useful beacon was received */ + u32 ticks_beacon; + + /** Time channel was last changed */ + u32 ticks_channel; + + /** Time to stay on each channel */ + u32 hop_time; + + /** Channels to hop by when changing channel */ + int hop_step; + + /** List of best beacons for each network found so far */ + struct list_head *beacons; +}; + +/** Context for the association task */ +struct net80211_assoc_ctx { + /** Next authentication method to try using */ + int method; + + /** Time (in ticks) of the last sent association-related packet */ + int last_packet; + + /** Number of times we have tried sending it */ + int times_tried; +}; + +/** + * @defgroup net80211_netdev Network device interface functions + * @{ + */ +static int net80211_netdev_open ( struct net_device *netdev ); +static void net80211_netdev_close ( struct net_device *netdev ); +static int net80211_netdev_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ); +static void net80211_netdev_poll ( struct net_device *netdev ); +static void net80211_netdev_irq ( struct net_device *netdev, int enable ); +/** @} */ + +/** + * @defgroup net80211_linklayer 802.11 link-layer protocol functions + * @{ + */ +static int net80211_ll_push ( struct net_device *netdev, + struct io_buffer *iobuf, const void *ll_dest, + const void *ll_source, uint16_t net_proto ); +static int net80211_ll_pull ( struct net_device *netdev, + struct io_buffer *iobuf, const void **ll_dest, + const void **ll_source, uint16_t * net_proto ); +/** @} */ + +/** + * @defgroup net80211_help 802.11 helper functions + * @{ + */ +static void net80211_add_channels ( struct net80211_device *dev, int start, + int len, int txpower ); +static void net80211_filter_hw_channels ( struct net80211_device *dev ); +static void net80211_set_rtscts_rate ( struct net80211_device *dev ); +static int net80211_process_capab ( struct net80211_device *dev, + u16 capab ); +static int net80211_process_ie ( struct net80211_device *dev, + union ieee80211_ie *ie, void *ie_end ); +static union ieee80211_ie * +net80211_marshal_request_info ( struct net80211_device *dev, + union ieee80211_ie *ie ); +/** @} */ + +/** + * @defgroup net80211_assoc_ll 802.11 association handling functions + * @{ + */ +static void net80211_step_associate ( struct process *proc ); +static void net80211_handle_auth ( struct net80211_device *dev, + struct io_buffer *iob ); +static void net80211_handle_assoc_reply ( struct net80211_device *dev, + struct io_buffer *iob ); +static int net80211_send_disassoc ( struct net80211_device *dev, int reason ); +static void net80211_handle_mgmt ( struct net80211_device *dev, + struct io_buffer *iob, int signal ); +/** @} */ + +/** + * @defgroup net80211_frag 802.11 fragment handling functions + * @{ + */ +static void net80211_free_frags ( struct net80211_device *dev, int fcid ); +static struct io_buffer *net80211_accum_frags ( struct net80211_device *dev, + int fcid, int nfrags, int size ); +static void net80211_rx_frag ( struct net80211_device *dev, + struct io_buffer *iob, int signal ); +/** @} */ + +/** + * @defgroup net80211_settings 802.11 settings handlers + * @{ + */ +static int net80211_check_ssid_update ( void ); + +/** 802.11 settings applicator + * + * When the SSID is changed, this will cause any open devices to + * re-associate. + */ +struct settings_applicator net80211_ssid_applicator __settings_applicator = { + .apply = net80211_check_ssid_update, +}; + +/** The network name to associate with + * + * If this is blank, we scan for all networks and use the one with the + * greatest signal strength. + */ +struct setting net80211_ssid_setting __setting = { + .name = "ssid", + .description = "802.11 SSID (network name)", + .type = &setting_type_string, +}; + +/** Whether to use active scanning + * + * In order to associate with a hidden SSID, it's necessary to use an + * active scan (send probe packets). If this setting is nonzero, an + * active scan on the 2.4GHz band will be used to associate. + */ +struct setting net80211_active_setting __setting = { + .name = "active-scan", + .description = "Use an active scan during 802.11 association", + .type = &setting_type_int8, +}; + +/** @} */ + + +/* ---------- net_device wrapper ---------- */ + +/** + * Open 802.11 device and start association + * + * @v netdev Wrapping network device + * @ret rc Return status code + * + * This sets up a default conservative set of channels for probing, + * and starts the auto-association task unless the @c + * NET80211_NO_ASSOC flag is set in the wrapped 802.11 device's @c + * state field. + */ +static int net80211_netdev_open ( struct net_device *netdev ) +{ + struct net80211_device *dev = netdev->priv; + int rc = 0; + + if ( dev->op == &net80211_null_ops ) + return -EFAULT; + + if ( dev->op->open ) + rc = dev->op->open ( dev ); + + if ( rc < 0 ) + return rc; + + if ( ! ( dev->state & NET80211_NO_ASSOC ) ) + net80211_autoassociate ( dev ); + + return 0; +} + +/** + * Close 802.11 device + * + * @v netdev Wrapping network device. + * + * If the association task is running, this will stop it. + */ +static void net80211_netdev_close ( struct net_device *netdev ) +{ + struct net80211_device *dev = netdev->priv; + + if ( dev->state & NET80211_WORKING ) + process_del ( &dev->proc_assoc ); + + /* Send disassociation frame to AP, to be polite */ + if ( dev->state & NET80211_ASSOCIATED ) + net80211_send_disassoc ( dev, IEEE80211_REASON_LEAVING ); + + netdev_link_down ( netdev ); + dev->state = 0; + + if ( dev->op->close ) + dev->op->close ( dev ); +} + +/** + * Transmit packet on 802.11 device + * + * @v netdev Wrapping network device + * @v iobuf I/O buffer + * @ret rc Return status code + * + * If encryption is enabled for the currently associated network, the + * packet will be encrypted prior to transmission. + */ +static int net80211_netdev_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) +{ + struct net80211_device *dev = netdev->priv; + int rc = -ENOSYS; + + if ( dev->crypto ) { + struct io_buffer *niob = dev->crypto->encrypt ( dev->crypto, + iobuf ); + if ( ! niob ) + return -ENOMEM; /* only reason encryption could fail */ + + free_iob ( iobuf ); + iobuf = niob; + } + + if ( dev->op->transmit ) + rc = dev->op->transmit ( dev, iobuf ); + + return rc; +} + +/** + * Poll 802.11 device for received packets and completed transmissions + * + * @v netdev Wrapping network device + */ +static void net80211_netdev_poll ( struct net_device *netdev ) +{ + struct net80211_device *dev = netdev->priv; + + if ( dev->op->poll ) + dev->op->poll ( dev ); +} + +/** + * Enable or disable interrupts for 802.11 device + * + * @v netdev Wrapping network device + * @v enable Whether to enable interrupts + */ +static void net80211_netdev_irq ( struct net_device *netdev, int enable ) +{ + struct net80211_device *dev = netdev->priv; + + if ( dev->op->irq ) + dev->op->irq ( dev, enable ); +} + +/** Network device operations for a wrapped 802.11 device */ +static struct net_device_operations net80211_netdev_ops = { + .open = net80211_netdev_open, + .close = net80211_netdev_close, + .transmit = net80211_netdev_transmit, + .poll = net80211_netdev_poll, + .irq = net80211_netdev_irq, +}; + + +/* ---------- 802.11 link-layer protocol ---------- */ + +/** 802.11 broadcast MAC address */ +static u8 net80211_ll_broadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + +/** + * Determine whether a transmission rate uses ERP/OFDM + * + * @v rate Rate in 100 kbps units + * @ret is_erp TRUE if the rate is an ERP/OFDM rate + * + * 802.11b supports rates of 1.0, 2.0, 5.5, and 11.0 Mbps; any other + * rate than these on the 2.4GHz spectrum is an ERP (802.11g) rate. + */ +static inline int net80211_rate_is_erp ( u16 rate ) +{ + if ( rate == 10 || rate == 20 || rate == 55 || rate == 110 ) + return 0; + return 1; +} + + +/** + * Calculate one frame's contribution to 802.11 duration field + * + * @v dev 802.11 device + * @v bytes Amount of data to calculate duration for + * @ret dur Duration field in microseconds + * + * To avoid multiple stations attempting to transmit at once, 802.11 + * provides that every packet shall include a duration field + * specifying a length of time for which the wireless medium will be + * reserved after it is transmitted. The duration is measured in + * microseconds and is calculated with respect to the current + * physical-layer parameters of the 802.11 device. + * + * For an unfragmented data or management frame, or the last fragment + * of a fragmented frame, the duration captures only the 10 data bytes + * of one ACK; call once with bytes = 10. + * + * For a fragment of a data or management rame that will be followed + * by more fragments, the duration captures an ACK, the following + * fragment, and its ACK; add the results of three calls, two with + * bytes = 10 and one with bytes set to the next fragment's size. + * + * For an RTS control frame, the duration captures the responding CTS, + * the frame being sent, and its ACK; add the results of three calls, + * two with bytes = 10 and one with bytes set to the next frame's size + * (assuming unfragmented). + * + * For a CTS-to-self control frame, the duration captures the frame + * being protected and its ACK; add the results of two calls, one with + * bytes = 10 and one with bytes set to the next frame's size. + * + * No other frame types are currently supported by gPXE. + */ +u16 net80211_duration ( struct net80211_device *dev, int bytes, u16 rate ) +{ + struct net80211_channel *chan = &dev->channels[dev->channel]; + u32 kbps = rate * 100; + + if ( chan->band == NET80211_BAND_5GHZ || net80211_rate_is_erp ( rate ) ) { + /* OFDM encoding (802.11a/g) */ + int bits_per_symbol = ( kbps * 4 ) / 1000; /* 4us/symbol */ + int bits = 22 + ( bytes << 3 ); /* 22-bit PLCP */ + int symbols = ( bits + bits_per_symbol - 1 ) / bits_per_symbol; + + return 16 + 20 + ( symbols * 4 ); /* 16us SIFS, 20us preamble */ + } else { + /* CCK encoding (802.11b) */ + int phy_time = 144 + 48; /* preamble + PLCP */ + int bits = bytes << 3; + int data_time = ( bits * 1000 + kbps - 1 ) / kbps; + + if ( dev->phy_flags & NET80211_PHY_USE_SHORT_PREAMBLE ) + phy_time >>= 1; + + return 10 + phy_time + data_time; /* 10us SIFS */ + } +} + +/** + * Add 802.11 link-layer header + * + * @v netdev Wrapping network device + * @v iobuf I/O buffer + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v net_proto Network-layer protocol, in network byte order + * @ret rc Return status code + * + * This adds both the 802.11 frame header and the 802.2 LLC/SNAP + * header used on data packets. + * + * We also check here for state of the link that would make it invalid + * to send a data packet; every data packet must pass through here, + * and no non-data packet (e.g. management frame) should. + */ +static int net80211_ll_push ( struct net_device *netdev, + struct io_buffer *iobuf, const void *ll_dest, + const void *ll_source, uint16_t net_proto ) +{ + struct net80211_device *dev = netdev->priv; + struct ieee80211_frame *hdr = iob_push ( iobuf, + IEEE80211_LLC_HEADER_LEN + + IEEE80211_TYP_FRAME_HEADER_LEN ); + struct ieee80211_llc_snap_header *lhdr = + ( void * ) hdr + IEEE80211_TYP_FRAME_HEADER_LEN; + + /* We can't send data packets if we're not associated. */ + if ( ! netdev_link_ok ( netdev ) ) { + if ( dev->assoc_rc ) + return dev->assoc_rc; + return -ENETUNREACH; + } + + hdr->fc = IEEE80211_THIS_VERSION | IEEE80211_TYPE_DATA | + IEEE80211_STYPE_DATA | IEEE80211_FC_TODS; + + /* We don't send fragmented frames, so duration is the time + for an SIFS + 10-byte ACK. */ + hdr->duration = net80211_duration ( dev, 10, dev->rates[dev->rate] ); + + memcpy ( hdr->addr1, dev->bssid, ETH_ALEN ); + memcpy ( hdr->addr2, ll_source, ETH_ALEN ); + memcpy ( hdr->addr3, ll_dest, ETH_ALEN ); + + hdr->seq = IEEE80211_MAKESEQ ( ++dev->last_tx_seqnr, 0 ); + + lhdr->dsap = IEEE80211_LLC_DSAP; + lhdr->ssap = IEEE80211_LLC_SSAP; + lhdr->ctrl = IEEE80211_LLC_CTRL; + memset ( lhdr->oui, 0x00, 3 ); + lhdr->ethertype = net_proto; + + return 0; +} + +/** + * Remove 802.11 link-layer header + * + * @v netdev Wrapping network device + * @v iobuf I/O buffer + * @ret ll_dest Link-layer destination address + * @ret ll_source Link-layer source + * @ret net_proto Network-layer protocol, in network byte order + * @ret rc Return status code + * + * This expects and removes both the 802.11 frame header and the 802.2 + * LLC/SNAP header that are used on data packets. + */ +static int net80211_ll_pull ( struct net_device *netdev __unused, + struct io_buffer *iobuf, + const void **ll_dest, const void **ll_source, + uint16_t * net_proto ) +{ + struct ieee80211_frame *hdr = iobuf->data; + struct ieee80211_llc_snap_header *lhdr = + ( void * ) hdr + IEEE80211_TYP_FRAME_HEADER_LEN; + + /* Bunch of sanity checks */ + if ( iob_len ( iobuf ) < IEEE80211_TYP_FRAME_HEADER_LEN + + IEEE80211_LLC_HEADER_LEN ) { + DBGC ( netdev->priv, "802.11 %p packet too short (%zd bytes)\n", + netdev->priv, iob_len ( iobuf ) ); + return -EINVAL_PKT_TOO_SHORT; + } + + if ( ( hdr->fc & IEEE80211_FC_VERSION ) != IEEE80211_THIS_VERSION ) { + DBGC ( netdev->priv, "802.11 %p packet invalid version %04x\n", + netdev->priv, hdr->fc & IEEE80211_FC_VERSION ); + return -EINVAL_PKT_VERSION; + } + + if ( ( hdr->fc & IEEE80211_FC_TYPE ) != IEEE80211_TYPE_DATA || + ( hdr->fc & IEEE80211_FC_SUBTYPE ) != IEEE80211_STYPE_DATA ) { + DBGC ( netdev->priv, "802.11 %p packet not data/data (fc=%04x)\n", + netdev->priv, hdr->fc ); + return -EINVAL_PKT_NOT_DATA; + } + + if ( ( hdr->fc & ( IEEE80211_FC_TODS | IEEE80211_FC_FROMDS ) ) != + IEEE80211_FC_FROMDS ) { + DBGC ( netdev->priv, "802.11 %p packet not from DS (fc=%04x)\n", + netdev->priv, hdr->fc ); + return -EINVAL_PKT_NOT_FROMDS; + } + + if ( lhdr->dsap != IEEE80211_LLC_DSAP || lhdr->ssap != IEEE80211_LLC_SSAP || + lhdr->ctrl != IEEE80211_LLC_CTRL || lhdr->oui[0] || lhdr->oui[1] || + lhdr->oui[2] ) { + DBGC ( netdev->priv, "802.11 %p LLC header is not plain EtherType " + "encapsulator: %02x->%02x [%02x] %02x:%02x:%02x %04x\n", + netdev->priv, lhdr->dsap, lhdr->ssap, lhdr->ctrl, + lhdr->oui[0], lhdr->oui[1], lhdr->oui[2], lhdr->ethertype ); + return -EINVAL_PKT_LLC_HEADER; + } + + iob_pull ( iobuf, sizeof ( *hdr ) + sizeof ( *lhdr ) ); + + *ll_dest = hdr->addr1; + *ll_source = hdr->addr3; + *net_proto = lhdr->ethertype; + return 0; +} + +/** 802.11 link-layer protocol */ +static struct ll_protocol net80211_ll_protocol __ll_protocol = { + .name = "802.11", + .push = net80211_ll_push, + .pull = net80211_ll_pull, + .init_addr = eth_init_addr, + .ntoa = eth_ntoa, + .mc_hash = eth_mc_hash, + .ll_proto = htons ( ARPHRD_ETHER ), /* "encapsulated Ethernet" */ + .hw_addr_len = ETH_ALEN, + .ll_addr_len = ETH_ALEN, + .ll_header_len = IEEE80211_TYP_FRAME_HEADER_LEN + + IEEE80211_LLC_HEADER_LEN, +}; + + +/* ---------- 802.11 network management API ---------- */ + +/** + * Get 802.11 device from wrapping network device + * + * @v netdev Wrapping network device + * @ret dev 802.11 device wrapped by network device, or NULL + * + * Returns NULL if the network device does not wrap an 802.11 device. + */ +struct net80211_device * net80211_get ( struct net_device *netdev ) +{ + struct net80211_device *dev; + + list_for_each_entry ( dev, &net80211_devices, list ) { + if ( netdev->priv == dev ) + return netdev->priv; + } + + return NULL; +} + +/** + * Set state of 802.11 device keeping management frames + * + * @v dev 802.11 device + * @v enable Whether to keep management frames + * @ret oldenab Whether management frames were enabled before this call + * + * If enable is TRUE, beacon, probe, and action frames will be kept + * and may be retrieved by calling net80211_mgmt_dequeue(). + */ +int net80211_keep_mgmt ( struct net80211_device *dev, int enable ) +{ + int oldenab = dev->keep_mgmt; + + dev->keep_mgmt = enable; + return oldenab; +} + +/** + * Get 802.11 management frame + * + * @v dev 802.11 device + * @ret signal Signal strength of returned management frame + * @ret iob I/O buffer, or NULL if no management frame is queued + * + * Frames will only be returned by this function if + * net80211_keep_mgmt() has been previously called with enable set to + * TRUE. + * + * The calling function takes ownership of the returned I/O buffer. + */ +struct io_buffer * net80211_mgmt_dequeue ( struct net80211_device *dev, + int *signal ) +{ + struct io_buffer *iobuf; + struct net80211_rx_info *rxi; + + list_for_each_entry ( rxi, &dev->mgmt_info_queue, list ) { + list_del ( &rxi->list ); + if ( signal ) + *signal = rxi->signal; + free ( rxi ); + + list_for_each_entry ( iobuf, &dev->mgmt_queue, list ) { + list_del ( &iobuf->list ); + return iobuf; + } + assert ( 0 ); + } + + return NULL; +} + +/** + * Transmit 802.11 management frame + * + * @v dev 802.11 device + * @v fc Frame Control flags for management frame + * @v dest Destination access point + * @v iob I/O buffer + * @ret rc Return status code + * + * The @a fc argument must contain at least an IEEE 802.11 management + * subtype number (e.g. IEEE80211_STYPE_PROBE_REQ). If it contains + * IEEE80211_FC_PROTECTED, the frame will be encrypted prior to + * transmission. + * + * It is required that @a iob have at least 24 bytes of headroom + * reserved before its data start. + */ +int net80211_tx_mgmt ( struct net80211_device *dev, u16 fc, u8 dest[6], + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob_push ( iob, + IEEE80211_TYP_FRAME_HEADER_LEN ); + + hdr->fc = IEEE80211_THIS_VERSION | IEEE80211_TYPE_MGMT | + ( fc & ~IEEE80211_FC_PROTECTED ); + hdr->duration = net80211_duration ( dev, 10, dev->rates[dev->rate] ); + hdr->seq = IEEE80211_MAKESEQ ( ++dev->last_tx_seqnr, 0 ); + + memcpy ( hdr->addr1, dest, ETH_ALEN ); /* DA = RA */ + memcpy ( hdr->addr2, dev->netdev->ll_addr, ETH_ALEN ); /* SA = TA */ + memcpy ( hdr->addr3, dest, ETH_ALEN ); /* BSSID */ + + if ( fc & IEEE80211_FC_PROTECTED ) { + if ( ! dev->crypto ) + return -EINVAL_CRYPTO_REQUEST; + + struct io_buffer *eiob = dev->crypto->encrypt ( dev->crypto, + iob ); + free_iob ( iob ); + iob = eiob; + } + + return netdev_tx ( dev->netdev, iob ); +} + + +/* ---------- Driver API ---------- */ + +/** + * Allocate 802.11 device + * + * @v priv_size Size of driver-private allocation area + * @ret dev Newly allocated 802.11 device + * + * This function allocates a net_device with space in its private area + * for both the net80211_device it will wrap and the driver-private + * data space requested. It initializes the link-layer-specific parts + * of the net_device, and links the net80211_device to the net_device + * appropriately. + */ +struct net80211_device * net80211_alloc ( size_t priv_size ) +{ + struct net80211_device *dev; + struct net_device *netdev = + alloc_netdev ( sizeof ( *dev ) + priv_size ); + + if ( ! netdev ) + return NULL; + + netdev->ll_protocol = &net80211_ll_protocol; + netdev->ll_broadcast = net80211_ll_broadcast; + netdev->max_pkt_len = IEEE80211_MAX_DATA_LEN; + netdev_init ( netdev, &net80211_netdev_ops ); + + dev = netdev->priv; + dev->netdev = netdev; + dev->priv = ( u8 * ) dev + sizeof ( *dev ); + dev->op = &net80211_null_ops; + + process_init_stopped ( &dev->proc_assoc, net80211_step_associate, + &netdev->refcnt ); + INIT_LIST_HEAD ( &dev->mgmt_queue ); + INIT_LIST_HEAD ( &dev->mgmt_info_queue ); + + return dev; +} + +/** + * Register 802.11 device with network stack + * + * @v dev 802.11 device + * @v ops 802.11 device operations + * @v hw 802.11 hardware information + * + * This also registers the wrapping net_device with the higher network + * layers. + */ +int net80211_register ( struct net80211_device *dev, + struct net80211_device_operations *ops, + struct net80211_hw_info *hw ) +{ + dev->op = ops; + dev->hw = malloc ( sizeof ( *hw ) ); + if ( ! dev->hw ) + return -ENOMEM; + + memcpy ( dev->hw, hw, sizeof ( *hw ) ); + memcpy ( dev->netdev->hw_addr, hw->hwaddr, ETH_ALEN ); + + /* Set some sensible channel defaults for driver's open() function */ + memcpy ( dev->channels, dev->hw->channels, + NET80211_MAX_CHANNELS * sizeof ( dev->channels[0] ) ); + dev->channel = 0; + + list_add_tail ( &dev->list, &net80211_devices ); + return register_netdev ( dev->netdev ); +} + +/** + * Unregister 802.11 device from network stack + * + * @v dev 802.11 device + * + * After this call, the device operations are cleared so that they + * will not be called. + */ +void net80211_unregister ( struct net80211_device *dev ) +{ + unregister_netdev ( dev->netdev ); + list_del ( &dev->list ); + dev->op = &net80211_null_ops; +} + +/** + * Free 802.11 device + * + * @v dev 802.11 device + * + * The device should be unregistered before this function is called. + */ +void net80211_free ( struct net80211_device *dev ) +{ + free ( dev->hw ); + rc80211_free ( dev->rctl ); + netdev_nullify ( dev->netdev ); + netdev_put ( dev->netdev ); +} + + +/* ---------- 802.11 network management workhorse code ---------- */ + +/** + * Set state of 802.11 device + * + * @v dev 802.11 device + * @v clear Bitmask of flags to clear + * @v set Bitmask of flags to set + * @v status Status or reason code for most recent operation + * + * If @a status represents a reason code, it should be OR'ed with + * NET80211_IS_REASON. + * + * Clearing authentication also clears association; clearing + * association also clears security handshaking state. Clearing + * association removes the link-up flag from the wrapping net_device, + * but setting it does not automatically set the flag; that is left to + * the judgment of higher-level code. + */ +static inline void net80211_set_state ( struct net80211_device *dev, + short clear, short set, + u16 status ) +{ + /* The conditions in this function are deliberately formulated + to be decidable at compile-time in most cases. Since clear + and set are generally passed as constants, the body of this + function can be reduced down to a few statements by the + compiler. */ + + const int statmsk = NET80211_STATUS_MASK | NET80211_IS_REASON; + + if ( clear & NET80211_PROBED ) + clear |= NET80211_AUTHENTICATED; + + if ( clear & NET80211_AUTHENTICATED ) + clear |= NET80211_ASSOCIATED; + + if ( clear & NET80211_ASSOCIATED ) + clear |= NET80211_CRYPTO_SYNCED; + + dev->state = ( dev->state & ~clear ) | set; + dev->state = ( dev->state & ~statmsk ) | ( status & statmsk ); + + if ( clear & NET80211_ASSOCIATED ) + netdev_link_down ( dev->netdev ); + + if ( ( clear | set ) & NET80211_ASSOCIATED ) + dev->op->config ( dev, NET80211_CFG_ASSOC ); + + if ( status != 0 ) { + if ( status & NET80211_IS_REASON ) + dev->assoc_rc = -E80211_REASON ( status ); + else + dev->assoc_rc = -E80211_STATUS ( status ); + netdev_link_err ( dev->netdev, dev->assoc_rc ); + } +} + +/** + * Add channels to 802.11 device + * + * @v dev 802.11 device + * @v start First channel number to add + * @v len Number of channels to add + * @v txpower TX power (dBm) to allow on added channels + * + * To replace the current list of channels instead of adding to it, + * set the nr_channels field of the 802.11 device to 0 before calling + * this function. + */ +static void net80211_add_channels ( struct net80211_device *dev, int start, + int len, int txpower ) +{ + int i, chan = start; + + for ( i = dev->nr_channels; len-- && i < NET80211_MAX_CHANNELS; i++ ) { + dev->channels[i].channel_nr = chan; + dev->channels[i].maxpower = txpower; + dev->channels[i].hw_value = 0; + + if ( chan >= 1 && chan <= 14 ) { + dev->channels[i].band = NET80211_BAND_2GHZ; + if ( chan == 14 ) + dev->channels[i].center_freq = 2484; + else + dev->channels[i].center_freq = 2407 + 5 * chan; + chan++; + } else { + dev->channels[i].band = NET80211_BAND_5GHZ; + dev->channels[i].center_freq = 5000 + 5 * chan; + chan += 4; + } + } + + dev->nr_channels = i; +} + +/** + * Filter 802.11 device channels for hardware capabilities + * + * @v dev 802.11 device + * + * Hardware may support fewer channels than regulatory restrictions + * allow; this function filters out channels in dev->channels that are + * not supported by the hardware list in dev->hwinfo. It also copies + * over the net80211_channel::hw_value and limits maximum TX power + * appropriately. + * + * Channels are matched based on center frequency, ignoring band and + * channel number. + * + * If the driver specifies no supported channels, the effect will be + * as though all were supported. + */ +static void net80211_filter_hw_channels ( struct net80211_device *dev ) +{ + int delta = 0, i = 0; + int old_freq = dev->channels[dev->channel].center_freq; + struct net80211_channel *chan, *hwchan; + + if ( ! dev->hw->nr_channels ) + return; + + dev->channel = 0; + for ( chan = dev->channels; chan < dev->channels + dev->nr_channels; + chan++, i++ ) { + int ok = 0; + for ( hwchan = dev->hw->channels; + hwchan < dev->hw->channels + dev->hw->nr_channels; + hwchan++ ) { + if ( hwchan->center_freq == chan->center_freq ) { + ok = 1; + break; + } + } + + if ( ! ok ) + delta++; + else { + chan->hw_value = hwchan->hw_value; + if ( hwchan->maxpower != 0 && + chan->maxpower > hwchan->maxpower ) + chan->maxpower = hwchan->maxpower; + if ( old_freq == chan->center_freq ) + dev->channel = i - delta; + if ( delta ) + chan[-delta] = *chan; + } + } + + dev->nr_channels -= delta; + + if ( dev->channels[dev->channel].center_freq != old_freq ) + dev->op->config ( dev, NET80211_CFG_CHANNEL ); +} + +/** + * Update 802.11 device state to reflect received capabilities field + * + * @v dev 802.11 device + * @v capab Capabilities field in beacon, probe, or association frame + * @ret rc Return status code + */ +static int net80211_process_capab ( struct net80211_device *dev, + u16 capab ) +{ + u16 old_phy = dev->phy_flags; + + if ( ( capab & ( IEEE80211_CAPAB_MANAGED | IEEE80211_CAPAB_ADHOC ) ) != + IEEE80211_CAPAB_MANAGED ) { + DBGC ( dev, "802.11 %p cannot handle IBSS network\n", dev ); + return -ENOSYS; + } + + if ( capab & IEEE80211_CAPAB_SPECTRUM_MGMT ) { + DBGC ( dev, "802.11 %p cannot handle spectrum managed " + "network\n", dev ); + return -ENOSYS; + } + + dev->phy_flags &= ~( NET80211_PHY_USE_SHORT_PREAMBLE | + NET80211_PHY_USE_SHORT_SLOT ); + + if ( capab & IEEE80211_CAPAB_SHORT_PMBL ) + dev->phy_flags |= NET80211_PHY_USE_SHORT_PREAMBLE; + + if ( capab & IEEE80211_CAPAB_SHORT_SLOT ) + dev->phy_flags |= NET80211_PHY_USE_SHORT_SLOT; + + if ( old_phy != dev->phy_flags ) + dev->op->config ( dev, NET80211_CFG_PHY_PARAMS ); + + return 0; +} + +/** + * Update 802.11 device state to reflect received information elements + * + * @v dev 802.11 device + * @v ie Pointer to first information element + * @v ie_end Pointer to tail of packet I/O buffer + * @ret rc Return status code + */ +static int net80211_process_ie ( struct net80211_device *dev, + union ieee80211_ie *ie, void *ie_end ) +{ + u16 old_rate = dev->rates[dev->rate]; + u16 old_phy = dev->phy_flags; + int have_rates = 0, i; + int ds_channel = 0; + int changed = 0; + int band = dev->channels[dev->channel].band; + + if ( ( void * ) ie >= ie_end ) + return 0; + + for ( ; ie; ie = ieee80211_next_ie ( ie, ie_end ) ) { + switch ( ie->id ) { + case IEEE80211_IE_SSID: + if ( ie->len <= 32 ) { + memcpy ( dev->essid, ie->ssid, ie->len ); + dev->essid[ie->len] = 0; + } + break; + + case IEEE80211_IE_RATES: + case IEEE80211_IE_EXT_RATES: + if ( ! have_rates ) { + dev->nr_rates = 0; + dev->basic_rates = 0; + have_rates = 1; + } + for ( i = 0; i < ie->len && + dev->nr_rates < NET80211_MAX_RATES; i++ ) { + u8 rid = ie->rates[i]; + u16 rate = ( rid & 0x7f ) * 5; + + if ( rid & 0x80 ) + dev->basic_rates |= + ( 1 << dev->nr_rates ); + + dev->rates[dev->nr_rates++] = rate; + } + + break; + + case IEEE80211_IE_DS_PARAM: + if ( dev->channel < dev->nr_channels && ds_channel == + dev->channels[dev->channel].channel_nr ) + break; + ds_channel = ie->ds_param.current_channel; + net80211_change_channel ( dev, ds_channel ); + break; + + case IEEE80211_IE_COUNTRY: + dev->nr_channels = 0; + + DBGC ( dev, "802.11 %p setting country regulations " + "for %c%c\n", dev, ie->country.name[0], + ie->country.name[1] ); + for ( i = 0; i < ( ie->len - 3 ) / 3; i++ ) { + union ieee80211_ie_country_triplet *t = + &ie->country.triplet[i]; + if ( t->first > 200 ) { + DBGC ( dev, "802.11 %p ignoring regulatory " + "extension information\n", dev ); + } else { + net80211_add_channels ( dev, + t->band.first_channel, + t->band.nr_channels, + t->band.max_txpower ); + } + } + net80211_filter_hw_channels ( dev ); + break; + + case IEEE80211_IE_ERP_INFO: + dev->phy_flags &= ~( NET80211_PHY_USE_PROTECTION | + NET80211_PHY_USE_SHORT_PREAMBLE ); + if ( ie->erp_info & IEEE80211_ERP_USE_PROTECTION ) + dev->phy_flags |= NET80211_PHY_USE_PROTECTION; + if ( ! ( ie->erp_info & IEEE80211_ERP_BARKER_LONG ) ) + dev->phy_flags |= NET80211_PHY_USE_SHORT_PREAMBLE; + break; + + case IEEE80211_IE_RSN: + /* XXX need to implement WPA stuff */ + break; + } + } + + if ( have_rates ) { + /* Allow only those rates that are also supported by + the hardware. */ + int delta = 0, j; + + dev->rate = 0; + for ( i = 0; i < dev->nr_rates; i++ ) { + int ok = 0; + for ( j = 0; j < dev->hw->nr_rates[band]; j++ ) { + if ( dev->hw->rates[band][j] == dev->rates[i] ){ + ok = 1; + break; + } + } + + if ( ! ok ) + delta++; + else { + dev->rates[i - delta] = dev->rates[i]; + if ( old_rate == dev->rates[i] ) + dev->rate = i - delta; + } + } + + dev->nr_rates -= delta; + + /* Sort available rates - sorted subclumps tend to already + exist, so insertion sort works well. */ + for ( i = 1; i < dev->nr_rates; i++ ) { + u16 rate = dev->rates[i]; + + for ( j = i - 1; j >= 0 && dev->rates[j] >= rate; j-- ) + dev->rates[j + 1] = dev->rates[j]; + dev->rates[j + 1] = rate; + } + + net80211_set_rtscts_rate ( dev ); + + if ( dev->rates[dev->rate] != old_rate ) + changed |= NET80211_CFG_RATE; + } + + if ( dev->hw->flags & NET80211_HW_NO_SHORT_PREAMBLE ) + dev->phy_flags &= ~NET80211_PHY_USE_SHORT_PREAMBLE; + if ( dev->hw->flags & NET80211_HW_NO_SHORT_SLOT ) + dev->phy_flags &= ~NET80211_PHY_USE_SHORT_SLOT; + + if ( old_phy != dev->phy_flags ) + changed |= NET80211_CFG_PHY_PARAMS; + + if ( changed ) + dev->op->config ( dev, changed ); + + return 0; +} + +/** + * Create information elements for outgoing probe or association packet + * + * @v dev 802.11 device + * @v ie Pointer to start of information element area + * @ret next_ie Pointer to first byte after added information elements + */ +static union ieee80211_ie * +net80211_marshal_request_info ( struct net80211_device *dev, + union ieee80211_ie *ie ) +{ + int i; + + ie->id = IEEE80211_IE_SSID; + ie->len = strlen ( dev->essid ); + memcpy ( ie->ssid, dev->essid, ie->len ); + + ie = ieee80211_next_ie ( ie, NULL ); + + ie->id = IEEE80211_IE_RATES; + ie->len = dev->nr_rates; + for ( i = 0; i < ie->len; i++ ) { + ie->rates[i] = dev->rates[i] / 5; + if ( dev->basic_rates & ( 1 << i ) ) + ie->rates[i] |= 0x80; + } + + if ( ie->len > 8 ) { + /* 802.11 requires we use an Extended Basic Rates IE + for the rates beyond the eighth. */ + int rates = ie->len; + + memmove ( ( void * ) ie + 2 + 8 + 2, ( void * ) ie + 2 + 8, + rates - 8 ); + ie->len = 8; + + ie = ieee80211_next_ie ( ie, NULL ); + + ie->id = IEEE80211_IE_EXT_RATES; + ie->len = rates - 8; + } + + ie = ieee80211_next_ie ( ie, NULL ); + + return ie; +} + +/** Seconds to wait after finding a network, to possibly find better APs for it + * + * This is used when a specific SSID to scan for is specified. + */ +#define NET80211_PROBE_GATHER 1 + +/** Seconds to wait after finding a network, to possibly find other networks + * + * This is used when an empty SSID is specified, to scan for all + * networks. + */ +#define NET80211_PROBE_GATHER_ALL 2 + +/** Seconds to allow a probe to take if no network has been found */ +#define NET80211_PROBE_TIMEOUT 6 + +/** + * Begin probe of 802.11 networks + * + * @v dev 802.11 device + * @v essid SSID to probe for, or "" to accept any (may not be NULL) + * @v active Whether to use active scanning + * @ret ctx Probe context + * + * Active scanning may only be used on channels 1-11 in the 2.4GHz + * band, due to gPXE's lack of a complete regulatory database. If + * active scanning is used, probe packets will be sent on each + * channel; this can allow association with hidden-SSID networks if + * the SSID is properly specified. + * + * A @c NULL return indicates an out-of-memory condition. + * + * The returned context must be periodically passed to + * net80211_probe_step() until that function returns zero. + */ +struct net80211_probe_ctx * net80211_probe_start ( struct net80211_device *dev, + const char *essid, + int active ) +{ + struct net80211_probe_ctx *ctx = zalloc ( sizeof ( *ctx ) ); + + if ( ! ctx ) + return NULL; + + assert ( dev->netdev->state & NETDEV_OPEN ); + + ctx->dev = dev; + ctx->old_keep_mgmt = net80211_keep_mgmt ( dev, 1 ); + ctx->essid = essid; + if ( dev->essid != ctx->essid ) + strcpy ( dev->essid, ctx->essid ); + + if ( active ) { + struct ieee80211_probe_req *probe_req; + union ieee80211_ie *ie; + + ctx->probe = alloc_iob ( 128 ); + iob_reserve ( ctx->probe, IEEE80211_TYP_FRAME_HEADER_LEN ); + probe_req = ctx->probe->data; + + ie = net80211_marshal_request_info ( dev, + probe_req->info_element ); + ie->id = IEEE80211_IE_REQUEST; + ie->len = 3; + ie->request[0] = IEEE80211_IE_COUNTRY; + ie->request[1] = IEEE80211_IE_ERP_INFO; + ie->request[2] = IEEE80211_IE_RSN; + + ie = ieee80211_next_ie ( ie, NULL ); + + iob_put ( ctx->probe, ( void * ) ie - ctx->probe->data ); + } + + ctx->ticks_start = currticks(); + ctx->ticks_beacon = 0; + ctx->ticks_channel = currticks(); + ctx->hop_time = ticks_per_sec() / ( active ? 2 : 6 ); + + /* + * Channels on 2.4GHz overlap, and the most commonly used + * are 1, 6, and 11. We'll get a result faster if we check + * every 5 channels, but in order to hit all of them the + * number of channels must be relatively prime to 5. If it's + * not, tweak the hop. + */ + ctx->hop_step = 5; + while ( dev->nr_channels % ctx->hop_step == 0 && ctx->hop_step > 1 ) + ctx->hop_step--; + + ctx->beacons = malloc ( sizeof ( *ctx->beacons ) ); + INIT_LIST_HEAD ( ctx->beacons ); + + dev->channel = 0; + dev->op->config ( dev, NET80211_CFG_CHANNEL ); + + return ctx; +} + +/** + * Continue probe of 802.11 networks + * + * @v ctx Probe context returned by net80211_probe_start() + * @ret rc Probe status + * + * The return code will be 0 if the probe is still going on (and this + * function should be called again), a positive number if the probe + * completed successfully, or a negative error code if the probe + * failed for that reason. + * + * Whether the probe succeeded or failed, you must call + * net80211_probe_finish_all() or net80211_probe_finish_best() + * (depending on whether you want information on all networks or just + * the best-signal one) in order to release the probe context. A + * failed probe may still have acquired some valid data. + */ +int net80211_probe_step ( struct net80211_probe_ctx *ctx ) +{ + struct net80211_device *dev = ctx->dev; + u32 start_timeout = NET80211_PROBE_TIMEOUT * ticks_per_sec(); + u32 gather_timeout = ticks_per_sec(); + u32 now = currticks(); + struct io_buffer *iob; + int signal; + int rc; + char ssid[IEEE80211_MAX_SSID_LEN + 1]; + + gather_timeout *= ( ctx->essid[0] ? NET80211_PROBE_GATHER : + NET80211_PROBE_GATHER_ALL ); + + /* Time out if necessary */ + if ( now >= ctx->ticks_start + start_timeout ) + return list_empty ( ctx->beacons ) ? -ETIMEDOUT : +1; + + if ( ctx->ticks_beacon > 0 && now >= ctx->ticks_start + gather_timeout ) + return +1; + + /* Change channels if necessary */ + if ( now >= ctx->ticks_channel + ctx->hop_time ) { + dev->channel = ( dev->channel + ctx->hop_step ) + % dev->nr_channels; + dev->op->config ( dev, NET80211_CFG_CHANNEL ); + udelay ( dev->hw->channel_change_time ); + + ctx->ticks_channel = now; + + if ( ctx->probe ) { + struct io_buffer *siob = ctx->probe; /* to send */ + + /* make a copy for future use */ + iob = alloc_iob ( siob->tail - siob->head ); + iob_reserve ( iob, iob_headroom ( siob ) ); + memcpy ( iob_put ( iob, iob_len ( siob ) ), + siob->data, iob_len ( siob ) ); + + ctx->probe = iob; + rc = net80211_tx_mgmt ( dev, IEEE80211_STYPE_PROBE_REQ, + net80211_ll_broadcast, + iob_disown ( siob ) ); + if ( rc ) { + DBGC ( dev, "802.11 %p send probe failed: " + "%s\n", dev, strerror ( rc ) ); + return rc; + } + } + } + + /* Check for new management packets */ + while ( ( iob = net80211_mgmt_dequeue ( dev, &signal ) ) != NULL ) { + struct ieee80211_frame *hdr; + struct ieee80211_beacon *beacon; + union ieee80211_ie *ie; + struct net80211_wlan *wlan; + u16 type; + + hdr = iob->data; + type = hdr->fc & IEEE80211_FC_SUBTYPE; + beacon = ( struct ieee80211_beacon * ) hdr->data; + + if ( type != IEEE80211_STYPE_BEACON && + type != IEEE80211_STYPE_PROBE_RESP ) { + DBGC2 ( dev, "802.11 %p probe: non-beacon\n", dev ); + goto drop; + } + + if ( ( void * ) beacon->info_element >= iob->tail ) { + DBGC ( dev, "802.11 %p probe: beacon with no IEs\n", + dev ); + goto drop; + } + + ie = beacon->info_element; + while ( ie && ie->id != IEEE80211_IE_SSID ) + ie = ieee80211_next_ie ( ie, iob->tail ); + + if ( ! ie ) { + DBGC ( dev, "802.11 %p probe: beacon with no SSID\n", + dev ); + goto drop; + } + + memcpy ( ssid, ie->ssid, ie->len ); + ssid[ie->len] = 0; + + if ( ctx->essid[0] && strcmp ( ctx->essid, ssid ) != 0 ) { + DBGC2 ( dev, "802.11 %p probe: beacon with wrong SSID " + "(%s)\n", dev, ssid ); + goto drop; + } + + /* See if we've got an entry for this network */ + list_for_each_entry ( wlan, ctx->beacons, list ) { + if ( strcmp ( wlan->essid, ssid ) != 0 ) + continue; + + if ( signal < wlan->signal ) { + DBGC2 ( dev, "802.11 %p probe: beacon for %s " + "(%s) with weaker signal %d\n", dev, + ssid, eth_ntoa ( hdr->addr3 ), signal ); + goto drop; + } + + goto fill; + } + + /* No entry yet - make one */ + wlan = zalloc ( sizeof ( *wlan ) ); + strcpy ( wlan->essid, ssid ); + list_add_tail ( &wlan->list, ctx->beacons ); + + /* Whether we're using an old entry or a new one, fill + it with new data. */ + fill: + memcpy ( wlan->bssid, hdr->addr3, ETH_ALEN ); + wlan->signal = signal; + wlan->channel = dev->channels[dev->channel].channel_nr; + + /* Copy this I/O buffer into a new wlan->beacon; the + * iob we've got probably came from the device driver + * and may have the full 2.4k allocation, which we + * don't want to keep around wasting memory. + */ + free_iob ( wlan->beacon ); + wlan->beacon = alloc_iob ( iob_len ( iob ) ); + memcpy ( iob_put ( wlan->beacon, iob_len ( iob ) ), + iob->data, iob_len ( iob ) ); + + /* XXX actually check capab and RSN ie to + figure this out */ + wlan->handshaking = NET80211_SECPROT_NONE; + wlan->crypto = NET80211_CRYPT_NONE; + + ctx->ticks_beacon = now; + + DBGC2 ( dev, "802.11 %p probe: good beacon for %s (%s)\n", + dev, wlan->essid, eth_ntoa ( wlan->bssid ) ); + + drop: + free_iob ( iob ); + } + + return 0; +} + + +/** + * Finish probe of 802.11 networks, returning best-signal network found + * + * @v ctx Probe context + * @ret wlan Best-signal network found, or @c NULL if none were found + * + * If net80211_probe_start() was called with a particular SSID + * parameter as filter, only a network with that SSID (matching + * case-sensitively) can be returned from this function. + */ +struct net80211_wlan * +net80211_probe_finish_best ( struct net80211_probe_ctx *ctx ) +{ + struct net80211_wlan *best = NULL, *wlan; + + if ( ! ctx ) + return NULL; + + list_for_each_entry ( wlan, ctx->beacons, list ) { + if ( ! best || best->signal < wlan->signal ) + best = wlan; + } + + if ( best ) + list_del ( &best->list ); + else + DBGC ( ctx->dev, "802.11 %p probe: found nothing for '%s'\n", + ctx->dev, ctx->essid ); + + net80211_free_wlanlist ( ctx->beacons ); + + net80211_keep_mgmt ( ctx->dev, ctx->old_keep_mgmt ); + + if ( ctx->probe ) + free_iob ( ctx->probe ); + + free ( ctx ); + + return best; +} + + +/** + * Finish probe of 802.11 networks, returning all networks found + * + * @v ctx Probe context + * @ret list List of net80211_wlan detailing networks found + * + * If net80211_probe_start() was called with a particular SSID + * parameter as filter, this will always return either an empty or a + * one-element list. + */ +struct list_head *net80211_probe_finish_all ( struct net80211_probe_ctx *ctx ) +{ + struct list_head *beacons = ctx->beacons; + + if ( ! ctx ) + return NULL; + + net80211_keep_mgmt ( ctx->dev, ctx->old_keep_mgmt ); + + if ( ctx->probe ) + free_iob ( ctx->probe ); + + free ( ctx ); + + return beacons; +} + + +/** + * Free WLAN structure + * + * @v wlan WLAN structure to free + */ +void net80211_free_wlan ( struct net80211_wlan *wlan ) +{ + if ( wlan ) { + free_iob ( wlan->beacon ); + free ( wlan ); + } +} + + +/** + * Free list of WLAN structures + * + * @v list List of WLAN structures to free + */ +void net80211_free_wlanlist ( struct list_head *list ) +{ + struct net80211_wlan *wlan, *tmp; + + if ( ! list ) + return; + + list_for_each_entry_safe ( wlan, tmp, list, list ) { + list_del ( &wlan->list ); + net80211_free_wlan ( wlan ); + } + + free ( list ); +} + + +/** Number of ticks to wait for replies to association management frames */ +#define ASSOC_TIMEOUT TICKS_PER_SEC + +/** Number of times to try sending a particular association management frame */ +#define ASSOC_RETRIES 2 + +/** + * Step 802.11 association process + * + * @v proc Association process + */ +static void net80211_step_associate ( struct process *proc ) +{ + struct net80211_device *dev = + container_of ( proc, struct net80211_device, proc_assoc ); + int rc = 0; + int status = dev->state & NET80211_STATUS_MASK; + + /* + * We use a sort of state machine implemented using bits in + * the dev->state variable. At each call, we take the + * logically first step that has not yet succeeded; either it + * has not been tried yet, it's being retried, or it failed. + * If it failed, we return an error indication; otherwise we + * perform the step. If it succeeds, RX handling code will set + * the appropriate status bit for us. + * + * Probe works a bit differently, since we have to step it + * on every call instead of waiting for a packet to arrive + * that will set the completion bit for us. + */ + + /* If we're waiting for a reply, check for timeout condition */ + if ( dev->state & NET80211_WAITING ) { + /* Sanity check */ + if ( ! dev->associating ) + return; + + if ( currticks() - dev->ctx.assoc->last_packet > ASSOC_TIMEOUT ) { + /* Timed out - fail if too many retries, or retry */ + dev->ctx.assoc->times_tried++; + if ( ++dev->ctx.assoc->times_tried > ASSOC_RETRIES ) { + rc = -ETIMEDOUT; + goto fail; + } + } else { + /* Didn't time out - let it keep going */ + return; + } + } else { + if ( dev->state & NET80211_PROBED ) + dev->ctx.assoc->times_tried = 0; + } + + if ( ! ( dev->state & NET80211_PROBED ) ) { + /* state: probe */ + + if ( ! dev->ctx.probe ) { + /* start probe */ + int active = fetch_intz_setting ( NULL, + &net80211_active_setting ); + int band = dev->hw->bands; + + if ( active ) + band &= ~NET80211_BAND_BIT_5GHZ; + + rc = net80211_prepare_probe ( dev, band, active ); + if ( rc ) + goto fail; + + dev->ctx.probe = net80211_probe_start ( dev, dev->essid, + active ); + if ( ! dev->ctx.probe ) { + dev->assoc_rc = -ENOMEM; + goto fail; + } + } + + rc = net80211_probe_step ( dev->ctx.probe ); + if ( ! rc ) { + return; /* still going */ + } + + dev->associating = net80211_probe_finish_best ( dev->ctx.probe ); + dev->ctx.probe = NULL; + if ( ! dev->associating ) { + if ( rc > 0 ) /* "successful" probe found nothing */ + rc = -ETIMEDOUT; + goto fail; + } + + /* If we probed using a broadcast SSID, record that + fact for the settings applicator before we clobber + it with the specific SSID we've chosen. */ + if ( ! dev->essid[0] ) + dev->state |= NET80211_AUTO_SSID; + + DBGC ( dev, "802.11 %p found network %s (%s)\n", dev, + dev->associating->essid, + eth_ntoa ( dev->associating->bssid ) ); + + dev->ctx.assoc = zalloc ( sizeof ( *dev->ctx.assoc ) ); + if ( ! dev->ctx.assoc ) { + rc = -ENOMEM; + goto fail; + } + + dev->state |= NET80211_PROBED; + dev->ctx.assoc->method = IEEE80211_AUTH_OPEN_SYSTEM; + + return; + } + + /* Record time of sending the packet we're about to send, for timeout */ + dev->ctx.assoc->last_packet = currticks(); + + if ( ! ( dev->state & NET80211_AUTHENTICATED ) ) { + /* state: prepare and authenticate */ + + if ( status != IEEE80211_STATUS_SUCCESS ) { + /* we tried authenticating already, but failed */ + int method = dev->ctx.assoc->method; + + if ( method == IEEE80211_AUTH_OPEN_SYSTEM && + ( status == IEEE80211_STATUS_AUTH_CHALL_INVALID || + status == IEEE80211_STATUS_AUTH_ALGO_UNSUPP ) ) { + /* Maybe this network uses Shared Key? */ + dev->ctx.assoc->method = + IEEE80211_AUTH_SHARED_KEY; + } else { + goto fail; + } + } + + DBGC ( dev, "802.11 %p authenticating with method %d\n", dev, + dev->ctx.assoc->method ); + + rc = net80211_prepare_assoc ( dev, dev->associating ); + if ( rc ) + goto fail; + + rc = net80211_send_auth ( dev, dev->associating, + dev->ctx.assoc->method ); + if ( rc ) + goto fail; + + return; + } + + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) { + /* state: associate */ + + if ( status != IEEE80211_STATUS_SUCCESS ) + goto fail; + + DBGC ( dev, "802.11 %p associating\n", dev ); + + rc = net80211_send_assoc ( dev, dev->associating ); + if ( rc ) + goto fail; + + return; + } + + if ( ! ( dev->state & NET80211_CRYPTO_SYNCED ) ) { + /* state: crypto sync */ + DBGC ( dev, "802.11 %p security handshaking\n", dev ); + + dev->state |= NET80211_CRYPTO_SYNCED; + /* XXX need to actually do something here once we + support WPA */ + return; + } + + /* state: done! */ + netdev_link_up ( dev->netdev ); + dev->assoc_rc = 0; + dev->state &= ~NET80211_WORKING; + + free ( dev->ctx.assoc ); + dev->ctx.assoc = NULL; + + net80211_free_wlan ( dev->associating ); + dev->associating = NULL; + + dev->rctl = rc80211_init ( dev ); + + process_del ( proc ); + + DBGC ( dev, "802.11 %p associated with %s (%s)\n", dev, + dev->essid, eth_ntoa ( dev->bssid ) ); + + return; + + fail: + dev->state &= ~( NET80211_WORKING | NET80211_WAITING ); + if ( rc ) + dev->assoc_rc = rc; + + netdev_link_err ( dev->netdev, dev->assoc_rc ); + + /* We never reach here from the middle of a probe, so we don't + need to worry about freeing dev->ctx.probe. */ + + if ( dev->state & NET80211_PROBED ) { + free ( dev->ctx.assoc ); + dev->ctx.assoc = NULL; + } + + net80211_free_wlan ( dev->associating ); + dev->associating = NULL; + + process_del ( proc ); + + DBGC ( dev, "802.11 %p association failed (state=%04x): " + "%s\n", dev, dev->state, strerror ( dev->assoc_rc ) ); + + /* Try it again: */ + net80211_autoassociate ( dev ); +} + +/** + * Check for 802.11 SSID updates + * + * This acts as a settings applicator; if the user changes netX/ssid, + * and netX is currently open, the association task will be invoked + * again. + */ +static int net80211_check_ssid_update ( void ) +{ + struct net80211_device *dev; + char ssid[IEEE80211_MAX_SSID_LEN + 1]; + + list_for_each_entry ( dev, &net80211_devices, list ) { + if ( ! ( dev->netdev->state & NETDEV_OPEN ) ) + continue; + + fetch_string_setting ( netdev_settings ( dev->netdev ), + &net80211_ssid_setting, ssid, + IEEE80211_MAX_SSID_LEN + 1 ); + + if ( strcmp ( ssid, dev->essid ) != 0 && + ! ( ! ssid[0] && ( dev->state & NET80211_AUTO_SSID ) ) ) { + DBGC ( dev, "802.11 %p updating association: " + "%s -> %s\n", dev, dev->essid, ssid ); + net80211_autoassociate ( dev ); + } + } + + return 0; +} + +/** + * Start 802.11 association process + * + * @v dev 802.11 device + * + * If the association process is running, it will be restarted. + */ +void net80211_autoassociate ( struct net80211_device *dev ) +{ + if ( ! ( dev->state & NET80211_WORKING ) ) { + DBGC2 ( dev, "802.11 %p spawning association process\n", dev ); + process_add ( &dev->proc_assoc ); + } + + /* Clean up everything an earlier association process might + have been in the middle of using */ + if ( dev->associating ) + net80211_free_wlan ( dev->associating ); + + if ( ! ( dev->state & NET80211_PROBED ) ) + net80211_free_wlan ( + net80211_probe_finish_best ( dev->ctx.probe ) ); + else + free ( dev->ctx.assoc ); + + /* Reset to a clean state */ + fetch_string_setting ( netdev_settings ( dev->netdev ), + &net80211_ssid_setting, dev->essid, + IEEE80211_MAX_SSID_LEN + 1 ); + dev->ctx.probe = NULL; + dev->associating = NULL; + net80211_set_state ( dev, NET80211_PROBED, NET80211_WORKING, 0 ); +} + +/** + * Pick TX rate for RTS/CTS packets based on data rate + * + * @v dev 802.11 device + * + * The RTS/CTS rate is the fastest TX rate marked as "basic" that is + * not faster than the data rate. + */ +static void net80211_set_rtscts_rate ( struct net80211_device *dev ) +{ + u16 datarate = dev->rates[dev->rate]; + u16 rtsrate = 0; + int rts_idx = -1; + int i; + + for ( i = 0; i < dev->nr_rates; i++ ) { + u16 rate = dev->rates[i]; + + if ( ! ( dev->basic_rates & ( 1 << i ) ) || rate > datarate ) + continue; + + if ( rate > rtsrate ) { + rtsrate = rate; + rts_idx = i; + } + } + + /* If this is in initialization, we might not have any basic + rates; just use the first data rate in that case. */ + if ( rts_idx < 0 ) + rts_idx = 0; + + dev->rtscts_rate = rts_idx; +} + +/** + * Set data transmission rate for 802.11 device + * + * @v dev 802.11 device + * @v rate Rate to set, as index into @c dev->rates array + */ +void net80211_set_rate_idx ( struct net80211_device *dev, int rate ) +{ + assert ( dev->netdev->state & NETDEV_OPEN ); + + if ( rate >= 0 && rate < dev->nr_rates && rate != dev->rate ) { + DBGC2 ( dev, "802.11 %p changing rate from %d->%d Mbps\n", + dev, dev->rates[dev->rate] / 10, + dev->rates[rate] / 10 ); + + dev->rate = rate; + net80211_set_rtscts_rate ( dev ); + dev->op->config ( dev, NET80211_CFG_RATE ); + } +} + +/** + * Configure 802.11 device to transmit on a certain channel + * + * @v dev 802.11 device + * @v channel Channel number (1-11 for 2.4GHz) to transmit on + */ +int net80211_change_channel ( struct net80211_device *dev, int channel ) +{ + int i, oldchan = dev->channel; + + assert ( dev->netdev->state & NETDEV_OPEN ); + + for ( i = 0; i < dev->nr_channels; i++ ) { + if ( dev->channels[i].channel_nr == channel ) { + dev->channel = i; + break; + } + } + + if ( i == dev->nr_channels ) + return -ENOENT; + + if ( i != oldchan ) + return dev->op->config ( dev, NET80211_CFG_CHANNEL ); + + return 0; +} + +/** + * Prepare 802.11 device channel and rate set for scanning + * + * @v dev 802.11 device + * @v band RF band(s) on which to prepare for scanning + * @v active Whether the scanning will be active + * @ret rc Return status code + */ +int net80211_prepare_probe ( struct net80211_device *dev, int band, + int active ) +{ + assert ( dev->netdev->state & NETDEV_OPEN ); + + if ( active && ( band & NET80211_BAND_BIT_5GHZ ) ) { + DBGC ( dev, "802.11 %p cannot perform active scanning on " + "5GHz band\n", dev ); + return -EINVAL_ACTIVE_SCAN; + } + + if ( band == 0 ) { + /* This can happen for a 5GHz-only card with 5GHz + scanning masked out by an active request. */ + DBGC ( dev, "802.11 %p asked to prepare for scanning nothing\n", + dev ); + return -EINVAL_ACTIVE_SCAN; + } + + dev->nr_channels = 0; + + if ( active ) + net80211_add_channels ( dev, 1, 11, NET80211_REG_TXPOWER ); + else { + if ( band & NET80211_BAND_BIT_2GHZ ) + net80211_add_channels ( dev, 1, 14, + NET80211_REG_TXPOWER ); + if ( band & NET80211_BAND_BIT_5GHZ ) + net80211_add_channels ( dev, 36, 8, + NET80211_REG_TXPOWER ); + } + + net80211_filter_hw_channels ( dev ); + + /* Use channel 1 for now */ + dev->channel = 0; + dev->op->config ( dev, NET80211_CFG_CHANNEL ); + + /* Always do active probes at lowest (presumably first) speed */ + dev->rate = 0; + dev->nr_rates = 1; + dev->rates[0] = dev->hw->rates[dev->channels[0].band][0]; + dev->op->config ( dev, NET80211_CFG_RATE ); + + return 0; +} + +/** + * Prepare 802.11 device channel and rate set for communication + * + * @v dev 802.11 device + * @v wlan WLAN to prepare for communication with + * @ret rc Return status code + */ +int net80211_prepare_assoc ( struct net80211_device *dev, + struct net80211_wlan *wlan ) +{ + struct ieee80211_frame *hdr = wlan->beacon->data; + struct ieee80211_beacon *beacon = + ( struct ieee80211_beacon * ) hdr->data; + int rc; + + assert ( dev->netdev->state & NETDEV_OPEN ); + + net80211_set_state ( dev, NET80211_ASSOCIATED, 0, 0 ); + memcpy ( dev->bssid, wlan->bssid, ETH_ALEN ); + strcpy ( dev->essid, wlan->essid ); + + dev->last_beacon_timestamp = beacon->timestamp; + dev->tx_beacon_interval = 1024 * beacon->beacon_interval; + + /* XXX do crypto setup here */ + + /* Barring an IE that tells us the channel outright, assume + the channel we heard this AP best on is the channel it's + communicating on. */ + net80211_change_channel ( dev, wlan->channel ); + + rc = net80211_process_capab ( dev, beacon->capability ); + if ( rc ) + return rc; + + rc = net80211_process_ie ( dev, beacon->info_element, + wlan->beacon->tail ); + if ( rc ) + return rc; + + /* Associate at the lowest rate so we know it'll get through */ + dev->rate = 0; + dev->op->config ( dev, NET80211_CFG_RATE ); + + return 0; +} + +/** + * Send 802.11 initial authentication frame + * + * @v dev 802.11 device + * @v wlan WLAN to authenticate with + * @v method Authentication method + * @ret rc Return status code + * + * @a method may be 0 for Open System authentication or 1 for Shared + * Key authentication. Open System provides no security in association + * whatsoever, relying on encryption for confidentiality, but Shared + * Key actively introduces security problems and is very rarely used. + */ +int net80211_send_auth ( struct net80211_device *dev, + struct net80211_wlan *wlan, int method ) +{ + struct io_buffer *iob = alloc_iob ( 64 ); + struct ieee80211_auth *auth; + + net80211_set_state ( dev, 0, NET80211_WAITING, 0 ); + iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN ); + auth = iob_put ( iob, sizeof ( *auth ) ); + auth->algorithm = method; + auth->tx_seq = 1; + auth->status = 0; + + return net80211_tx_mgmt ( dev, IEEE80211_STYPE_AUTH, wlan->bssid, iob ); +} + +/** + * Handle receipt of 802.11 authentication frame + * + * @v dev 802.11 device + * @v iob I/O buffer + * + * If the authentication method being used is Shared Key, and the + * frame that was received included challenge text, the frame is + * encrypted using the cryptographic algorithm currently in effect and + * sent back to the AP to complete the authentication. + */ +static void net80211_handle_auth ( struct net80211_device *dev, + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_auth *auth = + ( struct ieee80211_auth * ) hdr->data; + + if ( auth->tx_seq & 1 ) { + DBGC ( dev, "802.11 %p authentication received improperly " + "directed frame (seq. %d)\n", dev, auth->tx_seq ); + net80211_set_state ( dev, NET80211_WAITING, 0, + IEEE80211_STATUS_FAILURE ); + return; + } + + if ( auth->status != IEEE80211_STATUS_SUCCESS ) { + DBGC ( dev, "802.11 %p authentication failed: status %d\n", + dev, auth->status ); + net80211_set_state ( dev, NET80211_WAITING, 0, + auth->status ); + return; + } + + if ( auth->algorithm == IEEE80211_AUTH_SHARED_KEY && ! dev->crypto ) { + DBGC ( dev, "802.11 %p can't perform shared-key authentication " + "without a cryptosystem\n", dev ); + net80211_set_state ( dev, NET80211_WAITING, 0, + IEEE80211_STATUS_FAILURE ); + return; + } + + if ( auth->algorithm == IEEE80211_AUTH_SHARED_KEY && + auth->tx_seq == 2 ) { + /* Since the iob we got is going to be freed as soon + as we return, we can do some in-place + modification. */ + auth->tx_seq = 3; + auth->status = 0; + + memcpy ( hdr->addr2, hdr->addr1, ETH_ALEN ); + memcpy ( hdr->addr1, hdr->addr3, ETH_ALEN ); + + netdev_tx ( dev->netdev, + dev->crypto->encrypt ( dev->crypto, iob ) ); + return; + } + + net80211_set_state ( dev, NET80211_WAITING, NET80211_AUTHENTICATED, + IEEE80211_STATUS_SUCCESS ); + + return; +} + +/** + * Send 802.11 association frame + * + * @v dev 802.11 device + * @v wlan WLAN to associate with + * @ret rc Return status code + */ +int net80211_send_assoc ( struct net80211_device *dev, + struct net80211_wlan *wlan ) +{ + struct io_buffer *iob = alloc_iob ( 128 ); + struct ieee80211_assoc_req *assoc; + union ieee80211_ie *ie; + + net80211_set_state ( dev, 0, NET80211_WAITING, 0 ); + + iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN ); + assoc = iob->data; + + assoc->capability = IEEE80211_CAPAB_MANAGED; + if ( ! ( dev->hw->flags & NET80211_HW_NO_SHORT_PREAMBLE ) ) + assoc->capability |= IEEE80211_CAPAB_SHORT_PMBL; + if ( ! ( dev->hw->flags & NET80211_HW_NO_SHORT_SLOT ) ) + assoc->capability |= IEEE80211_CAPAB_SHORT_SLOT; + if ( wlan->crypto ) + assoc->capability |= IEEE80211_CAPAB_PRIVACY; + + assoc->listen_interval = 1; + + ie = net80211_marshal_request_info ( dev, assoc->info_element ); + + DBGP ( "802.11 %p about to send association request:\n", dev ); + DBGP_HD ( iob->data, ( void * ) ie - iob->data ); + + /* XXX add RSN ie for WPA support */ + + iob_put ( iob, ( void * ) ie - iob->data ); + + return net80211_tx_mgmt ( dev, IEEE80211_STYPE_ASSOC_REQ, + wlan->bssid, iob ); +} + +/** + * Handle receipt of 802.11 association reply frame + * + * @v dev 802.11 device + * @v iob I/O buffer + */ +static void net80211_handle_assoc_reply ( struct net80211_device *dev, + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_assoc_resp *assoc = + ( struct ieee80211_assoc_resp * ) hdr->data; + + net80211_process_capab ( dev, assoc->capability ); + net80211_process_ie ( dev, assoc->info_element, iob->tail ); + + if ( assoc->status != IEEE80211_STATUS_SUCCESS ) { + DBGC ( dev, "802.11 %p association failed: status %d\n", + dev, assoc->status ); + net80211_set_state ( dev, NET80211_WAITING, 0, + assoc->status ); + return; + } + + /* ESSID was filled before the association request was sent */ + memcpy ( dev->bssid, hdr->addr3, ETH_ALEN ); + dev->aid = assoc->aid; + + net80211_set_state ( dev, NET80211_WAITING, NET80211_ASSOCIATED, + IEEE80211_STATUS_SUCCESS ); +} + + +/** + * Send 802.11 disassociation frame + * + * @v dev 802.11 device + * @v reason Reason for disassociation + * @ret rc Return status code + */ +static int net80211_send_disassoc ( struct net80211_device *dev, int reason ) +{ + struct io_buffer *iob = alloc_iob ( 64 ); + struct ieee80211_disassoc *disassoc; + + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) + return -EINVAL; + + net80211_set_state ( dev, NET80211_ASSOCIATED, 0, 0 ); + iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN ); + disassoc = iob_put ( iob, sizeof ( *disassoc ) ); + disassoc->reason = reason; + + return net80211_tx_mgmt ( dev, IEEE80211_STYPE_DISASSOC, dev->bssid, + iob ); +} + + +/** Smoothing factor (1-7) for link quality calculation */ +#define LQ_SMOOTH 7 + +/** + * Update link quality information based on received beacon + * + * @v dev 802.11 device + * @v iob I/O buffer containing beacon + * @ret rc Return status code + */ +static void net80211_update_link_quality ( struct net80211_device *dev, + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_beacon *beacon; + u32 dt, rxi; + + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) + return; + + beacon = ( struct ieee80211_beacon * ) hdr->data; + dt = ( u32 ) ( beacon->timestamp - dev->last_beacon_timestamp ); + rxi = dev->rx_beacon_interval; + + rxi = ( LQ_SMOOTH * rxi ) + ( ( 8 - LQ_SMOOTH ) * dt ); + dev->rx_beacon_interval = rxi >> 3; + + dev->last_beacon_timestamp = beacon->timestamp; +} + + +/** + * Handle receipt of 802.11 management frame + * + * @v dev 802.11 device + * @v iob I/O buffer + * @v signal Signal strength of received frame + */ +static void net80211_handle_mgmt ( struct net80211_device *dev, + struct io_buffer *iob, int signal ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_disassoc *disassoc; + u16 stype = hdr->fc & IEEE80211_FC_SUBTYPE; + int keep = 0; + int is_deauth = ( stype == IEEE80211_STYPE_DEAUTH ); + + if ( ( hdr->fc & IEEE80211_FC_TYPE ) != IEEE80211_TYPE_MGMT ) { + free_iob ( iob ); + return; /* only handle management frames */ + } + + switch ( stype ) { + /* We reconnect on deauthentication and disassociation. */ + case IEEE80211_STYPE_DEAUTH: + case IEEE80211_STYPE_DISASSOC: + disassoc = ( struct ieee80211_disassoc * ) hdr->data; + net80211_set_state ( dev, is_deauth ? NET80211_AUTHENTICATED : + NET80211_ASSOCIATED, 0, + NET80211_IS_REASON | disassoc->reason ); + DBGC ( dev, "802.11 %p %s: reason %d\n", + dev, is_deauth ? "deauthenticated" : "disassociated", + disassoc->reason ); + + /* Try to reassociate, in case it's transient. */ + net80211_autoassociate ( dev ); + + break; + + /* We handle authentication and association. */ + case IEEE80211_STYPE_AUTH: + if ( ! ( dev->state & NET80211_AUTHENTICATED ) ) + net80211_handle_auth ( dev, iob ); + break; + + case IEEE80211_STYPE_ASSOC_RESP: + case IEEE80211_STYPE_REASSOC_RESP: + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) + net80211_handle_assoc_reply ( dev, iob ); + break; + + /* We pass probes and beacons onto network scanning + code. Pass actions for future extensibility. */ + case IEEE80211_STYPE_BEACON: + net80211_update_link_quality ( dev, iob ); + /* fall through */ + case IEEE80211_STYPE_PROBE_RESP: + case IEEE80211_STYPE_ACTION: + if ( dev->keep_mgmt ) { + struct net80211_rx_info *rxinf; + rxinf = zalloc ( sizeof ( *rxinf ) ); + if ( ! rxinf ) { + DBGC ( dev, "802.11 %p out of memory\n", dev ); + break; + } + rxinf->signal = signal; + list_add_tail ( &iob->list, &dev->mgmt_queue ); + list_add_tail ( &rxinf->list, &dev->mgmt_info_queue ); + keep = 1; + } + break; + + case IEEE80211_STYPE_PROBE_REQ: + /* Some nodes send these broadcast. Ignore them. */ + break; + + case IEEE80211_STYPE_ASSOC_REQ: + case IEEE80211_STYPE_REASSOC_REQ: + /* We should never receive these, only send them. */ + DBGC ( dev, "802.11 %p received strange management request " + "(%04x)\n", dev, stype ); + break; + + default: + DBGC ( dev, "802.11 %p received unimplemented management " + "packet (%04x)\n", dev, stype ); + break; + } + + if ( ! keep ) + free_iob ( iob ); +} + +/* ---------- Packet handling functions ---------- */ + +/** + * Free buffers used by 802.11 fragment cache entry + * + * @v dev 802.11 device + * @v fcid Fragment cache entry index + * + * After this function, the referenced entry will be marked unused. + */ +static void net80211_free_frags ( struct net80211_device *dev, int fcid ) +{ + int j; + struct net80211_frag_cache *frag = &dev->frags[fcid]; + + for ( j = 0; j < 16; j++ ) { + if ( frag->iob[j] ) { + free_iob ( frag->iob[j] ); + frag->iob[j] = NULL; + } + } + + frag->seqnr = 0; + frag->start_ticks = 0; + frag->in_use = 0; +} + +/** + * Accumulate 802.11 fragments into one I/O buffer + * + * @v dev 802.11 device + * @v fcid Fragment cache entry index + * @v nfrags Number of fragments received + * @v size Sum of sizes of all fragments, including headers + * @ret iob I/O buffer containing reassembled packet + * + * This function does not free the fragment buffers. + */ +static struct io_buffer *net80211_accum_frags ( struct net80211_device *dev, + int fcid, int nfrags, int size ) +{ + struct net80211_frag_cache *frag = &dev->frags[fcid]; + int hdrsize = IEEE80211_TYP_FRAME_HEADER_LEN; + int nsize = size - hdrsize * ( nfrags - 1 ); + int i; + + struct io_buffer *niob = alloc_iob ( nsize ); + struct ieee80211_frame *hdr; + + /* Add the header from the first one... */ + memcpy ( iob_put ( niob, hdrsize ), frag->iob[0]->data, hdrsize ); + + /* ... and all the data from all of them. */ + for ( i = 0; i < nfrags; i++ ) { + int len = iob_len ( frag->iob[i] ) - hdrsize; + memcpy ( iob_put ( niob, len ), + frag->iob[i]->data + hdrsize, len ); + } + + /* Turn off the fragment bit. */ + hdr = niob->data; + hdr->fc &= ~IEEE80211_FC_MORE_FRAG; + + return niob; +} + +/** + * Handle receipt of 802.11 fragment + * + * @v dev 802.11 device + * @v iob I/O buffer containing fragment + * @v signal Signal strength with which fragment was received + */ +static void net80211_rx_frag ( struct net80211_device *dev, + struct io_buffer *iob, int signal ) +{ + struct ieee80211_frame *hdr = iob->data; + int fragnr = IEEE80211_FRAG ( hdr->seq ); + + if ( fragnr == 0 && ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) { + /* start a frag cache entry */ + int i, newest = -1; + u32 curr_ticks = currticks(), newest_ticks = 0; + u32 timeout = ticks_per_sec() * NET80211_FRAG_TIMEOUT; + + for ( i = 0; i < NET80211_NR_CONCURRENT_FRAGS; i++ ) { + if ( dev->frags[i].in_use == 0 ) + break; + + if ( dev->frags[i].start_ticks + timeout >= + curr_ticks ) { + net80211_free_frags ( dev, i ); + break; + } + + if ( dev->frags[i].start_ticks > newest_ticks ) { + newest = i; + newest_ticks = dev->frags[i].start_ticks; + } + } + + /* If we're being sent more concurrent fragmented + packets than we can handle, drop the newest so the + older ones have time to complete. */ + if ( i == NET80211_NR_CONCURRENT_FRAGS ) { + i = newest; + net80211_free_frags ( dev, i ); + } + + dev->frags[i].in_use = 1; + dev->frags[i].seqnr = IEEE80211_SEQNR ( hdr->seq ); + dev->frags[i].start_ticks = currticks(); + dev->frags[i].iob[0] = iob; + return; + } else { + int i; + for ( i = 0; i < NET80211_NR_CONCURRENT_FRAGS; i++ ) { + if ( dev->frags[i].in_use && dev->frags[i].seqnr == + IEEE80211_SEQNR ( hdr->seq ) ) + break; + } + if ( i == NET80211_NR_CONCURRENT_FRAGS ) { + /* Drop non-first not-in-cache fragments */ + DBGC ( dev, "802.11 %p dropped fragment fc=%04x " + "seq=%04x\n", dev, hdr->fc, hdr->seq ); + free_iob ( iob ); + return; + } + + dev->frags[i].iob[fragnr] = iob; + + if ( ! ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) { + int j, size = 0; + for ( j = 0; j < fragnr; j++ ) { + size += iob_len ( dev->frags[i].iob[j] ); + if ( dev->frags[i].iob[j] == NULL ) + break; + } + if ( j == fragnr ) { + /* We've got everything */ + struct io_buffer *niob = + net80211_accum_frags ( dev, i, fragnr, + size ); + net80211_free_frags ( dev, i ); + net80211_rx ( dev, niob, signal, 0 ); + } else { + DBGC ( dev, "802.11 %p dropping fragmented " + "packet due to out-of-order arrival, " + "fc=%04x seq=%04x\n", dev, hdr->fc, + hdr->seq ); + net80211_free_frags ( dev, i ); + } + } + } +} + +/** + * Handle receipt of 802.11 frame + * + * @v dev 802.11 device + * @v iob I/O buffer + * @v signal Received signal strength + * @v rate Bitrate at which frame was received, in 100 kbps units + * + * If the rate or signal is unknown, 0 should be passed. + */ +void net80211_rx ( struct net80211_device *dev, struct io_buffer *iob, + int signal, u16 rate ) +{ + struct ieee80211_frame *hdr = iob->data; + u16 type = hdr->fc & IEEE80211_FC_TYPE; + if ( ( hdr->fc & IEEE80211_FC_VERSION ) != IEEE80211_THIS_VERSION ) + goto drop; /* drop invalid-version packets */ + + if ( type == IEEE80211_TYPE_CTRL ) + goto drop; /* we don't handle control packets, + the hardware does */ + + if ( dev->last_rx_seq == hdr->seq ) + goto drop; /* avoid duplicate packet */ + dev->last_rx_seq = hdr->seq; + + if ( dev->hw->flags & NET80211_HW_RX_HAS_FCS ) { + /* discard the FCS */ + iob_unput ( iob, 4 ); + } + + if ( hdr->fc & IEEE80211_FC_PROTECTED ) { + struct io_buffer *niob; + if ( ! dev->crypto ) + goto drop; /* can't decrypt packets on an open network */ + + niob = dev->crypto->decrypt ( dev->crypto, iob ); + if ( ! niob ) + goto drop; /* drop failed decryption */ + free_iob ( iob ); + iob = niob; + } + + dev->last_signal = signal; + + /* Fragments go into the frag cache or get dropped. */ + if ( IEEE80211_FRAG ( hdr->seq ) != 0 + || ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) { + net80211_rx_frag ( dev, iob, signal ); + return; + } + + /* Management frames get handled, enqueued, or dropped. */ + if ( type == IEEE80211_TYPE_MGMT ) { + net80211_handle_mgmt ( dev, iob, signal ); + return; + } + + /* Data frames get dropped or sent to the net_device. */ + if ( ( hdr->fc & IEEE80211_FC_SUBTYPE ) != IEEE80211_STYPE_DATA ) + goto drop; /* drop QoS, CFP, or null data packets */ + + /* Update rate-control algorithm */ + if ( dev->rctl ) + rc80211_update_rx ( dev, hdr->fc & IEEE80211_FC_RETRY, rate ); + + /* Pass packet onward */ + if ( netdev_link_ok ( dev->netdev ) ) { + netdev_rx ( dev->netdev, iob ); + return; + } + + drop: + DBGC2 ( dev, "802.11 %p dropped packet fc=%04x seq=%04x\n", dev, + hdr->fc, hdr->seq ); + free_iob ( iob ); + return; +} + +/** Indicate an error in receiving a packet + * + * @v dev 802.11 device + * @v iob I/O buffer with received packet, or NULL + * @v rc Error code + * + * This logs the error with the wrapping net_device, and frees iob if + * it is passed. + */ +void net80211_rx_err ( struct net80211_device *dev, + struct io_buffer *iob, int rc ) +{ + netdev_rx_err ( dev->netdev, iob, rc ); +} + +/** Indicate the completed transmission of a packet + * + * @v dev 802.11 device + * @v iob I/O buffer of transmitted packet + * @v retries Number of times this packet was retransmitted + * @v rc Error code, or 0 for success + * + * This logs an error with the wrapping net_device if one occurred, + * and removes and frees the I/O buffer from its TX queue. The + * provided retry information is used to tune our transmission rate. + * + * If the packet did not need to be retransmitted because it was + * properly ACKed the first time, @a retries should be 0. + */ +void net80211_tx_complete ( struct net80211_device *dev, + struct io_buffer *iob, int retries, int rc ) +{ + /* Update rate-control algorithm */ + if ( dev->rctl ) + rc80211_update_tx ( dev, retries, rc ); + + /* Pass completion onward */ + netdev_tx_complete_err ( dev->netdev, iob, rc ); +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/80211/rc80211.c b/debian/grub-extras/disabled/gpxe/src/net/80211/rc80211.c new file mode 100644 index 0000000..5bd1914 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/80211/rc80211.c @@ -0,0 +1,371 @@ +/* + * Simple 802.11 rate-control algorithm for gPXE. + * + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdlib.h> +#include <gpxe/net80211.h> + +/** + * @file + * + * Simple 802.11 rate-control algorithm + */ + +/** @page rc80211 Rate control philosophy + * + * We want to maximize our transmission speed, to the extent that we + * can do that without dropping undue numbers of packets. We also + * don't want to take up very much code space, so our algorithm has to + * be pretty simple + * + * When we receive a packet, we know what rate it was transmitted at, + * and whether it had to be retransmitted to get to us. + * + * When we send a packet, we hear back how many times it had to be + * retried to get through, and whether it got through at all. + * + * Indications of TX success are more reliable than RX success, but RX + * information helps us know where to start. + * + * To handle all of this, we keep for each rate and each direction (TX + * and RX separately) some state information for the most recent + * packets on that rate and the number of packets for which we have + * information. The state is a 32-bit unsigned integer in which two + * bits represent a packet: 11 if it went through well, 10 if it went + * through with one retry, 01 if it went through with more than one + * retry, or 00 if it didn't go through at all. We define the + * "goodness" for a particular (rate, direction) combination as the + * sum of all the 2-bit fields, times 33, divided by the number of + * 2-bit fields containing valid information (16 except when we're + * starting out). The number produced is between 0 and 99; we use -1 + * for rates with less than 4 RX packets or 1 TX, as an indicator that + * we do not have enough information to rely on them. + * + * In deciding which rates are best, we find the weighted average of + * TX and RX goodness, where the weighting is by number of packets + * with data and TX packets are worth 4 times as much as RX packets. + * The weighted average is called "net goodness" and is also a number + * between 0 and 99. If 3 consecutive packets fail transmission + * outright, we automatically ratchet down the rate; otherwise, we + * switch to the best rate whenever the current rate's goodness falls + * below some threshold, and try increasing our rate when the goodness + * is very high. + * + * This system is optimized for gPXE's style of usage. Because normal + * operation always involves receiving something, we'll make our way + * to the best rate pretty quickly. We tend to follow the lead of the + * sending AP in choosing rates, but we won't use rates for long that + * don't work well for us in transmission. We assume gPXE won't be + * running for long enough that rate patterns will change much, so we + * don't have to keep time counters or the like. And if this doesn't + * work well in practice there are many ways it could be tweaked. + * + * To avoid staying at 1Mbps for a long time, we don't track any + * transmitted packets until we've set our rate based on received + * packets. + */ + +/** Two-bit packet status indicator for a packet with no retries */ +#define RC_PKT_OK 0x3 + +/** Two-bit packet status indicator for a packet with one retry */ +#define RC_PKT_RETRIED_ONCE 0x2 + +/** Two-bit packet status indicator for a TX packet with multiple retries + * + * It is not possible to tell whether an RX packet had one or multiple + * retries; we rely instead on the fact that failed RX packets won't + * get to us at all, so if we receive a lot of RX packets on a certain + * rate it must be pretty good. + */ +#define RC_PKT_RETRIED_MULTI 0x1 + +/** Two-bit packet status indicator for a TX packet that was never ACKed + * + * It is not possible to tell whether an RX packet was setn if it + * didn't get through to us, but if we don't see one we won't increase + * the goodness for its rate. This asymmetry is part of why TX packets + * are weighted much more heavily than RX. + */ +#define RC_PKT_FAILED 0x0 + +/** Number of times to weight TX packets more heavily than RX packets */ +#define RC_TX_FACTOR 4 + +/** Number of consecutive failed TX packets that cause an automatic rate drop */ +#define RC_TX_EMERG_FAIL 3 + +/** Minimum net goodness below which we will search for a better rate */ +#define RC_GOODNESS_MIN 85 + +/** Maximum net goodness above which we will try to increase our rate */ +#define RC_GOODNESS_MAX 95 + +/** Minimum (num RX + @c RC_TX_FACTOR * num TX) to use a certain rate */ +#define RC_UNCERTAINTY_THRESH 4 + +/** TX direction */ +#define TX 0 + +/** RX direction */ +#define RX 1 + +/** A rate control context */ +struct rc80211_ctx +{ + /** Goodness state for each rate, TX and RX */ + u32 goodness[2][NET80211_MAX_RATES]; + + /** Number of packets recorded for each rate */ + u8 count[2][NET80211_MAX_RATES]; + + /** Indication of whether we've set the device rate yet */ + int started; + + /** Counter of all packets sent and received */ + int packets; +}; + +/** + * Initialize rate-control algorithm + * + * @v dev 802.11 device + * @ret ctx Rate-control context, to be stored in @c dev->rctl + */ +struct rc80211_ctx * rc80211_init ( struct net80211_device *dev __unused ) +{ + struct rc80211_ctx *ret = zalloc ( sizeof ( *ret ) ); + return ret; +} + +/** + * Calculate net goodness for a certain rate + * + * @v ctx Rate-control context + * @v rate_idx Index of rate to calculate net goodness for + */ +static int rc80211_calc_net_goodness ( struct rc80211_ctx *ctx, + int rate_idx ) +{ + int sum[2], num[2], dir, pkt; + + for ( dir = 0; dir < 2; dir++ ) { + u32 good = ctx->goodness[dir][rate_idx]; + + num[dir] = ctx->count[dir][rate_idx]; + sum[dir] = 0; + + for ( pkt = 0; pkt < num[dir]; pkt++ ) + sum[dir] += ( good >> ( 2 * pkt ) ) & 0x3; + } + + if ( ( num[TX] * RC_TX_FACTOR + num[RX] ) < RC_UNCERTAINTY_THRESH ) + return -1; + + return ( 33 * ( sum[TX] * RC_TX_FACTOR + sum[RX] ) / + ( num[TX] * RC_TX_FACTOR + num[RX] ) ); +} + +/** + * Determine the best rate to switch to and return it + * + * @v dev 802.11 device + * @ret rate_idx Index of the best rate to switch to + */ +static int rc80211_pick_best ( struct net80211_device *dev ) +{ + struct rc80211_ctx *ctx = dev->rctl; + int best_net_good = 0, best_rate = -1, i; + + for ( i = 0; i < dev->nr_rates; i++ ) { + int net_good = rc80211_calc_net_goodness ( ctx, i ); + + if ( net_good > best_net_good || + ( best_net_good > RC_GOODNESS_MIN && + net_good > RC_GOODNESS_MIN ) ) { + best_net_good = net_good; + best_rate = i; + } + } + + if ( best_rate >= 0 ) { + int old_good = rc80211_calc_net_goodness ( ctx, dev->rate ); + if ( old_good != best_net_good ) + DBGC ( ctx, "802.11 RC %p switching from goodness " + "%d to %d\n", ctx, old_good, best_net_good ); + + ctx->started = 1; + return best_rate; + } + + return dev->rate; +} + +/** + * Set 802.11 device rate + * + * @v dev 802.11 device + * @v rate_idx Index of rate to switch to + * + * This is a thin wrapper around net80211_set_rate_idx to insert a + * debugging message where appropriate. + */ +static inline void rc80211_set_rate ( struct net80211_device *dev, + int rate_idx ) +{ + DBGC ( dev->rctl, "802.11 RC %p changing rate %d->%d Mbps\n", dev->rctl, + dev->rates[dev->rate] / 10, dev->rates[rate_idx] / 10 ); + + net80211_set_rate_idx ( dev, rate_idx ); +} + +/** + * Check rate-control state and change rate if necessary + * + * @v dev 802.11 device + */ +static void rc80211_maybe_set_new ( struct net80211_device *dev ) +{ + struct rc80211_ctx *ctx = dev->rctl; + int net_good; + + net_good = rc80211_calc_net_goodness ( ctx, dev->rate ); + + if ( ! ctx->started ) { + rc80211_set_rate ( dev, rc80211_pick_best ( dev ) ); + return; + } + + if ( net_good < 0 ) /* insufficient data */ + return; + + if ( net_good > RC_GOODNESS_MAX && dev->rate + 1 < dev->nr_rates ) { + int higher = rc80211_calc_net_goodness ( ctx, dev->rate + 1 ); + if ( higher > net_good || higher < 0 ) + rc80211_set_rate ( dev, dev->rate + 1 ); + else + rc80211_set_rate ( dev, rc80211_pick_best ( dev ) ); + } + + if ( net_good < RC_GOODNESS_MIN ) { + rc80211_set_rate ( dev, rc80211_pick_best ( dev ) ); + } +} + +/** + * Update rate-control state + * + * @v dev 802.11 device + * @v direction One of the direction constants TX or RX + * @v rate_idx Index of rate at which packet was sent or received + * @v retries Number of times packet was retried before success + * @v failed If nonzero, the packet failed to get through + */ +static void rc80211_update ( struct net80211_device *dev, int direction, + int rate_idx, int retries, int failed ) +{ + struct rc80211_ctx *ctx = dev->rctl; + u32 goodness = ctx->goodness[direction][rate_idx]; + + if ( ctx->count[direction][rate_idx] < 16 ) + ctx->count[direction][rate_idx]++; + + goodness <<= 2; + if ( failed ) + goodness |= RC_PKT_FAILED; + else if ( retries > 1 ) + goodness |= RC_PKT_RETRIED_MULTI; + else if ( retries ) + goodness |= RC_PKT_RETRIED_ONCE; + else + goodness |= RC_PKT_OK; + + ctx->goodness[direction][rate_idx] = goodness; + + ctx->packets++; + + rc80211_maybe_set_new ( dev ); +} + +/** + * Update rate-control state for transmitted packet + * + * @v dev 802.11 device + * @v retries Number of times packet was transmitted before success + * @v rc Return status code for transmission + */ +void rc80211_update_tx ( struct net80211_device *dev, int retries, int rc ) +{ + struct rc80211_ctx *ctx = dev->rctl; + + if ( ! ctx->started ) + return; + + rc80211_update ( dev, TX, dev->rate, retries, rc ); + + /* Check if the last RC_TX_EMERG_FAIL packets have all failed */ + if ( ! ( ctx->goodness[TX][dev->rate] & + ( ( 1 << ( 2 * RC_TX_EMERG_FAIL ) ) - 1 ) ) ) { + if ( dev->rate == 0 ) + DBGC ( dev->rctl, "802.11 RC %p saw %d consecutive " + "failed TX, but cannot lower rate any further\n", + dev->rctl, RC_TX_EMERG_FAIL ); + else { + DBGC ( dev->rctl, "802.11 RC %p lowering rate (%d->%d " + "Mbps) due to %d consecutive TX failures\n", + dev->rctl, dev->rates[dev->rate] / 10, + dev->rates[dev->rate - 1] / 10, + RC_TX_EMERG_FAIL ); + + rc80211_set_rate ( dev, dev->rate - 1 ); + } + } +} + +/** + * Update rate-control state for received packet + * + * @v dev 802.11 device + * @v retry Whether the received packet had been retransmitted + * @v rate Rate at which packet was received, in 100 kbps units + */ +void rc80211_update_rx ( struct net80211_device *dev, int retry, u16 rate ) +{ + int ridx; + + for ( ridx = 0; ridx < dev->nr_rates && dev->rates[ridx] != rate; + ridx++ ) + ; + if ( ridx >= dev->nr_rates ) + return; /* couldn't find the rate */ + + rc80211_update ( dev, RX, ridx, retry, 0 ); +} + +/** + * Free rate-control context + * + * @v ctx Rate-control context + */ +void rc80211_free ( struct rc80211_ctx *ctx ) +{ + free ( ctx ); +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/aoe.c b/debian/grub-extras/disabled/gpxe/src/net/aoe.c new file mode 100644 index 0000000..839a875 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/aoe.c @@ -0,0 +1,471 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <gpxe/list.h> +#include <gpxe/if_ether.h> +#include <gpxe/ethernet.h> +#include <gpxe/iobuf.h> +#include <gpxe/uaccess.h> +#include <gpxe/ata.h> +#include <gpxe/netdevice.h> +#include <gpxe/process.h> +#include <gpxe/features.h> +#include <gpxe/aoe.h> + +/** @file + * + * AoE protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "AoE", DHCP_EB_FEATURE_AOE, 1 ); + +struct net_protocol aoe_protocol; + +/** List of all AoE sessions */ +static LIST_HEAD ( aoe_sessions ); + +static void aoe_free ( struct refcnt *refcnt ) { + struct aoe_session *aoe = + container_of ( refcnt, struct aoe_session, refcnt ); + + netdev_put ( aoe->netdev ); + free ( aoe ); +} + +/** + * Mark current AoE command complete + * + * @v aoe AoE session + * @v rc Return status code + */ +static void aoe_done ( struct aoe_session *aoe, int rc ) { + + /* Record overall command status */ + if ( aoe->command ) { + aoe->command->cb.cmd_stat = aoe->status; + aoe->command->rc = rc; + aoe->command = NULL; + } + + /* Stop retransmission timer */ + stop_timer ( &aoe->timer ); + + /* Mark operation as complete */ + aoe->rc = rc; +} + +/** + * Send AoE command + * + * @v aoe AoE session + * @ret rc Return status code + * + * This transmits an AoE command packet. It does not wait for a + * response. + */ +static int aoe_send_command ( struct aoe_session *aoe ) { + struct ata_command *command = aoe->command; + struct io_buffer *iobuf; + struct aoehdr *aoehdr; + union aoecmd *aoecmd; + struct aoeata *aoeata; + unsigned int count; + unsigned int data_out_len; + unsigned int aoecmdlen; + + /* Fail immediately if we have no netdev to send on */ + if ( ! aoe->netdev ) { + aoe_done ( aoe, -ENETUNREACH ); + return -ENETUNREACH; + } + + /* If we are transmitting anything that requires a response, + * start the retransmission timer. Do this before attempting + * to allocate the I/O buffer, in case allocation itself + * fails. + */ + start_timer ( &aoe->timer ); + + /* Calculate count and data_out_len for this subcommand */ + switch ( aoe->aoe_cmd_type ) { + case AOE_CMD_ATA: + count = command->cb.count.native; + if ( count > AOE_MAX_COUNT ) + count = AOE_MAX_COUNT; + data_out_len = ( command->data_out ? + ( count * ATA_SECTOR_SIZE ) : 0 ); + aoecmdlen = sizeof ( aoecmd->ata ); + break; + case AOE_CMD_CONFIG: + count = 0; + data_out_len = 0; + aoecmdlen = sizeof ( aoecmd->cfg ); + break; + default: + return -ENOTSUP; + } + + /* Create outgoing I/O buffer */ + iobuf = alloc_iob ( ETH_HLEN + sizeof ( *aoehdr ) + + aoecmdlen + data_out_len ); + + if ( ! iobuf ) + return -ENOMEM; + iob_reserve ( iobuf, ETH_HLEN ); + aoehdr = iob_put ( iobuf, sizeof ( *aoehdr ) ); + aoecmd = iob_put ( iobuf, aoecmdlen ); + memset ( aoehdr, 0, ( sizeof ( *aoehdr ) + aoecmdlen ) ); + + /* Fill AoE header */ + aoehdr->ver_flags = AOE_VERSION; + aoehdr->major = htons ( aoe->major ); + aoehdr->minor = aoe->minor; + aoehdr->command = aoe->aoe_cmd_type; + aoehdr->tag = htonl ( ++aoe->tag ); + + /* Fill AoE payload */ + switch ( aoe->aoe_cmd_type ) { + case AOE_CMD_ATA: + /* Fill AoE command */ + aoeata = &aoecmd->ata; + linker_assert ( AOE_FL_DEV_HEAD == ATA_DEV_SLAVE, + __fix_ata_h__ ); + aoeata->aflags = ( ( command->cb.lba48 ? AOE_FL_EXTENDED : 0 )| + ( command->cb.device & ATA_DEV_SLAVE ) | + ( data_out_len ? AOE_FL_WRITE : 0 ) ); + aoeata->err_feat = command->cb.err_feat.bytes.cur; + aoeata->count = count; + aoeata->cmd_stat = command->cb.cmd_stat; + aoeata->lba.u64 = cpu_to_le64 ( command->cb.lba.native ); + if ( ! command->cb.lba48 ) + aoeata->lba.bytes[3] |= + ( command->cb.device & ATA_DEV_MASK ); + + /* Fill data payload */ + copy_from_user ( iob_put ( iobuf, data_out_len ), + command->data_out, aoe->command_offset, + data_out_len ); + break; + case AOE_CMD_CONFIG: + /* Nothing to do */ + break; + default: + assert ( 0 ); + } + + /* Send packet */ + return net_tx ( iobuf, aoe->netdev, &aoe_protocol, aoe->target ); +} + +/** + * Handle AoE retry timer expiry + * + * @v timer AoE retry timer + * @v fail Failure indicator + */ +static void aoe_timer_expired ( struct retry_timer *timer, int fail ) { + struct aoe_session *aoe = + container_of ( timer, struct aoe_session, timer ); + + if ( fail ) { + aoe_done ( aoe, -ETIMEDOUT ); + } else { + aoe_send_command ( aoe ); + } +} + +/** + * Handle AoE configuration command response + * + * @v aoe AoE session + * @v ll_source Link-layer source address + * @ret rc Return status code + */ +static int aoe_rx_cfg ( struct aoe_session *aoe, const void *ll_source ) { + + /* Record target MAC address */ + memcpy ( aoe->target, ll_source, sizeof ( aoe->target ) ); + DBGC ( aoe, "AoE %p target MAC address %s\n", + aoe, eth_ntoa ( aoe->target ) ); + + /* Mark config request as complete */ + aoe_done ( aoe, 0 ); + + return 0; +} + +/** + * Handle AoE ATA command response + * + * @v aoe AoE session + * @v aoeata AoE ATA command + * @v len Length of AoE ATA command + * @ret rc Return status code + */ +static int aoe_rx_ata ( struct aoe_session *aoe, struct aoeata *aoeata, + size_t len ) { + struct ata_command *command = aoe->command; + unsigned int rx_data_len; + unsigned int count; + unsigned int data_len; + + /* Sanity check */ + if ( len < sizeof ( *aoeata ) ) { + /* Ignore packet; allow timer to trigger retransmit */ + return -EINVAL; + } + rx_data_len = ( len - sizeof ( *aoeata ) ); + + /* Calculate count and data_len for this subcommand */ + count = command->cb.count.native; + if ( count > AOE_MAX_COUNT ) + count = AOE_MAX_COUNT; + data_len = count * ATA_SECTOR_SIZE; + + /* Merge into overall ATA status */ + aoe->status |= aoeata->cmd_stat; + + /* Copy data payload */ + if ( command->data_in ) { + if ( rx_data_len > data_len ) + rx_data_len = data_len; + copy_to_user ( command->data_in, aoe->command_offset, + aoeata->data, rx_data_len ); + } + + /* Update ATA command and offset */ + aoe->command_offset += data_len; + command->cb.lba.native += count; + command->cb.count.native -= count; + + /* Check for operation complete */ + if ( ! command->cb.count.native ) { + aoe_done ( aoe, 0 ); + return 0; + } + + /* Transmit next portion of request */ + stop_timer ( &aoe->timer ); + aoe_send_command ( aoe ); + + return 0; +} + +/** + * Process incoming AoE packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_source Link-layer source address + * @ret rc Return status code + * + */ +static int aoe_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + const void *ll_source ) { + struct aoehdr *aoehdr = iobuf->data; + struct aoe_session *aoe; + int rc = 0; + + /* Sanity checks */ + if ( iob_len ( iobuf ) < sizeof ( *aoehdr ) ) { + rc = -EINVAL; + goto done; + } + if ( ( aoehdr->ver_flags & AOE_VERSION_MASK ) != AOE_VERSION ) { + rc = -EPROTONOSUPPORT; + goto done; + } + if ( ! ( aoehdr->ver_flags & AOE_FL_RESPONSE ) ) { + /* Ignore AoE requests that we happen to see */ + goto done; + } + iob_pull ( iobuf, sizeof ( *aoehdr ) ); + + /* Demultiplex amongst active AoE sessions */ + list_for_each_entry ( aoe, &aoe_sessions, list ) { + if ( ntohs ( aoehdr->major ) != aoe->major ) + continue; + if ( aoehdr->minor != aoe->minor ) + continue; + if ( ntohl ( aoehdr->tag ) != aoe->tag ) + continue; + if ( aoehdr->ver_flags & AOE_FL_ERROR ) { + aoe_done ( aoe, -EIO ); + break; + } + switch ( aoehdr->command ) { + case AOE_CMD_ATA: + rc = aoe_rx_ata ( aoe, iobuf->data, iob_len ( iobuf )); + break; + case AOE_CMD_CONFIG: + rc = aoe_rx_cfg ( aoe, ll_source ); + break; + default: + DBGC ( aoe, "AoE %p ignoring command %02x\n", + aoe, aoehdr->command ); + break; + } + break; + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** AoE protocol */ +struct net_protocol aoe_protocol __net_protocol = { + .name = "AoE", + .net_proto = htons ( ETH_P_AOE ), + .rx = aoe_rx, +}; + +/** + * Issue ATA command via an open AoE session + * + * @v ata ATA device + * @v command ATA command + * @ret rc Return status code + */ +static int aoe_command ( struct ata_device *ata, + struct ata_command *command ) { + struct aoe_session *aoe = + container_of ( ata->backend, struct aoe_session, refcnt ); + + aoe->command = command; + aoe->status = 0; + aoe->command_offset = 0; + aoe->aoe_cmd_type = AOE_CMD_ATA; + + aoe_send_command ( aoe ); + + return 0; +} + +/** + * Issue AoE config query for AoE target discovery + * + * @v aoe AoE session + * @ret rc Return status code + */ +static int aoe_discover ( struct aoe_session *aoe ) { + int rc; + + aoe->status = 0; + aoe->aoe_cmd_type = AOE_CMD_CONFIG; + aoe->command = NULL; + + aoe_send_command ( aoe ); + + aoe->rc = -EINPROGRESS; + while ( aoe->rc == -EINPROGRESS ) + step(); + rc = aoe->rc; + + return rc; +} + +static int aoe_detached_command ( struct ata_device *ata __unused, + struct ata_command *command __unused ) { + return -ENODEV; +} + +void aoe_detach ( struct ata_device *ata ) { + struct aoe_session *aoe = + container_of ( ata->backend, struct aoe_session, refcnt ); + + stop_timer ( &aoe->timer ); + ata->command = aoe_detached_command; + list_del ( &aoe->list ); + ref_put ( ata->backend ); + ata->backend = NULL; +} + +static int aoe_parse_root_path ( struct aoe_session *aoe, + const char *root_path ) { + char *ptr; + + if ( strncmp ( root_path, "aoe:", 4 ) != 0 ) + return -EINVAL; + ptr = ( ( char * ) root_path + 4 ); + + if ( *ptr++ != 'e' ) + return -EINVAL; + + aoe->major = strtoul ( ptr, &ptr, 10 ); + if ( *ptr++ != '.' ) + return -EINVAL; + + aoe->minor = strtoul ( ptr, &ptr, 10 ); + if ( *ptr ) + return -EINVAL; + + return 0; +} + +int aoe_attach ( struct ata_device *ata, struct net_device *netdev, + const char *root_path ) { + struct aoe_session *aoe; + int rc; + + /* Allocate and initialise structure */ + aoe = zalloc ( sizeof ( *aoe ) ); + if ( ! aoe ) + return -ENOMEM; + aoe->refcnt.free = aoe_free; + aoe->netdev = netdev_get ( netdev ); + memcpy ( aoe->target, netdev->ll_broadcast, sizeof ( aoe->target ) ); + aoe->tag = AOE_TAG_MAGIC; + aoe->timer.expired = aoe_timer_expired; + + /* Parse root path */ + if ( ( rc = aoe_parse_root_path ( aoe, root_path ) ) != 0 ) + goto err; + + /* Attach parent interface, transfer reference to connection + * list, and return + */ + ata->backend = ref_get ( &aoe->refcnt ); + ata->command = aoe_command; + list_add ( &aoe->list, &aoe_sessions ); + + /* Send discovery packet to find the target MAC address. + * Ideally, this ought to be done asynchronously, but the + * block device interface does not yet support asynchronous + * operation. + */ + if ( ( rc = aoe_discover( aoe ) ) != 0 ) + goto err; + + return 0; + + err: + ref_put ( &aoe->refcnt ); + return rc; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/arp.c b/debian/grub-extras/disabled/gpxe/src/net/arp.c new file mode 100644 index 0000000..124a856 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/arp.c @@ -0,0 +1,289 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <gpxe/if_ether.h> +#include <gpxe/if_arp.h> +#include <gpxe/iobuf.h> +#include <gpxe/netdevice.h> +#include <gpxe/arp.h> + +/** @file + * + * Address Resolution Protocol + * + * This file implements the address resolution protocol as defined in + * RFC826. The implementation is media-independent and + * protocol-independent; it is not limited to Ethernet or to IPv4. + * + */ + +/** An ARP cache entry */ +struct arp_entry { + /** Network-layer protocol */ + struct net_protocol *net_protocol; + /** Link-layer protocol */ + struct ll_protocol *ll_protocol; + /** Network-layer address */ + uint8_t net_addr[MAX_NET_ADDR_LEN]; + /** Link-layer address */ + uint8_t ll_addr[MAX_LL_ADDR_LEN]; +}; + +/** Number of entries in the ARP cache + * + * This is a global cache, covering all network interfaces, + * network-layer protocols and link-layer protocols. + */ +#define NUM_ARP_ENTRIES 4 + +/** The ARP cache */ +static struct arp_entry arp_table[NUM_ARP_ENTRIES]; +#define arp_table_end &arp_table[NUM_ARP_ENTRIES] + +static unsigned int next_new_arp_entry = 0; + +struct net_protocol arp_protocol; + +/** + * Find entry in the ARP cache + * + * @v ll_protocol Link-layer protocol + * @v net_protocol Network-layer protocol + * @v net_addr Network-layer address + * @ret arp ARP cache entry, or NULL if not found + * + */ +static struct arp_entry * +arp_find_entry ( struct ll_protocol *ll_protocol, + struct net_protocol *net_protocol, + const void *net_addr ) { + struct arp_entry *arp; + + for ( arp = arp_table ; arp < arp_table_end ; arp++ ) { + if ( ( arp->ll_protocol == ll_protocol ) && + ( arp->net_protocol == net_protocol ) && + ( memcmp ( arp->net_addr, net_addr, + net_protocol->net_addr_len ) == 0 ) ) + return arp; + } + return NULL; +} + +/** + * Look up media-specific link-layer address in the ARP cache + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v dest_net_addr Destination network-layer address + * @v source_net_addr Source network-layer address + * @ret dest_ll_addr Destination link layer address + * @ret rc Return status code + * + * This function will use the ARP cache to look up the link-layer + * address for the link-layer protocol associated with the network + * device and the given network-layer protocol and addresses. If + * found, the destination link-layer address will be filled in in @c + * dest_ll_addr. + * + * If no address is found in the ARP cache, an ARP request will be + * transmitted on the specified network device and -ENOENT will be + * returned. + */ +int arp_resolve ( struct net_device *netdev, struct net_protocol *net_protocol, + const void *dest_net_addr, const void *source_net_addr, + void *dest_ll_addr ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + const struct arp_entry *arp; + struct io_buffer *iobuf; + struct arphdr *arphdr; + int rc; + + /* Look for existing entry in ARP table */ + arp = arp_find_entry ( ll_protocol, net_protocol, dest_net_addr ); + if ( arp ) { + DBG ( "ARP cache hit: %s %s => %s %s\n", + net_protocol->name, net_protocol->ntoa ( arp->net_addr ), + ll_protocol->name, ll_protocol->ntoa ( arp->ll_addr ) ); + memcpy ( dest_ll_addr, arp->ll_addr, ll_protocol->ll_addr_len); + return 0; + } + DBG ( "ARP cache miss: %s %s\n", net_protocol->name, + net_protocol->ntoa ( dest_net_addr ) ); + + /* Allocate ARP packet */ + iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *arphdr ) + + 2 * ( MAX_LL_ADDR_LEN + MAX_NET_ADDR_LEN ) ); + if ( ! iobuf ) + return -ENOMEM; + iob_reserve ( iobuf, MAX_LL_HEADER_LEN ); + + /* Build up ARP request */ + arphdr = iob_put ( iobuf, sizeof ( *arphdr ) ); + arphdr->ar_hrd = ll_protocol->ll_proto; + arphdr->ar_hln = ll_protocol->ll_addr_len; + arphdr->ar_pro = net_protocol->net_proto; + arphdr->ar_pln = net_protocol->net_addr_len; + arphdr->ar_op = htons ( ARPOP_REQUEST ); + memcpy ( iob_put ( iobuf, ll_protocol->ll_addr_len ), + netdev->ll_addr, ll_protocol->ll_addr_len ); + memcpy ( iob_put ( iobuf, net_protocol->net_addr_len ), + source_net_addr, net_protocol->net_addr_len ); + memset ( iob_put ( iobuf, ll_protocol->ll_addr_len ), + 0, ll_protocol->ll_addr_len ); + memcpy ( iob_put ( iobuf, net_protocol->net_addr_len ), + dest_net_addr, net_protocol->net_addr_len ); + + /* Transmit ARP request */ + if ( ( rc = net_tx ( iobuf, netdev, &arp_protocol, + netdev->ll_broadcast ) ) != 0 ) + return rc; + + return -ENOENT; +} + +/** + * Identify ARP protocol + * + * @v net_proto Network-layer protocol, in network-endian order + * @ret arp_net_protocol ARP protocol, or NULL + * + */ +static struct arp_net_protocol * arp_find_protocol ( uint16_t net_proto ) { + struct arp_net_protocol *arp_net_protocol; + + for_each_table_entry ( arp_net_protocol, ARP_NET_PROTOCOLS ) { + if ( arp_net_protocol->net_protocol->net_proto == net_proto ) { + return arp_net_protocol; + } + } + return NULL; +} + +/** + * Process incoming ARP packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_source Link-layer source address + * @ret rc Return status code + * + * This handles ARP requests and responses as detailed in RFC826. The + * method detailed within the RFC is pretty optimised, handling + * requests and responses with basically a single code path and + * avoiding the need for extraneous ARP requests; read the RFC for + * details. + */ +static int arp_rx ( struct io_buffer *iobuf, struct net_device *netdev, + const void *ll_source __unused ) { + struct arphdr *arphdr = iobuf->data; + struct arp_net_protocol *arp_net_protocol; + struct net_protocol *net_protocol; + struct ll_protocol *ll_protocol; + struct arp_entry *arp; + int merge = 0; + + /* Identify network-layer and link-layer protocols */ + arp_net_protocol = arp_find_protocol ( arphdr->ar_pro ); + if ( ! arp_net_protocol ) + goto done; + net_protocol = arp_net_protocol->net_protocol; + ll_protocol = netdev->ll_protocol; + + /* Sanity checks */ + if ( ( arphdr->ar_hrd != ll_protocol->ll_proto ) || + ( arphdr->ar_hln != ll_protocol->ll_addr_len ) || + ( arphdr->ar_pln != net_protocol->net_addr_len ) ) + goto done; + + /* See if we have an entry for this sender, and update it if so */ + arp = arp_find_entry ( ll_protocol, net_protocol, + arp_sender_pa ( arphdr ) ); + if ( arp ) { + memcpy ( arp->ll_addr, arp_sender_ha ( arphdr ), + arphdr->ar_hln ); + merge = 1; + DBG ( "ARP cache update: %s %s => %s %s\n", + net_protocol->name, net_protocol->ntoa ( arp->net_addr ), + ll_protocol->name, ll_protocol->ntoa ( arp->ll_addr ) ); + } + + /* See if we own the target protocol address */ + if ( arp_net_protocol->check ( netdev, arp_target_pa ( arphdr ) ) != 0) + goto done; + + /* Create new ARP table entry if necessary */ + if ( ! merge ) { + arp = &arp_table[next_new_arp_entry++ % NUM_ARP_ENTRIES]; + arp->ll_protocol = ll_protocol; + arp->net_protocol = net_protocol; + memcpy ( arp->ll_addr, arp_sender_ha ( arphdr ), + arphdr->ar_hln ); + memcpy ( arp->net_addr, arp_sender_pa ( arphdr ), + arphdr->ar_pln); + DBG ( "ARP cache add: %s %s => %s %s\n", + net_protocol->name, net_protocol->ntoa ( arp->net_addr ), + ll_protocol->name, ll_protocol->ntoa ( arp->ll_addr ) ); + } + + /* If it's not a request, there's nothing more to do */ + if ( arphdr->ar_op != htons ( ARPOP_REQUEST ) ) + goto done; + + /* Change request to a reply */ + DBG ( "ARP reply: %s %s => %s %s\n", net_protocol->name, + net_protocol->ntoa ( arp_target_pa ( arphdr ) ), + ll_protocol->name, ll_protocol->ntoa ( netdev->ll_addr ) ); + arphdr->ar_op = htons ( ARPOP_REPLY ); + memswap ( arp_sender_ha ( arphdr ), arp_target_ha ( arphdr ), + arphdr->ar_hln + arphdr->ar_pln ); + memcpy ( arp_sender_ha ( arphdr ), netdev->ll_addr, arphdr->ar_hln ); + + /* Send reply */ + net_tx ( iob_disown ( iobuf ), netdev, &arp_protocol, + arp_target_ha ( arphdr ) ); + + done: + free_iob ( iobuf ); + return 0; +} + +/** + * Transcribe ARP address + * + * @v net_addr ARP address + * @ret string "<ARP>" + * + * This operation is meaningless for the ARP protocol. + */ +static const char * arp_ntoa ( const void *net_addr __unused ) { + return "<ARP>"; +} + +/** ARP protocol */ +struct net_protocol arp_protocol __net_protocol = { + .name = "ARP", + .net_proto = htons ( ETH_P_ARP ), + .rx = arp_rx, + .ntoa = arp_ntoa, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/dhcpopts.c b/debian/grub-extras/disabled/gpxe/src/net/dhcpopts.c new file mode 100644 index 0000000..c1940f1 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/dhcpopts.c @@ -0,0 +1,436 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <gpxe/dhcp.h> +#include <gpxe/dhcpopts.h> + +/** @file + * + * DHCP options + * + */ + +/** + * Obtain printable version of a DHCP option tag + * + * @v tag DHCP option tag + * @ret name String representation of the tag + * + */ +static inline char * dhcp_tag_name ( unsigned int tag ) { + static char name[8]; + + if ( DHCP_IS_ENCAP_OPT ( tag ) ) { + snprintf ( name, sizeof ( name ), "%d.%d", + DHCP_ENCAPSULATOR ( tag ), + DHCP_ENCAPSULATED ( tag ) ); + } else { + snprintf ( name, sizeof ( name ), "%d", tag ); + } + return name; +} + +/** + * Get pointer to DHCP option + * + * @v options DHCP options block + * @v offset Offset within options block + * @ret option DHCP option + */ +static inline __attribute__ (( always_inline )) struct dhcp_option * +dhcp_option ( struct dhcp_options *options, unsigned int offset ) { + return ( ( struct dhcp_option * ) ( options->data + offset ) ); +} + +/** + * Get offset of a DHCP option + * + * @v options DHCP options block + * @v option DHCP option + * @ret offset Offset within options block + */ +static inline __attribute__ (( always_inline )) int +dhcp_option_offset ( struct dhcp_options *options, + struct dhcp_option *option ) { + return ( ( ( void * ) option ) - options->data ); +} + +/** + * Calculate length of any DHCP option + * + * @v option DHCP option + * @ret len Length (including tag and length field) + */ +static unsigned int dhcp_option_len ( struct dhcp_option *option ) { + if ( ( option->tag == DHCP_END ) || ( option->tag == DHCP_PAD ) ) { + return 1; + } else { + return ( option->len + DHCP_OPTION_HEADER_LEN ); + } +} + +/** + * Find DHCP option within DHCP options block, and its encapsulator (if any) + * + * @v options DHCP options block + * @v tag DHCP option tag to search for + * @ret encap_offset Offset of encapsulating DHCP option + * @ret offset Offset of DHCP option, or negative error + * + * Searches for the DHCP option matching the specified tag within the + * DHCP option block. Encapsulated options may be searched for by + * using DHCP_ENCAP_OPT() to construct the tag value. + * + * If the option is encapsulated, and @c encapsulator is non-NULL, it + * will be filled in with the offset of the encapsulating option. + * + * This routine is designed to be paranoid. It does not assume that + * the option data is well-formatted, and so must guard against flaws + * such as options missing a @c DHCP_END terminator, or options whose + * length would take them beyond the end of the data block. + */ +static int find_dhcp_option_with_encap ( struct dhcp_options *options, + unsigned int tag, + int *encap_offset ) { + unsigned int original_tag __attribute__ (( unused )) = tag; + struct dhcp_option *option; + int offset = 0; + ssize_t remaining = options->len; + unsigned int option_len; + + /* Sanity check */ + if ( tag == DHCP_PAD ) + return -ENOENT; + + /* Search for option */ + while ( remaining ) { + /* Calculate length of this option. Abort processing + * if the length is malformed (i.e. takes us beyond + * the end of the data block). + */ + option = dhcp_option ( options, offset ); + option_len = dhcp_option_len ( option ); + remaining -= option_len; + if ( remaining < 0 ) + break; + /* Check for explicit end marker */ + if ( option->tag == DHCP_END ) + break; + /* Check for matching tag */ + if ( option->tag == tag ) { + DBGC ( options, "DHCPOPT %p found %s (length %d)\n", + options, dhcp_tag_name ( original_tag ), + option_len ); + return offset; + } + /* Check for start of matching encapsulation block */ + if ( DHCP_IS_ENCAP_OPT ( tag ) && + ( option->tag == DHCP_ENCAPSULATOR ( tag ) ) ) { + if ( encap_offset ) + *encap_offset = offset; + /* Continue search within encapsulated option block */ + tag = DHCP_ENCAPSULATED ( tag ); + remaining = option_len; + offset += DHCP_OPTION_HEADER_LEN; + continue; + } + offset += option_len; + } + + return -ENOENT; +} + +/** + * Resize a DHCP option + * + * @v options DHCP option block + * @v offset Offset of option to resize + * @v encap_offset Offset of encapsulating offset (or -ve for none) + * @v old_len Old length (including header) + * @v new_len New length (including header) + * @v can_realloc Can reallocate options data if necessary + * @ret rc Return status code + */ +static int resize_dhcp_option ( struct dhcp_options *options, + int offset, int encap_offset, + size_t old_len, size_t new_len, + int can_realloc ) { + struct dhcp_option *encapsulator; + struct dhcp_option *option; + ssize_t delta = ( new_len - old_len ); + size_t new_options_len; + size_t new_encapsulator_len; + void *new_data; + void *source; + void *dest; + void *end; + + /* Check for sufficient space, and update length fields */ + if ( new_len > DHCP_MAX_LEN ) { + DBGC ( options, "DHCPOPT %p overlength option\n", options ); + return -ENOSPC; + } + new_options_len = ( options->len + delta ); + if ( new_options_len > options->max_len ) { + /* Reallocate options block if allowed to do so. */ + if ( can_realloc ) { + new_data = realloc ( options->data, new_options_len ); + if ( ! new_data ) { + DBGC ( options, "DHCPOPT %p could not " + "reallocate to %zd bytes\n", options, + new_options_len ); + return -ENOMEM; + } + options->data = new_data; + options->max_len = new_options_len; + } else { + DBGC ( options, "DHCPOPT %p out of space\n", options ); + return -ENOMEM; + } + } + if ( encap_offset >= 0 ) { + encapsulator = dhcp_option ( options, encap_offset ); + new_encapsulator_len = ( encapsulator->len + delta ); + if ( new_encapsulator_len > DHCP_MAX_LEN ) { + DBGC ( options, "DHCPOPT %p overlength encapsulator\n", + options ); + return -ENOSPC; + } + encapsulator->len = new_encapsulator_len; + } + options->len = new_options_len; + + /* Move remainder of option data */ + option = dhcp_option ( options, offset ); + source = ( ( ( void * ) option ) + old_len ); + dest = ( ( ( void * ) option ) + new_len ); + end = ( options->data + options->max_len ); + memmove ( dest, source, ( end - dest ) ); + + return 0; +} + +/** + * Set value of DHCP option + * + * @v options DHCP option block + * @v tag DHCP option tag + * @v data New value for DHCP option + * @v len Length of value, in bytes + * @v can_realloc Can reallocate options data if necessary + * @ret offset Offset of DHCP option, or negative error + * + * Sets the value of a DHCP option within the options block. The + * option may or may not already exist. Encapsulators will be created + * (and deleted) as necessary. + * + * This call may fail due to insufficient space in the options block. + * If it does fail, and the option existed previously, the option will + * be left with its original value. + */ +static int set_dhcp_option ( struct dhcp_options *options, unsigned int tag, + const void *data, size_t len, + int can_realloc ) { + static const uint8_t empty_encapsulator[] = { DHCP_END }; + int offset; + int encap_offset = -1; + int creation_offset = 0; + struct dhcp_option *option; + unsigned int encap_tag = DHCP_ENCAPSULATOR ( tag ); + size_t old_len = 0; + size_t new_len = ( len ? ( len + DHCP_OPTION_HEADER_LEN ) : 0 ); + int rc; + + /* Sanity check */ + if ( tag == DHCP_PAD ) + return -ENOTTY; + + /* Find old instance of this option, if any */ + offset = find_dhcp_option_with_encap ( options, tag, &encap_offset ); + if ( offset >= 0 ) { + old_len = dhcp_option_len ( dhcp_option ( options, offset ) ); + DBGC ( options, "DHCPOPT %p resizing %s from %zd to %zd\n", + options, dhcp_tag_name ( tag ), old_len, new_len ); + } else { + DBGC ( options, "DHCPOPT %p creating %s (length %zd)\n", + options, dhcp_tag_name ( tag ), new_len ); + } + + /* Ensure that encapsulator exists, if required */ + if ( encap_tag ) { + if ( encap_offset < 0 ) + encap_offset = set_dhcp_option ( options, encap_tag, + empty_encapsulator, 1, + can_realloc ); + if ( encap_offset < 0 ) + return encap_offset; + creation_offset = ( encap_offset + DHCP_OPTION_HEADER_LEN ); + } + + /* Create new option if necessary */ + if ( offset < 0 ) + offset = creation_offset; + + /* Resize option to fit new data */ + if ( ( rc = resize_dhcp_option ( options, offset, encap_offset, + old_len, new_len, + can_realloc ) ) != 0 ) + return rc; + + /* Copy new data into option, if applicable */ + if ( len ) { + option = dhcp_option ( options, offset ); + option->tag = tag; + option->len = len; + memcpy ( &option->data, data, len ); + } + + /* Delete encapsulator if there's nothing else left in it */ + if ( encap_offset >= 0 ) { + option = dhcp_option ( options, encap_offset ); + if ( option->len <= 1 ) + set_dhcp_option ( options, encap_tag, NULL, 0, 0 ); + } + + return offset; +} + +/** + * Store value of DHCP option setting + * + * @v options DHCP option block + * @v tag Setting tag number + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +int dhcpopt_store ( struct dhcp_options *options, unsigned int tag, + const void *data, size_t len ) { + int offset; + + offset = set_dhcp_option ( options, tag, data, len, 0 ); + if ( offset < 0 ) + return offset; + return 0; +} + +/** + * Store value of DHCP option setting, extending options block if necessary + * + * @v options DHCP option block + * @v tag Setting tag number + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +int dhcpopt_extensible_store ( struct dhcp_options *options, unsigned int tag, + const void *data, size_t len ) { + int offset; + + offset = set_dhcp_option ( options, tag, data, len, 1 ); + if ( offset < 0 ) + return offset; + return 0; +} + +/** + * Fetch value of DHCP option setting + * + * @v options DHCP option block + * @v tag Setting tag number + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +int dhcpopt_fetch ( struct dhcp_options *options, unsigned int tag, + void *data, size_t len ) { + int offset; + struct dhcp_option *option; + size_t option_len; + + offset = find_dhcp_option_with_encap ( options, tag, NULL ); + if ( offset < 0 ) + return offset; + + option = dhcp_option ( options, offset ); + option_len = option->len; + if ( len > option_len ) + len = option_len; + memcpy ( data, option->data, len ); + + return option_len; +} + +/** + * Recalculate length of DHCP options block + * + * @v options Uninitialised DHCP option block + * + * The "used length" field will be updated based on scanning through + * the block to find the end of the options. + */ +static void dhcpopt_update_len ( struct dhcp_options *options ) { + struct dhcp_option *option; + int offset = 0; + ssize_t remaining = options->max_len; + unsigned int option_len; + + /* Find last non-pad option */ + options->len = 0; + while ( remaining ) { + option = dhcp_option ( options, offset ); + option_len = dhcp_option_len ( option ); + remaining -= option_len; + if ( remaining < 0 ) + break; + offset += option_len; + if ( option->tag != DHCP_PAD ) + options->len = offset; + } +} + +/** + * Initialise prepopulated block of DHCP options + * + * @v options Uninitialised DHCP option block + * @v data Memory for DHCP option data + * @v max_len Length of memory for DHCP option data + * + * The memory content must already be filled with valid DHCP options. + * A zeroed block counts as a block of valid DHCP options. + */ +void dhcpopt_init ( struct dhcp_options *options, void *data, + size_t max_len ) { + + /* Fill in fields */ + options->data = data; + options->max_len = max_len; + + /* Update length */ + dhcpopt_update_len ( options ); + + DBGC ( options, "DHCPOPT %p created (data %p len %#zx max_len %#zx)\n", + options, options->data, options->len, options->max_len ); +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/dhcppkt.c b/debian/grub-extras/disabled/gpxe/src/net/dhcppkt.c new file mode 100644 index 0000000..20a0e66 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/dhcppkt.c @@ -0,0 +1,283 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <gpxe/netdevice.h> +#include <gpxe/dhcp.h> +#include <gpxe/dhcpopts.h> +#include <gpxe/dhcppkt.h> + +/** @file + * + * DHCP packets + * + */ + +/**************************************************************************** + * + * DHCP packet raw interface + * + */ + +/** + * Calculate used length of an IPv4 field within a DHCP packet + * + * @v data Field data + * @v len Length of field + * @ret used Used length of field + */ +static size_t used_len_ipv4 ( const void *data, size_t len __unused ) { + const struct in_addr *in = data; + + return ( in->s_addr ? sizeof ( *in ) : 0 ); +} + +/** + * Calculate used length of a string field within a DHCP packet + * + * @v data Field data + * @v len Length of field + * @ret used Used length of field + */ +static size_t used_len_string ( const void *data, size_t len ) { + return strnlen ( data, len ); +} + +/** A dedicated field within a DHCP packet */ +struct dhcp_packet_field { + /** Settings tag number */ + unsigned int tag; + /** Offset within DHCP packet */ + uint16_t offset; + /** Length of field */ + uint16_t len; + /** Calculate used length of field + * + * @v data Field data + * @v len Length of field + * @ret used Used length of field + */ + size_t ( * used_len ) ( const void *data, size_t len ); +}; + +/** Declare a dedicated field within a DHCP packet + * + * @v _tag Settings tag number + * @v _field Field name + * @v _used_len Function to calculate used length of field + */ +#define DHCP_PACKET_FIELD( _tag, _field, _used_len ) { \ + .tag = (_tag), \ + .offset = offsetof ( struct dhcphdr, _field ), \ + .len = sizeof ( ( ( struct dhcphdr * ) 0 )->_field ), \ + .used_len = _used_len, \ + } + +/** Dedicated fields within a DHCP packet */ +static struct dhcp_packet_field dhcp_packet_fields[] = { + DHCP_PACKET_FIELD ( DHCP_EB_YIADDR, yiaddr, used_len_ipv4 ), + DHCP_PACKET_FIELD ( DHCP_EB_SIADDR, siaddr, used_len_ipv4 ), + DHCP_PACKET_FIELD ( DHCP_TFTP_SERVER_NAME, sname, used_len_string ), + DHCP_PACKET_FIELD ( DHCP_BOOTFILE_NAME, file, used_len_string ), +}; + +/** + * Get address of a DHCP packet field + * + * @v dhcphdr DHCP packet header + * @v field DHCP packet field + * @ret data Packet field data + */ +static inline void * dhcp_packet_field ( struct dhcphdr *dhcphdr, + struct dhcp_packet_field *field ) { + return ( ( ( void * ) dhcphdr ) + field->offset ); +} + +/** + * Find DHCP packet field corresponding to settings tag number + * + * @v tag Settings tag number + * @ret field DHCP packet field, or NULL + */ +static struct dhcp_packet_field * +find_dhcp_packet_field ( unsigned int tag ) { + struct dhcp_packet_field *field; + unsigned int i; + + for ( i = 0 ; i < ( sizeof ( dhcp_packet_fields ) / + sizeof ( dhcp_packet_fields[0] ) ) ; i++ ) { + field = &dhcp_packet_fields[i]; + if ( field->tag == tag ) + return field; + } + return NULL; +} + +/** + * Store value of DHCP packet setting + * + * @v dhcppkt DHCP packet + * @v tag Setting tag number + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +int dhcppkt_store ( struct dhcp_packet *dhcppkt, unsigned int tag, + const void *data, size_t len ) { + struct dhcp_packet_field *field; + void *field_data; + int rc; + + /* If this is a special field, fill it in */ + if ( ( field = find_dhcp_packet_field ( tag ) ) != NULL ) { + if ( len > field->len ) + return -ENOSPC; + field_data = dhcp_packet_field ( dhcppkt->dhcphdr, field ); + memset ( field_data, 0, field->len ); + memcpy ( dhcp_packet_field ( dhcppkt->dhcphdr, field ), + data, len ); + /* Erase any equivalent option from the options block */ + dhcpopt_store ( &dhcppkt->options, tag, NULL, 0 ); + return 0; + } + + /* Otherwise, use the generic options block */ + rc = dhcpopt_store ( &dhcppkt->options, tag, data, len ); + + /* Update our used-length field */ + dhcppkt->len = ( offsetof ( struct dhcphdr, options ) + + dhcppkt->options.len ); + + return rc; +} + +/** + * Fetch value of DHCP packet setting + * + * @v dhcppkt DHCP packet + * @v tag Setting tag number + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +int dhcppkt_fetch ( struct dhcp_packet *dhcppkt, unsigned int tag, + void *data, size_t len ) { + struct dhcp_packet_field *field; + void *field_data; + size_t field_len = 0; + + /* Identify special field, if any */ + if ( ( field = find_dhcp_packet_field ( tag ) ) != NULL ) { + field_data = dhcp_packet_field ( dhcppkt->dhcphdr, field ); + field_len = field->used_len ( field_data, field->len ); + } + + /* Return special field, if it exists and is populated */ + if ( field_len ) { + if ( len > field_len ) + len = field_len; + memcpy ( data, field_data, len ); + return field_len; + } + + /* Otherwise, use the generic options block */ + return dhcpopt_fetch ( &dhcppkt->options, tag, data, len ); +} + +/**************************************************************************** + * + * DHCP packet settings interface + * + */ + +/** + * Store value of DHCP setting + * + * @v settings Settings block + * @v setting Setting to store + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +static int dhcppkt_settings_store ( struct settings *settings, + struct setting *setting, + const void *data, size_t len ) { + struct dhcp_packet *dhcppkt = + container_of ( settings, struct dhcp_packet, settings ); + + return dhcppkt_store ( dhcppkt, setting->tag, data, len ); +} + +/** + * Fetch value of DHCP setting + * + * @v settings Settings block, or NULL to search all blocks + * @v setting Setting to fetch + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int dhcppkt_settings_fetch ( struct settings *settings, + struct setting *setting, + void *data, size_t len ) { + struct dhcp_packet *dhcppkt = + container_of ( settings, struct dhcp_packet, settings ); + + return dhcppkt_fetch ( dhcppkt, setting->tag, data, len ); +} + +/** DHCP settings operations */ +static struct settings_operations dhcppkt_settings_operations = { + .store = dhcppkt_settings_store, + .fetch = dhcppkt_settings_fetch, +}; + +/**************************************************************************** + * + * Constructor + * + */ + +/** + * Initialise DHCP packet + * + * @v dhcppkt DHCP packet structure to fill in + * @v data DHCP packet raw data + * @v max_len Length of raw data buffer + * + * Initialise a DHCP packet structure from a data buffer containing a + * DHCP packet. + */ +void dhcppkt_init ( struct dhcp_packet *dhcppkt, struct dhcphdr *data, + size_t len ) { + dhcppkt->dhcphdr = data; + dhcppkt->max_len = len; + dhcpopt_init ( &dhcppkt->options, &dhcppkt->dhcphdr->options, + ( len - offsetof ( struct dhcphdr, options ) ) ); + dhcppkt->len = ( offsetof ( struct dhcphdr, options ) + + dhcppkt->options.len ); + settings_init ( &dhcppkt->settings, + &dhcppkt_settings_operations, &dhcppkt->refcnt, + DHCP_SETTINGS_NAME, 0 ); +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/ethernet.c b/debian/grub-extras/disabled/gpxe/src/net/ethernet.c new file mode 100644 index 0000000..bf1aec0 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/ethernet.c @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <gpxe/if_arp.h> +#include <gpxe/if_ether.h> +#include <gpxe/in.h> +#include <gpxe/netdevice.h> +#include <gpxe/iobuf.h> +#include <gpxe/ethernet.h> + +/** @file + * + * Ethernet protocol + * + */ + +/** Ethernet broadcast MAC address */ +static uint8_t eth_broadcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +/** + * Add Ethernet link-layer header + * + * @v netdev Network device + * @v iobuf I/O buffer + * @v ll_dest Link-layer destination address + * @v ll_source Source link-layer address + * @v net_proto Network-layer protocol, in network-byte order + * @ret rc Return status code + */ +static int eth_push ( struct net_device *netdev __unused, + struct io_buffer *iobuf, const void *ll_dest, + const void *ll_source, uint16_t net_proto ) { + struct ethhdr *ethhdr = iob_push ( iobuf, sizeof ( *ethhdr ) ); + + /* Build Ethernet header */ + memcpy ( ethhdr->h_dest, ll_dest, ETH_ALEN ); + memcpy ( ethhdr->h_source, ll_source, ETH_ALEN ); + ethhdr->h_protocol = net_proto; + + return 0; +} + +/** + * Remove Ethernet link-layer header + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret ll_dest Link-layer destination address + * @ret ll_source Source link-layer address + * @ret net_proto Network-layer protocol, in network-byte order + * @ret rc Return status code + */ +static int eth_pull ( struct net_device *netdev __unused, + struct io_buffer *iobuf, const void **ll_dest, + const void **ll_source, uint16_t *net_proto ) { + struct ethhdr *ethhdr = iobuf->data; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ethhdr ) ) { + DBG ( "Ethernet packet too short (%zd bytes)\n", + iob_len ( iobuf ) ); + return -EINVAL; + } + + /* Strip off Ethernet header */ + iob_pull ( iobuf, sizeof ( *ethhdr ) ); + + /* Fill in required fields */ + *ll_dest = ethhdr->h_dest; + *ll_source = ethhdr->h_source; + *net_proto = ethhdr->h_protocol; + + return 0; +} + +/** + * Initialise Ethernet address + * + * @v hw_addr Hardware address + * @v ll_addr Link-layer address + */ +void eth_init_addr ( const void *hw_addr, void *ll_addr ) { + memcpy ( ll_addr, hw_addr, ETH_ALEN ); +} + +/** + * Transcribe Ethernet address + * + * @v ll_addr Link-layer address + * @ret string Link-layer address in human-readable format + */ +const char * eth_ntoa ( const void *ll_addr ) { + static char buf[18]; /* "00:00:00:00:00:00" */ + const uint8_t *eth_addr = ll_addr; + + snprintf ( buf, sizeof (buf), "%02x:%02x:%02x:%02x:%02x:%02x", + eth_addr[0], eth_addr[1], eth_addr[2], + eth_addr[3], eth_addr[4], eth_addr[5] ); + return buf; +} + +/** + * Hash multicast address + * + * @v af Address family + * @v net_addr Network-layer address + * @v ll_addr Link-layer address to fill in + * @ret rc Return status code + */ +int eth_mc_hash ( unsigned int af, const void *net_addr, void *ll_addr ) { + const uint8_t *net_addr_bytes = net_addr; + uint8_t *ll_addr_bytes = ll_addr; + + switch ( af ) { + case AF_INET: + ll_addr_bytes[0] = 0x01; + ll_addr_bytes[1] = 0x00; + ll_addr_bytes[2] = 0x5e; + ll_addr_bytes[3] = net_addr_bytes[1] & 0x7f; + ll_addr_bytes[4] = net_addr_bytes[2]; + ll_addr_bytes[5] = net_addr_bytes[3]; + return 0; + default: + return -ENOTSUP; + } +} + +/** Ethernet protocol */ +struct ll_protocol ethernet_protocol __ll_protocol = { + .name = "Ethernet", + .ll_proto = htons ( ARPHRD_ETHER ), + .hw_addr_len = ETH_ALEN, + .ll_addr_len = ETH_ALEN, + .ll_header_len = ETH_HLEN, + .push = eth_push, + .pull = eth_pull, + .init_addr = eth_init_addr, + .ntoa = eth_ntoa, + .mc_hash = eth_mc_hash, +}; + +/** + * Allocate Ethernet device + * + * @v priv_size Size of driver private data + * @ret netdev Network device, or NULL + */ +struct net_device * alloc_etherdev ( size_t priv_size ) { + struct net_device *netdev; + + netdev = alloc_netdev ( priv_size ); + if ( netdev ) { + netdev->ll_protocol = ðernet_protocol; + netdev->ll_broadcast = eth_broadcast; + netdev->max_pkt_len = ETH_FRAME_LEN; + } + return netdev; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/fakedhcp.c b/debian/grub-extras/disabled/gpxe/src/net/fakedhcp.c new file mode 100644 index 0000000..ad3f046 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/fakedhcp.c @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <gpxe/settings.h> +#include <gpxe/netdevice.h> +#include <gpxe/dhcppkt.h> +#include <gpxe/fakedhcp.h> + +/** @file + * + * Fake DHCP packets + * + */ + +/** + * Copy settings to DHCP packet + * + * @v dest Destination DHCP packet + * @v source Source settings block + * @v encapsulator Encapsulating setting tag number, or zero + * @ret rc Return status code + */ +static int copy_encap_settings ( struct dhcp_packet *dest, + struct settings *source, + unsigned int encapsulator ) { + struct setting setting = { .name = "" }; + unsigned int subtag; + unsigned int tag; + int len; + int check_len; + int rc; + + for ( subtag = DHCP_MIN_OPTION; subtag <= DHCP_MAX_OPTION; subtag++ ) { + tag = DHCP_ENCAP_OPT ( encapsulator, subtag ); + switch ( tag ) { + case DHCP_EB_ENCAP: + case DHCP_VENDOR_ENCAP: + /* Process encapsulated settings */ + if ( ( rc = copy_encap_settings ( dest, source, + tag ) ) != 0 ) + return rc; + break; + default: + /* Copy setting, if present */ + setting.tag = tag; + len = fetch_setting_len ( source, &setting ); + if ( len < 0 ) + break; + { + char buf[len]; + + check_len = fetch_setting ( source, &setting, + buf, sizeof (buf)); + assert ( check_len == len ); + if ( ( rc = dhcppkt_store ( dest, tag, buf, + sizeof(buf) )) !=0) + return rc; + } + break; + } + } + + return 0; +} + +/** + * Copy settings to DHCP packet + * + * @v dest Destination DHCP packet + * @v source Source settings block + * @ret rc Return status code + */ +static int copy_settings ( struct dhcp_packet *dest, + struct settings *source ) { + return copy_encap_settings ( dest, source, 0 ); +} + +/** + * Create fake DHCPDISCOVER packet + * + * @v netdev Network device + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Used by external code. + */ +int create_fakedhcpdiscover ( struct net_device *netdev, + void *data, size_t max_len ) { + struct dhcp_packet dhcppkt; + struct in_addr ciaddr = { 0 }; + int rc; + + if ( ( rc = dhcp_create_request ( &dhcppkt, netdev, DHCPDISCOVER, + ciaddr, data, max_len ) ) != 0 ) { + DBG ( "Could not create DHCPDISCOVER: %s\n", + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Create fake DHCPACK packet + * + * @v netdev Network device + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Used by external code. + */ +int create_fakedhcpack ( struct net_device *netdev, + void *data, size_t max_len ) { + struct dhcp_packet dhcppkt; + int rc; + + /* Create base DHCPACK packet */ + if ( ( rc = dhcp_create_packet ( &dhcppkt, netdev, DHCPACK, NULL, 0, + data, max_len ) ) != 0 ) { + DBG ( "Could not create DHCPACK: %s\n", strerror ( rc ) ); + return rc; + } + + /* Merge in globally-scoped settings, then netdev-specific + * settings. Do it in this order so that netdev-specific + * settings take precedence regardless of stated priorities. + */ + if ( ( rc = copy_settings ( &dhcppkt, NULL ) ) != 0 ) { + DBG ( "Could not set DHCPACK global settings: %s\n", + strerror ( rc ) ); + return rc; + } + if ( ( rc = copy_settings ( &dhcppkt, + netdev_settings ( netdev ) ) ) != 0 ) { + DBG ( "Could not set DHCPACK netdev settings: %s\n", + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Create fake PXE Boot Server ACK packet + * + * @v netdev Network device + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Used by external code. + */ +int create_fakepxebsack ( struct net_device *netdev, + void *data, size_t max_len ) { + struct dhcp_packet dhcppkt; + struct settings *proxy_settings; + struct settings *pxebs_settings; + int rc; + + /* Identify available settings */ + proxy_settings = find_settings ( PROXYDHCP_SETTINGS_NAME ); + pxebs_settings = find_settings ( PXEBS_SETTINGS_NAME ); + if ( ( ! proxy_settings ) && ( ! pxebs_settings ) ) { + /* No PXE boot server; return the regular DHCPACK */ + return create_fakedhcpack ( netdev, data, max_len ); + } + + /* Create base DHCPACK packet */ + if ( ( rc = dhcp_create_packet ( &dhcppkt, netdev, DHCPACK, NULL, 0, + data, max_len ) ) != 0 ) { + DBG ( "Could not create PXE BS ACK: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Merge in ProxyDHCP options */ + if ( proxy_settings && + ( ( rc = copy_settings ( &dhcppkt, proxy_settings ) ) != 0 ) ) { + DBG ( "Could not copy ProxyDHCP settings: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Merge in BootServerDHCP options, if present */ + if ( pxebs_settings && + ( ( rc = copy_settings ( &dhcppkt, pxebs_settings ) ) != 0 ) ) { + DBG ( "Could not copy PXE BS settings: %s\n", + strerror ( rc ) ); + return rc; + } + + return 0; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/icmp.c b/debian/grub-extras/disabled/gpxe/src/net/icmp.c new file mode 100644 index 0000000..749c345 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/icmp.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <errno.h> +#include <gpxe/iobuf.h> +#include <gpxe/in.h> +#include <gpxe/tcpip.h> +#include <gpxe/icmp.h> + +/** @file + * + * ICMP protocol + * + */ + +struct tcpip_protocol icmp_protocol __tcpip_protocol; + +/** + * Process a received packet + * + * @v iobuf I/O buffer + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @ret rc Return status code + */ +static int icmp_rx ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, + uint16_t pshdr_csum __unused ) { + struct icmp_header *icmp = iobuf->data; + size_t len = iob_len ( iobuf ); + unsigned int csum; + int rc; + + /* Sanity check */ + if ( len < sizeof ( *icmp ) ) { + DBG ( "ICMP packet too short at %zd bytes (min %zd bytes)\n", + len, sizeof ( *icmp ) ); + rc = -EINVAL; + goto done; + } + + /* Verify checksum */ + csum = tcpip_chksum ( icmp, len ); + if ( csum != 0 ) { + DBG ( "ICMP checksum incorrect (is %04x, should be 0000)\n", + csum ); + DBG_HD ( icmp, len ); + rc = -EINVAL; + goto done; + } + + /* We respond only to pings */ + if ( icmp->type != ICMP_ECHO_REQUEST ) { + DBG ( "ICMP ignoring type %d\n", icmp->type ); + rc = 0; + goto done; + } + + DBG ( "ICMP responding to ping\n" ); + + /* Change type to response and recalculate checksum */ + icmp->type = ICMP_ECHO_RESPONSE; + icmp->chksum = 0; + icmp->chksum = tcpip_chksum ( icmp, len ); + + /* Transmit the response */ + if ( ( rc = tcpip_tx ( iob_disown ( iobuf ), &icmp_protocol, st_dest, + st_src, NULL, NULL ) ) != 0 ) { + DBG ( "ICMP could not transmit ping response: %s\n", + strerror ( rc ) ); + goto done; + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** ICMP TCP/IP protocol */ +struct tcpip_protocol icmp_protocol __tcpip_protocol = { + .name = "ICMP", + .rx = icmp_rx, + .tcpip_proto = IP_ICMP, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband.c new file mode 100644 index 0000000..cd7deae --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband.c @@ -0,0 +1,923 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <gpxe/list.h> +#include <gpxe/if_arp.h> +#include <gpxe/netdevice.h> +#include <gpxe/iobuf.h> +#include <gpxe/ipoib.h> +#include <gpxe/process.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_sma.h> + +/** @file + * + * Infiniband protocol + * + */ + +/** List of Infiniband devices */ +struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices ); + +/** List of open Infiniband devices, in reverse order of opening */ +static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices ); + +/*************************************************************************** + * + * Completion queues + * + *************************************************************************** + */ + +/** + * Create completion queue + * + * @v ibdev Infiniband device + * @v num_cqes Number of completion queue entries + * @v op Completion queue operations + * @ret cq New completion queue + */ +struct ib_completion_queue * +ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, + struct ib_completion_queue_operations *op ) { + struct ib_completion_queue *cq; + int rc; + + DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev ); + + /* Allocate and initialise data structure */ + cq = zalloc ( sizeof ( *cq ) ); + if ( ! cq ) + goto err_alloc_cq; + cq->ibdev = ibdev; + list_add ( &cq->list, &ibdev->cqs ); + cq->num_cqes = num_cqes; + INIT_LIST_HEAD ( &cq->work_queues ); + cq->op = op; + + /* Perform device-specific initialisation and get CQN */ + if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise completion " + "queue: %s\n", ibdev, strerror ( rc ) ); + goto err_dev_create_cq; + } + + DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) " + "with CQN %#lx\n", ibdev, num_cqes, cq, + ib_cq_get_drvdata ( cq ), cq->cqn ); + return cq; + + ibdev->op->destroy_cq ( ibdev, cq ); + err_dev_create_cq: + list_del ( &cq->list ); + free ( cq ); + err_alloc_cq: + return NULL; +} + +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +void ib_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n", + ibdev, cq->cqn ); + assert ( list_empty ( &cq->work_queues ) ); + ibdev->op->destroy_cq ( ibdev, cq ); + list_del ( &cq->list ); + free ( cq ); +} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +void ib_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + struct ib_work_queue *wq; + + /* Poll completion queue */ + ibdev->op->poll_cq ( ibdev, cq ); + + /* Refill receive work queues */ + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ! wq->is_send ) + ib_refill_recv ( ibdev, wq->qp ); + } +} + +/*************************************************************************** + * + * Work queues + * + *************************************************************************** + */ + +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v type Queue pair type + * @v num_send_wqes Number of send work queue entries + * @v send_cq Send completion queue + * @v num_recv_wqes Number of receive work queue entries + * @v recv_cq Receive completion queue + * @ret qp Queue pair + * + * The queue pair will be left in the INIT state; you must call + * ib_modify_qp() before it is ready to use for sending and receiving. + */ +struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, + enum ib_queue_pair_type type, + unsigned int num_send_wqes, + struct ib_completion_queue *send_cq, + unsigned int num_recv_wqes, + struct ib_completion_queue *recv_cq ) { + struct ib_queue_pair *qp; + size_t total_size; + int rc; + + DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); + + /* Allocate and initialise data structure */ + total_size = ( sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + + ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + qp = zalloc ( total_size ); + if ( ! qp ) + goto err_alloc_qp; + qp->ibdev = ibdev; + list_add ( &qp->list, &ibdev->qps ); + qp->type = type; + qp->send.qp = qp; + qp->send.is_send = 1; + qp->send.cq = send_cq; + list_add ( &qp->send.list, &send_cq->work_queues ); + qp->send.psn = ( random() & 0xffffffUL ); + qp->send.num_wqes = num_send_wqes; + qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) ); + qp->recv.qp = qp; + qp->recv.cq = recv_cq; + list_add ( &qp->recv.list, &recv_cq->work_queues ); + qp->recv.psn = ( random() & 0xffffffUL ); + qp->recv.num_wqes = num_recv_wqes; + qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); + INIT_LIST_HEAD ( &qp->mgids ); + + /* Perform device-specific initialisation and get QPN */ + if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise queue pair: " + "%s\n", ibdev, strerror ( rc ) ); + goto err_dev_create_qp; + } + DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", + ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n", + ibdev, qp->qpn, num_send_wqes, qp->send.iobufs, + qp->recv.iobufs ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n", + ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs, + ( ( ( void * ) qp ) + total_size ) ); + + /* Calculate externally-visible QPN */ + switch ( type ) { + case IB_QPT_SMI: + qp->ext_qpn = IB_QPN_SMI; + break; + case IB_QPT_GSI: + qp->ext_qpn = IB_QPN_GSI; + break; + default: + qp->ext_qpn = qp->qpn; + break; + } + if ( qp->ext_qpn != qp->qpn ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n", + ibdev, qp->qpn, qp->ext_qpn ); + } + + return qp; + + ibdev->op->destroy_qp ( ibdev, qp ); + err_dev_create_qp: + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); + list_del ( &qp->list ); + free ( qp ); + err_alloc_qp: + return NULL; +} + +/** + * Modify queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av New address vector, if applicable + * @ret rc Return status code + */ +int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { + int rc; + + DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn ); + + if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { + struct io_buffer *iobuf; + unsigned int i; + + DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n", + ibdev, qp->qpn ); + + assert ( list_empty ( &qp->mgids ) ); + + /* Perform device-specific destruction */ + ibdev->op->destroy_qp ( ibdev, qp ); + + /* Complete any remaining I/O buffers with errors */ + for ( i = 0 ; i < qp->send.num_wqes ; i++ ) { + if ( ( iobuf = qp->send.iobufs[i] ) != NULL ) + ib_complete_send ( ibdev, qp, iobuf, -ECANCELED ); + } + for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) { + if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) { + ib_complete_recv ( ibdev, qp, NULL, iobuf, + -ECANCELED ); + } + } + + /* Remove work queues from completion queue */ + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); + + /* Free QP */ + list_del ( &qp->list ); + free ( qp ); +} + +/** + * Find queue pair by QPN + * + * @v ibdev Infiniband device + * @v qpn Queue pair number + * @ret qp Queue pair, or NULL + */ +struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev, + unsigned long qpn ) { + struct ib_queue_pair *qp; + + list_for_each_entry ( qp, &ibdev->qps, list ) { + if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) ) + return qp; + } + return NULL; +} + +/** + * Find queue pair by multicast GID + * + * @v ibdev Infiniband device + * @v gid Multicast GID + * @ret qp Queue pair, or NULL + */ +struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev, + struct ib_gid *gid ) { + struct ib_queue_pair *qp; + struct ib_multicast_gid *mgid; + + list_for_each_entry ( qp, &ibdev->qps, list ) { + list_for_each_entry ( mgid, &qp->mgids, list ) { + if ( memcmp ( &mgid->gid, gid, + sizeof ( mgid->gid ) ) == 0 ) { + return qp; + } + } + } + return NULL; +} + +/** + * Find work queue belonging to completion queue + * + * @v cq Completion queue + * @v qpn Queue pair number + * @v is_send Find send work queue (rather than receive) + * @ret wq Work queue, or NULL if not found + */ +struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ) { + struct ib_work_queue *wq; + + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) ) + return wq; + } + return NULL; +} + +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @ret rc Return status code + */ +int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_address_vector *av, + struct io_buffer *iobuf ) { + struct ib_address_vector av_copy; + int rc; + + /* Check queue fill level */ + if ( qp->send.fill >= qp->send.num_wqes ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n", + ibdev, qp->qpn ); + return -ENOBUFS; + } + + /* Use default address vector if none specified */ + if ( ! av ) + av = &qp->av; + + /* Make modifiable copy of address vector */ + memcpy ( &av_copy, av, sizeof ( av_copy ) ); + av = &av_copy; + + /* Fill in optional parameters in address vector */ + if ( ! av->qkey ) + av->qkey = qp->qkey; + if ( ! av->rate ) + av->rate = IB_RATE_2_5; + + /* Post to hardware */ + if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: " + "%s\n", ibdev, qp->qpn, strerror ( rc ) ); + return rc; + } + + qp->send.fill++; + return 0; +} + +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + int rc; + + /* Check packet length */ + if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n", + ibdev, qp->qpn, iob_tailroom ( iobuf ) ); + return -EINVAL; + } + + /* Check queue fill level */ + if ( qp->recv.fill >= qp->recv.num_wqes ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n", + ibdev, qp->qpn ); + return -ENOBUFS; + } + + /* Post to hardware */ + if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: " + "%s\n", ibdev, qp->qpn, strerror ( rc ) ); + return rc; + } + + qp->recv.fill++; + return 0; +} + +/** + * Complete send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @v rc Completion status code + */ +void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct io_buffer *iobuf, int rc ) { + + if ( qp->send.cq->op->complete_send ) { + qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc ); + } else { + free_iob ( iobuf ); + } + qp->send.fill--; +} + +/** + * Complete receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @v rc Completion status code + */ +void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_address_vector *av, + struct io_buffer *iobuf, int rc ) { + + if ( qp->recv.cq->op->complete_recv ) { + qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc ); + } else { + free_iob ( iobuf ); + } + qp->recv.fill--; +} + +/** + * Refill receive work queue + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { + struct io_buffer *iobuf; + int rc; + + /* Keep filling while unfilled entries remain */ + while ( qp->recv.fill < qp->recv.num_wqes ) { + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE ); + if ( ! iobuf ) { + /* Non-fatal; we will refill on next attempt */ + return; + } + + /* Post I/O buffer */ + if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not refill: %s\n", + ibdev, strerror ( rc ) ); + free_iob ( iobuf ); + /* Give up */ + return; + } + } +} + +/*************************************************************************** + * + * Link control + * + *************************************************************************** + */ + +/** + * Open port + * + * @v ibdev Infiniband device + * @ret rc Return status code + */ +int ib_open ( struct ib_device *ibdev ) { + int rc; + + /* Increment device open request counter */ + if ( ibdev->open_count++ > 0 ) { + /* Device was already open; do nothing */ + return 0; + } + + /* Create subnet management interface */ + ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI ); + if ( ! ibdev->smi ) { + DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev ); + rc = -ENOMEM; + goto err_create_smi; + } + + /* Create subnet management agent */ + if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n", + ibdev, strerror ( rc ) ); + goto err_create_sma; + } + + /* Create general services interface */ + ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI ); + if ( ! ibdev->gsi ) { + DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev ); + rc = -ENOMEM; + goto err_create_gsi; + } + + /* Open device */ + if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not open: %s\n", + ibdev, strerror ( rc ) ); + goto err_open; + } + + /* Add to head of open devices list */ + list_add ( &ibdev->open_list, &open_ib_devices ); + + assert ( ibdev->open_count == 1 ); + return 0; + + ibdev->op->close ( ibdev ); + err_open: + ib_destroy_mi ( ibdev, ibdev->gsi ); + err_create_gsi: + ib_destroy_sma ( ibdev, ibdev->smi ); + err_create_sma: + ib_destroy_mi ( ibdev, ibdev->smi ); + err_create_smi: + assert ( ibdev->open_count == 1 ); + ibdev->open_count = 0; + return rc; +} + +/** + * Close port + * + * @v ibdev Infiniband device + */ +void ib_close ( struct ib_device *ibdev ) { + + /* Decrement device open request counter */ + ibdev->open_count--; + + /* Close device if this was the last remaining requested opening */ + if ( ibdev->open_count == 0 ) { + list_del ( &ibdev->open_list ); + ib_destroy_mi ( ibdev, ibdev->gsi ); + ib_destroy_sma ( ibdev, ibdev->smi ); + ib_destroy_mi ( ibdev, ibdev->smi ); + ibdev->op->close ( ibdev ); + } +} + +/*************************************************************************** + * + * Multicast + * + *************************************************************************** + */ + +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + * + * Note that this function handles only the local device's attachment + * to the multicast GID; it does not issue the relevant MADs to join + * the multicast group on the subnet. + */ +int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + struct ib_multicast_gid *mgid; + int rc; + + /* Add to software multicast GID list */ + mgid = zalloc ( sizeof ( *mgid ) ); + if ( ! mgid ) { + rc = -ENOMEM; + goto err_alloc_mgid; + } + memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) ); + list_add ( &mgid->list, &qp->mgids ); + + /* Add to hardware multicast GID list */ + if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 ) + goto err_dev_mcast_attach; + + return 0; + + err_dev_mcast_attach: + list_del ( &mgid->list ); + free ( mgid ); + err_alloc_mgid: + return rc; +} + +/** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + struct ib_multicast_gid *mgid; + + /* Remove from hardware multicast GID list */ + ibdev->op->mcast_detach ( ibdev, qp, gid ); + + /* Remove from software multicast GID list */ + list_for_each_entry ( mgid, &qp->mgids, list ) { + if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) { + list_del ( &mgid->list ); + free ( mgid ); + break; + } + } +} + +/*************************************************************************** + * + * Miscellaneous + * + *************************************************************************** + */ + +/** + * Get Infiniband HCA information + * + * @v ibdev Infiniband device + * @ret hca_guid HCA GUID + * @ret num_ports Number of ports + */ +int ib_get_hca_info ( struct ib_device *ibdev, + struct ib_gid_half *hca_guid ) { + struct ib_device *tmp; + int num_ports = 0; + + /* Search for IB devices with the same physical device to + * identify port count and a suitable Node GUID. + */ + for_each_ibdev ( tmp ) { + if ( tmp->dev != ibdev->dev ) + continue; + if ( num_ports == 0 ) { + memcpy ( hca_guid, &tmp->gid.u.half[1], + sizeof ( *hca_guid ) ); + } + num_ports++; + } + return num_ports; +} + +/** + * Set port information + * + * @v ibdev Infiniband device + * @v mad Set port information MAD + */ +int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) { + int rc; + + /* Adapters with embedded SMAs do not need to support this method */ + if ( ! ibdev->op->set_port_info ) { + DBGC ( ibdev, "IBDEV %p does not support setting port " + "information\n", ibdev ); + return -ENOTSUP; + } + + if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not set port information: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + + return 0; +}; + +/** + * Set partition key table + * + * @v ibdev Infiniband device + * @v mad Set partition key table MAD + */ +int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) { + int rc; + + /* Adapters with embedded SMAs do not need to support this method */ + if ( ! ibdev->op->set_pkey_table ) { + DBGC ( ibdev, "IBDEV %p does not support setting partition " + "key table\n", ibdev ); + return -ENOTSUP; + } + + if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not set partition key table: " + "%s\n", ibdev, strerror ( rc ) ); + return rc; + } + + return 0; +}; + +/*************************************************************************** + * + * Event queues + * + *************************************************************************** + */ + +/** + * Handle Infiniband link state change + * + * @v ibdev Infiniband device + */ +void ib_link_state_changed ( struct ib_device *ibdev ) { + + /* Notify IPoIB of link state change */ + ipoib_link_state_changed ( ibdev ); +} + +/** + * Poll event queue + * + * @v ibdev Infiniband device + */ +void ib_poll_eq ( struct ib_device *ibdev ) { + struct ib_completion_queue *cq; + + /* Poll device's event queue */ + ibdev->op->poll_eq ( ibdev ); + + /* Poll all completion queues */ + list_for_each_entry ( cq, &ibdev->cqs, list ) + ib_poll_cq ( ibdev, cq ); +} + +/** + * Single-step the Infiniband event queue + * + * @v process Infiniband event queue process + */ +static void ib_step ( struct process *process __unused ) { + struct ib_device *ibdev; + + for_each_ibdev ( ibdev ) + ib_poll_eq ( ibdev ); +} + +/** Infiniband event queue process */ +struct process ib_process __permanent_process = { + .list = LIST_HEAD_INIT ( ib_process.list ), + .step = ib_step, +}; + +/*************************************************************************** + * + * Infiniband device creation/destruction + * + *************************************************************************** + */ + +/** + * Allocate Infiniband device + * + * @v priv_size Size of driver private data area + * @ret ibdev Infiniband device, or NULL + */ +struct ib_device * alloc_ibdev ( size_t priv_size ) { + struct ib_device *ibdev; + void *drv_priv; + size_t total_len; + + total_len = ( sizeof ( *ibdev ) + priv_size ); + ibdev = zalloc ( total_len ); + if ( ibdev ) { + drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); + ib_set_drvdata ( ibdev, drv_priv ); + INIT_LIST_HEAD ( &ibdev->cqs ); + INIT_LIST_HEAD ( &ibdev->qps ); + ibdev->lid = IB_LID_NONE; + ibdev->pkey = IB_PKEY_NONE; + } + return ibdev; +} + +/** + * Register Infiniband device + * + * @v ibdev Infiniband device + * @ret rc Return status code + */ +int register_ibdev ( struct ib_device *ibdev ) { + int rc; + + /* Add to device list */ + ibdev_get ( ibdev ); + list_add_tail ( &ibdev->list, &ib_devices ); + + /* Add IPoIB device */ + if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n", + ibdev, strerror ( rc ) ); + goto err_ipoib_probe; + } + + DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev, + ibdev->dev->name ); + return 0; + + err_ipoib_probe: + list_del ( &ibdev->list ); + ibdev_put ( ibdev ); + return rc; +} + +/** + * Unregister Infiniband device + * + * @v ibdev Infiniband device + */ +void unregister_ibdev ( struct ib_device *ibdev ) { + + /* Close device */ + ipoib_remove ( ibdev ); + + /* Remove from device list */ + list_del ( &ibdev->list ); + ibdev_put ( ibdev ); + DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev ); +} + +/** + * Find Infiniband device by GID + * + * @v gid GID + * @ret ibdev Infiniband device, or NULL + */ +struct ib_device * find_ibdev ( struct ib_gid *gid ) { + struct ib_device *ibdev; + + for_each_ibdev ( ibdev ) { + if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 ) + return ibdev; + } + return NULL; +} + +/** + * Get most recently opened Infiniband device + * + * @ret ibdev Most recently opened Infiniband device, or NULL + */ +struct ib_device * last_opened_ibdev ( void ) { + struct ib_device *ibdev; + + list_for_each_entry ( ibdev, &open_ib_devices, open_list ) { + assert ( ibdev->open_count != 0 ); + return ibdev; + } + + return NULL; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_cm.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_cm.c new file mode 100644 index 0000000..30a3691 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_cm.c @@ -0,0 +1,411 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_pathrec.h> +#include <gpxe/ib_cm.h> + +/** + * @file + * + * Infiniband communication management + * + */ + +/** List of connections */ +static LIST_HEAD ( ib_cm_conns ); + +/** + * Send "ready to use" response + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v conn Connection + * @v av Address vector + * @ret rc Return status code + */ +static int ib_cm_send_rtu ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_connection *conn, + struct ib_address_vector *av ) { + union ib_mad mad; + struct ib_cm_ready_to_use *ready = + &mad.cm.cm_data.ready_to_use; + int rc; + + /* Construct "ready to use" response */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE ); + ready->local_id = htonl ( conn->local_id ); + ready->remote_id = htonl ( conn->remote_id ); + if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){ + DBGC ( conn, "CM %p could not send RTU: %s\n", + conn, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle duplicate connection replies + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + * @ret rc Return status code + * + * If a "ready to use" MAD is lost, the peer may resend the connection + * reply. We have to respond to these with duplicate "ready to use" + * MADs, otherwise the peer may time out and drop the connection. + */ +static void ib_cm_connect_rep ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_cm_connect_reply *connect_rep = + &mad->cm.cm_data.connect_reply; + struct ib_connection *conn; + int rc; + + /* Identify connection */ + list_for_each_entry ( conn, &ib_cm_conns, list ) { + if ( ntohl ( connect_rep->remote_id ) != conn->local_id ) + continue; + /* Try to send "ready to use" reply */ + if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) { + /* Ignore errors */ + return; + } + return; + } + + DBG ( "CM unidentified connection %08x\n", + ntohl ( connect_rep->remote_id ) ); +} + +/** Communication management agents */ +struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = { + { + .mgmt_class = IB_MGMT_CLASS_CM, + .class_version = IB_CM_CLASS_VERSION, + .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ), + .handle = ib_cm_connect_rep, + }, +}; + +/** + * Convert connection rejection reason to return status code + * + * @v reason Rejection reason (in network byte order) + * @ret rc Return status code + */ +static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) { + switch ( reason ) { + case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) : + return -ENODEV; + case htons ( IB_CM_REJECT_STALE_CONN ) : + return -EALREADY; + case htons ( IB_CM_REJECT_CONSUMER ) : + return -ENOTTY; + default: + return -EPERM; + } +} + +/** + * Handle connection request transaction completion + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_cm_req_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_connection *conn = ib_madx_get_ownerdata ( madx ); + struct ib_queue_pair *qp = conn->qp; + struct ib_cm_common *common = &mad->cm.cm_data.common; + struct ib_cm_connect_reply *connect_rep = + &mad->cm.cm_data.connect_reply; + struct ib_cm_connect_reject *connect_rej = + &mad->cm.cm_data.connect_reject; + void *private_data = NULL; + size_t private_data_len = 0; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -EIO; + if ( rc != 0 ) { + DBGC ( conn, "CM %p connection request failed: %s\n", + conn, strerror ( rc ) ); + goto out; + } + + /* Record remote communication ID */ + conn->remote_id = ntohl ( common->local_id ); + + /* Handle response */ + switch ( mad->hdr.attr_id ) { + + case htons ( IB_CM_ATTR_CONNECT_REPLY ) : + /* Extract fields */ + qp->av.qpn = ( ntohl ( connect_rep->local_qpn ) >> 8 ); + qp->send.psn = ( ntohl ( connect_rep->starting_psn ) >> 8 ); + private_data = &connect_rep->private_data; + private_data_len = sizeof ( connect_rep->private_data ); + DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n", + conn, qp->av.qpn, qp->send.psn ); + + /* Modify queue pair */ + if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( conn, "CM %p could not modify queue pair: %s\n", + conn, strerror ( rc ) ); + goto out; + } + + /* Send "ready to use" reply */ + if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) { + /* Treat as non-fatal */ + rc = 0; + } + break; + + case htons ( IB_CM_ATTR_CONNECT_REJECT ) : + /* Extract fields */ + DBGC ( conn, "CM %p connection rejected (reason %d)\n", + conn, ntohs ( connect_rej->reason ) ); + /* Private data is valid only for a Consumer Reject */ + if ( connect_rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) { + private_data = &connect_rej->private_data; + private_data_len = sizeof (connect_rej->private_data); + } + rc = ib_cm_rejection_reason_to_rc ( connect_rej->reason ); + break; + + default: + DBGC ( conn, "CM %p unexpected response (attribute %04x)\n", + conn, ntohs ( mad->hdr.attr_id ) ); + rc = -ENOTSUP; + break; + } + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, ibdev->gsi, madx ); + conn->madx = NULL; + + /* Hand off to the upper completion handler */ + conn->op->changed ( ibdev, qp, conn, rc, private_data, + private_data_len ); +} + +/** Connection request operations */ +static struct ib_mad_transaction_operations ib_cm_req_op = { + .complete = ib_cm_req_complete, +}; + +/** + * Handle connection path transaction completion + * + * @v ibdev Infiniband device + * @v path Path + * @v rc Status code + * @v av Address vector, or NULL on error + */ +static void ib_cm_path_complete ( struct ib_device *ibdev, + struct ib_path *path, int rc, + struct ib_address_vector *av ) { + struct ib_connection *conn = ib_path_get_ownerdata ( path ); + struct ib_queue_pair *qp = conn->qp; + union ib_mad mad; + struct ib_cm_connect_request *connect_req = + &mad.cm.cm_data.connect_request; + size_t private_data_len; + + /* Report failures */ + if ( rc != 0 ) { + DBGC ( conn, "CM %p path lookup failed: %s\n", + conn, strerror ( rc ) ); + conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); + goto out; + } + + /* Update queue pair peer path */ + memcpy ( &qp->av, av, sizeof ( qp->av ) ); + + /* Construct connection request */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST ); + connect_req->local_id = htonl ( conn->local_id ); + memcpy ( &connect_req->service_id, &conn->service_id, + sizeof ( connect_req->service_id ) ); + ib_get_hca_info ( ibdev, &connect_req->local_ca ); + connect_req->local_qpn__responder_resources = + htonl ( ( qp->qpn << 8 ) | 1 ); + connect_req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 ); + connect_req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl = + htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) | + ( 0 << 0 ) ); + connect_req->starting_psn__local_timeout__retry_count = + htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) | + ( 0x07 << 0 ) ); + connect_req->pkey = htons ( ibdev->pkey ); + connect_req->payload_mtu__rdc_exists__rnr_retry = + ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) ); + connect_req->max_cm_retries__srq = + ( ( 0x0f << 4 ) | ( 0 << 3 ) ); + connect_req->primary.local_lid = htons ( ibdev->lid ); + connect_req->primary.remote_lid = htons ( conn->qp->av.lid ); + memcpy ( &connect_req->primary.local_gid, &ibdev->gid, + sizeof ( connect_req->primary.local_gid ) ); + memcpy ( &connect_req->primary.remote_gid, &conn->qp->av.gid, + sizeof ( connect_req->primary.remote_gid ) ); + connect_req->primary.flow_label__rate = + htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) ); + connect_req->primary.hop_limit = 0; + connect_req->primary.sl__subnet_local = + ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) ); + connect_req->primary.local_ack_timeout = ( 0x13 << 3 ); + private_data_len = conn->private_data_len; + if ( private_data_len > sizeof ( connect_req->private_data ) ) + private_data_len = sizeof ( connect_req->private_data ); + memcpy ( &connect_req->private_data, &conn->private_data, + private_data_len ); + + /* Create connection request */ + conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL, + &ib_cm_req_op ); + if ( ! conn->madx ) { + DBGC ( conn, "CM %p could not create connection request\n", + conn ); + conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); + goto out; + } + ib_madx_set_ownerdata ( conn->madx, conn ); + + out: + /* Destroy the completed transaction */ + ib_destroy_path ( ibdev, path ); + conn->path = NULL; +} + +/** Connection path operations */ +static struct ib_path_operations ib_cm_path_op = { + .complete = ib_cm_path_complete, +}; + +/** + * Create connection to remote QP + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dgid Target GID + * @v service_id Target service ID + * @v private_data Connection request private data + * @v private_data_len Length of connection request private data + * @v op Connection operations + * @ret conn Connection + */ +struct ib_connection * +ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *dgid, struct ib_gid_half *service_id, + void *private_data, size_t private_data_len, + struct ib_connection_operations *op ) { + struct ib_connection *conn; + + /* Allocate and initialise request */ + conn = zalloc ( sizeof ( *conn ) + private_data_len ); + if ( ! conn ) + goto err_alloc_conn; + conn->ibdev = ibdev; + conn->qp = qp; + memset ( &qp->av, 0, sizeof ( qp->av ) ); + qp->av.gid_present = 1; + memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) ); + conn->local_id = random(); + memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) ); + conn->op = op; + conn->private_data_len = private_data_len; + memcpy ( &conn->private_data, private_data, private_data_len ); + + /* Create path */ + conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op ); + if ( ! conn->path ) + goto err_create_path; + ib_path_set_ownerdata ( conn->path, conn ); + + /* Add to list of connections */ + list_add ( &conn->list, &ib_cm_conns ); + + DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n", + conn, ibdev, qp->qpn ); + DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n", + conn, ntohl ( dgid->u.dwords[0] ), ntohl ( dgid->u.dwords[1] ), + ntohl ( dgid->u.dwords[2] ), ntohl ( dgid->u.dwords[3] ), + ntohl ( service_id->u.dwords[0] ), + ntohl ( service_id->u.dwords[1] ) ); + + return conn; + + ib_destroy_path ( ibdev, conn->path ); + err_create_path: + free ( conn ); + err_alloc_conn: + return NULL; +} + +/** + * Destroy connection to remote QP + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v conn Connection + */ +void ib_destroy_conn ( struct ib_device *ibdev, + struct ib_queue_pair *qp __unused, + struct ib_connection *conn ) { + + list_del ( &conn->list ); + if ( conn->madx ) + ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx ); + if ( conn->path ) + ib_destroy_path ( ibdev, conn->path ); + free ( conn ); +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_cmrc.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_cmrc.c new file mode 100644 index 0000000..2d64811 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_cmrc.c @@ -0,0 +1,436 @@ +/* + * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +FILE_LICENCE ( BSD2 ); + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <gpxe/iobuf.h> +#include <gpxe/xfer.h> +#include <gpxe/process.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_cm.h> +#include <gpxe/ib_cmrc.h> + +/** + * @file + * + * Infiniband Communication-managed Reliable Connections + * + */ + +/** CMRC number of send WQEs + * + * This is a policy decision. + */ +#define IB_CMRC_NUM_SEND_WQES 4 + +/** CMRC number of receive WQEs + * + * This is a policy decision. + */ +#define IB_CMRC_NUM_RECV_WQES 2 + +/** CMRC number of completion queue entries + * + * This is a policy decision + */ +#define IB_CMRC_NUM_CQES 8 + +/** An Infiniband Communication-Managed Reliable Connection */ +struct ib_cmrc_connection { + /** Reference count */ + struct refcnt refcnt; + /** Data transfer interface */ + struct xfer_interface xfer; + /** Infiniband device */ + struct ib_device *ibdev; + /** Completion queue */ + struct ib_completion_queue *cq; + /** Queue pair */ + struct ib_queue_pair *qp; + /** Connection */ + struct ib_connection *conn; + /** Destination GID */ + struct ib_gid dgid; + /** Service ID */ + struct ib_gid_half service_id; + /** QP is connected */ + int connected; + /** Shutdown process */ + struct process shutdown; +}; + +/** + * Shut down CMRC connection gracefully + * + * @v process Process + * + * The Infiniband data structures are not reference-counted or + * guarded. It is therefore unsafe to shut them down while we may be + * in the middle of a callback from the Infiniband stack (e.g. in a + * receive completion handler). + * + * This shutdown process will run some time after the call to + * ib_cmrc_close(), after control has returned out of the Infiniband + * core, and will shut down the Infiniband interfaces cleanly. + * + * The shutdown process holds an implicit reference on the CMRC + * connection, ensuring that the structure is not freed before the + * shutdown process has run. + */ +static void ib_cmrc_shutdown ( struct process *process ) { + struct ib_cmrc_connection *cmrc = + container_of ( process, struct ib_cmrc_connection, shutdown ); + + DBGC ( cmrc, "CMRC %p shutting down\n", cmrc ); + + /* Shut down Infiniband interface */ + ib_destroy_conn ( cmrc->ibdev, cmrc->qp, cmrc->conn ); + ib_destroy_qp ( cmrc->ibdev, cmrc->qp ); + ib_destroy_cq ( cmrc->ibdev, cmrc->cq ); + ib_close ( cmrc->ibdev ); + + /* Remove process from run queue */ + process_del ( &cmrc->shutdown ); + + /* Drop the remaining reference */ + ref_put ( &cmrc->refcnt ); +} + +/** + * Close CMRC connection + * + * @v cmrc Communication-Managed Reliable Connection + * @v rc Reason for close + */ +static void ib_cmrc_close ( struct ib_cmrc_connection *cmrc, int rc ) { + + /* Close data transfer interface */ + xfer_nullify ( &cmrc->xfer ); + xfer_close ( &cmrc->xfer, rc ); + + /* Schedule shutdown process */ + process_add ( &cmrc->shutdown ); +} + +/** + * Handle change of CMRC connection status + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v conn Connection + * @v rc_cm Connection status code + * @v private_data Private data, if available + * @v private_data_len Length of private data + */ +static void ib_cmrc_changed ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_connection *conn __unused, int rc_cm, + void *private_data, size_t private_data_len ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + int rc_xfer; + + /* Record connection status */ + if ( rc_cm == 0 ) { + DBGC ( cmrc, "CMRC %p connected\n", cmrc ); + cmrc->connected = 1; + } else { + DBGC ( cmrc, "CMRC %p disconnected: %s\n", + cmrc, strerror ( rc_cm ) ); + cmrc->connected = 0; + } + + /* Pass up any private data */ + DBGC2 ( cmrc, "CMRC %p received private data:\n", cmrc ); + DBGC2_HDA ( cmrc, 0, private_data, private_data_len ); + if ( private_data && + ( rc_xfer = xfer_deliver_raw ( &cmrc->xfer, private_data, + private_data_len ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not deliver private data: %s\n", + cmrc, strerror ( rc_xfer ) ); + ib_cmrc_close ( cmrc, rc_xfer ); + return; + } + + /* If we are disconnected, close the upper connection */ + if ( rc_cm != 0 ) { + ib_cmrc_close ( cmrc, rc_cm ); + return; + } +} + +/** CMRC connection operations */ +static struct ib_connection_operations ib_cmrc_conn_op = { + .changed = ib_cmrc_changed, +}; + +/** + * Handle CMRC send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_cmrc_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct io_buffer *iobuf, int rc ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + + /* Free the completed I/O buffer */ + free_iob ( iobuf ); + + /* Close the connection on any send errors */ + if ( rc != 0 ) { + DBGC ( cmrc, "CMRC %p send error: %s\n", + cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); + return; + } +} + +/** + * Handle CMRC receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector, or NULL + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_cmrc_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_address_vector *av __unused, + struct io_buffer *iobuf, int rc ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + + /* Close the connection on any receive errors */ + if ( rc != 0 ) { + DBGC ( cmrc, "CMRC %p receive error: %s\n", + cmrc, strerror ( rc ) ); + free_iob ( iobuf ); + ib_cmrc_close ( cmrc, rc ); + return; + } + + DBGC2 ( cmrc, "CMRC %p received:\n", cmrc ); + DBGC2_HDA ( cmrc, 0, iobuf->data, iob_len ( iobuf ) ); + + /* Pass up data */ + if ( ( rc = xfer_deliver_iob ( &cmrc->xfer, iobuf ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not deliver data: %s\n", + cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); + return; + } +} + +/** Infiniband CMRC completion operations */ +static struct ib_completion_queue_operations ib_cmrc_completion_ops = { + .complete_send = ib_cmrc_complete_send, + .complete_recv = ib_cmrc_complete_recv, +}; + +/** + * Send data via CMRC + * + * @v xfer Data transfer interface + * @v iobuf Datagram I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int ib_cmrc_xfer_deliver_iob ( struct xfer_interface *xfer, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct ib_cmrc_connection *cmrc = + container_of ( xfer, struct ib_cmrc_connection, xfer ); + int rc; + + /* If no connection has yet been attempted, send this datagram + * as the CM REQ private data. Otherwise, send it via the QP. + */ + if ( ! cmrc->connected ) { + + /* Abort if we have already sent a CM connection request */ + if ( cmrc->conn ) { + DBGC ( cmrc, "CMRC %p attempt to send before " + "connection is complete\n", cmrc ); + rc = -EIO; + goto out; + } + + /* Send via CM connection request */ + cmrc->conn = ib_create_conn ( cmrc->ibdev, cmrc->qp, + &cmrc->dgid, &cmrc->service_id, + iobuf->data, iob_len ( iobuf ), + &ib_cmrc_conn_op ); + if ( ! cmrc->conn ) { + DBGC ( cmrc, "CMRC %p could not connect\n", cmrc ); + rc = -ENOMEM; + goto out; + } + + } else { + + /* Send via QP */ + if ( ( rc = ib_post_send ( cmrc->ibdev, cmrc->qp, NULL, + iob_disown ( iobuf ) ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not send: %s\n", + cmrc, strerror ( rc ) ); + goto out; + } + + } + return 0; + + out: + /* Free the I/O buffer if necessary */ + free_iob ( iobuf ); + + /* Close the connection on any errors */ + if ( rc != 0 ) + ib_cmrc_close ( cmrc, rc ); + + return rc; +} + +/** + * Check CMRC flow control window + * + * @v xfer Data transfer interface + * @ret len Length of window + */ +static size_t ib_cmrc_xfer_window ( struct xfer_interface *xfer ) { + struct ib_cmrc_connection *cmrc = + container_of ( xfer, struct ib_cmrc_connection, xfer ); + + /* We indicate a window only when we are successfully + * connected. + */ + return ( cmrc->connected ? IB_MAX_PAYLOAD_SIZE : 0 ); +} + +/** + * Close CMRC data-transfer interface + * + * @v xfer Data transfer interface + * @v rc Reason for close + */ +static void ib_cmrc_xfer_close ( struct xfer_interface *xfer, int rc ) { + struct ib_cmrc_connection *cmrc = + container_of ( xfer, struct ib_cmrc_connection, xfer ); + + DBGC ( cmrc, "CMRC %p closed: %s\n", cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); +} + +/** CMRC data transfer interface operations */ +static struct xfer_interface_operations ib_cmrc_xfer_operations = { + .close = ib_cmrc_xfer_close, + .vredirect = ignore_xfer_vredirect, + .window = ib_cmrc_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = ib_cmrc_xfer_deliver_iob, + .deliver_raw = xfer_deliver_as_iob, +}; + +/** + * Open CMRC connection + * + * @v xfer Data transfer interface + * @v ibdev Infiniband device + * @v dgid Destination GID + * @v service_id Service ID + * @ret rc Returns status code + */ +int ib_cmrc_open ( struct xfer_interface *xfer, struct ib_device *ibdev, + struct ib_gid *dgid, struct ib_gid_half *service_id ) { + struct ib_cmrc_connection *cmrc; + int rc; + + /* Allocate and initialise structure */ + cmrc = zalloc ( sizeof ( *cmrc ) ); + if ( ! cmrc ) { + rc = -ENOMEM; + goto err_alloc; + } + xfer_init ( &cmrc->xfer, &ib_cmrc_xfer_operations, &cmrc->refcnt ); + cmrc->ibdev = ibdev; + memcpy ( &cmrc->dgid, dgid, sizeof ( cmrc->dgid ) ); + memcpy ( &cmrc->service_id, service_id, sizeof ( cmrc->service_id ) ); + process_init_stopped ( &cmrc->shutdown, ib_cmrc_shutdown, + &cmrc->refcnt ); + + /* Open Infiniband device */ + if ( ( rc = ib_open ( ibdev ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not open device: %s\n", + cmrc, strerror ( rc ) ); + goto err_open; + } + + /* Create completion queue */ + cmrc->cq = ib_create_cq ( ibdev, IB_CMRC_NUM_CQES, + &ib_cmrc_completion_ops ); + if ( ! cmrc->cq ) { + DBGC ( cmrc, "CMRC %p could not create completion queue\n", + cmrc ); + rc = -ENOMEM; + goto err_create_cq; + } + + /* Create queue pair */ + cmrc->qp = ib_create_qp ( ibdev, IB_QPT_RC, IB_CMRC_NUM_SEND_WQES, + cmrc->cq, IB_CMRC_NUM_RECV_WQES, cmrc->cq ); + if ( ! cmrc->qp ) { + DBGC ( cmrc, "CMRC %p could not create queue pair\n", cmrc ); + rc = -ENOMEM; + goto err_create_qp; + } + ib_qp_set_ownerdata ( cmrc->qp, cmrc ); + DBGC ( cmrc, "CMRC %p using QPN %lx\n", cmrc, cmrc->qp->qpn ); + + /* Attach to parent interface, transfer reference (implicitly) + * to our shutdown process, and return. + */ + xfer_plug_plug ( &cmrc->xfer, xfer ); + return 0; + + ib_destroy_qp ( ibdev, cmrc->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, cmrc->cq ); + err_create_cq: + ib_close ( ibdev ); + err_open: + ref_put ( &cmrc->refcnt ); + err_alloc: + return rc; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_mcast.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_mcast.c new file mode 100644 index 0000000..5cb395d --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_mcast.c @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <gpxe/list.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_mcast.h> + +/** @file + * + * Infiniband multicast groups + * + */ + +/** + * Generate multicast membership MAD + * + * @v ibdev Infiniband device + * @v gid Multicast GID + * @v join Join (rather than leave) group + * @v mad MAD to fill in + */ +static void ib_mcast_mad ( struct ib_device *ibdev, struct ib_gid *gid, + int join, union ib_mad *mad ) { + struct ib_mad_sa *sa = &mad->sa; + + /* Construct multicast membership record request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = + ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE ); + sa->sa_data.mc_member_record.scope__join_state = 1; + memcpy ( &sa->sa_data.mc_member_record.mgid, gid, + sizeof ( sa->sa_data.mc_member_record.mgid ) ); + memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid, + sizeof ( sa->sa_data.mc_member_record.port_gid ) ); +} + +/** + * Handle multicast membership record join response + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_mcast_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi __unused, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av __unused ) { + struct ib_mc_membership *membership = ib_madx_get_ownerdata ( madx ); + struct ib_queue_pair *qp = membership->qp; + struct ib_gid *gid = &membership->gid; + struct ib_mc_member_record *mc_member_record = + &mad->sa.sa_data.mc_member_record; + int joined; + unsigned long qkey; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -ENOTCONN; + if ( rc != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx join failed: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto out; + } + + /* Extract values from MAD */ + joined = ( mad->hdr.method == IB_MGMT_METHOD_GET_RESP ); + qkey = ntohl ( mc_member_record->qkey ); + DBGC ( ibdev, "IBDEV %p QPN %lx %s %08x:%08x:%08x:%08x qkey %lx\n", + ibdev, qp->qpn, ( joined ? "joined" : "left" ), + ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ), + ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ), + qkey ); + + /* Set queue key */ + qp->qkey = qkey; + if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not modify qkey: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto out; + } + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, mi, madx ); + membership->madx = NULL; + + /* Hand off to upper completion handler */ + membership->complete ( ibdev, qp, membership, rc, mad ); +} + +/** Multicast membership management transaction completion operations */ +static struct ib_mad_transaction_operations ib_mcast_op = { + .complete = ib_mcast_complete, +}; + +/** + * Join multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v membership Multicast group membership + * @v gid Multicast GID to join + * @v joined Join completion handler + * @ret rc Return status code + */ +int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_mc_membership *membership, struct ib_gid *gid, + void ( * complete ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_mc_membership *membership, + int rc, union ib_mad *mad ) ) { + union ib_mad mad; + int rc; + + DBGC ( ibdev, "IBDEV %p QPN %lx joining %08x:%08x:%08x:%08x\n", + ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ), + ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ), + ntohl ( gid->u.dwords[3] ) ); + + /* Initialise structure */ + membership->qp = qp; + memcpy ( &membership->gid, gid, sizeof ( membership->gid ) ); + membership->complete = complete; + + /* Attach queue pair to multicast GID */ + if ( ( rc = ib_mcast_attach ( ibdev, qp, gid ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not attach: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto err_mcast_attach; + } + + /* Initiate multicast membership join */ + ib_mcast_mad ( ibdev, gid, 1, &mad ); + membership->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL, + &ib_mcast_op ); + if ( ! membership->madx ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not create join " + "transaction\n", ibdev, qp->qpn ); + rc = -ENOMEM; + goto err_create_madx; + } + ib_madx_set_ownerdata ( membership->madx, membership ); + + return 0; + + ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx ); + err_create_madx: + ib_mcast_detach ( ibdev, qp, gid ); + err_mcast_attach: + return rc; +} + +/** + * Leave multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v membership Multicast group membership + */ +void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_mc_membership *membership ) { + struct ib_gid *gid = &membership->gid; + union ib_mad mad; + int rc; + + DBGC ( ibdev, "IBDEV %p QPN %lx leaving %08x:%08x:%08x:%08x\n", + ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ), + ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ), + ntohl ( gid->u.dwords[3] ) ); + + /* Detach from multicast GID */ + ib_mcast_detach ( ibdev, qp, &membership->gid ); + + /* Cancel multicast membership join, if applicable */ + if ( membership->madx ) { + ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx ); + membership->madx = NULL; + } + + /* Send a single group leave MAD */ + ib_mcast_mad ( ibdev, &membership->gid, 0, &mad ); + if ( ( rc = ib_mi_send ( ibdev, ibdev->gsi, &mad, NULL ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not send leave request: " + "%s\n", ibdev, qp->qpn, strerror ( rc ) ); + } +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_mi.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_mi.c new file mode 100644 index 0000000..7511fd8 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_mi.c @@ -0,0 +1,406 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <byteswap.h> +#include <gpxe/infiniband.h> +#include <gpxe/iobuf.h> +#include <gpxe/ib_mi.h> + +/** + * @file + * + * Infiniband management interfaces + * + */ + +/** Management interface number of send WQEs + * + * This is a policy decision. + */ +#define IB_MI_NUM_SEND_WQES 4 + +/** Management interface number of receive WQEs + * + * This is a policy decision. + */ +#define IB_MI_NUM_RECV_WQES 2 + +/** Management interface number of completion queue entries + * + * This is a policy decision + */ +#define IB_MI_NUM_CQES 8 + +/** TID magic signature */ +#define IB_MI_TID_MAGIC ( ( 'g' << 24 ) | ( 'P' << 16 ) | ( 'X' << 8 ) | 'E' ) + +/** TID to use for next MAD */ +static unsigned int next_tid; + +/** + * Handle received MAD + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + * @ret rc Return status code + */ +static int ib_mi_handle ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_mad_hdr *hdr = &mad->hdr; + struct ib_mad_transaction *madx; + struct ib_mad_agent *agent; + + /* Look for a matching transaction by TID */ + list_for_each_entry ( madx, &mi->madx, list ) { + if ( memcmp ( &hdr->tid, &madx->mad.hdr.tid, + sizeof ( hdr->tid ) ) != 0 ) + continue; + /* Found a matching transaction */ + madx->op->complete ( ibdev, mi, madx, 0, mad, av ); + return 0; + } + + /* If there is no matching transaction, look for a listening agent */ + for_each_table_entry ( agent, IB_MAD_AGENTS ) { + if ( ( ( agent->mgmt_class & IB_MGMT_CLASS_MASK ) != + ( hdr->mgmt_class & IB_MGMT_CLASS_MASK ) ) || + ( agent->class_version != hdr->class_version ) || + ( agent->attr_id != hdr->attr_id ) ) + continue; + /* Found a matching agent */ + agent->handle ( ibdev, mi, mad, av ); + return 0; + } + + /* Otherwise, ignore it */ + DBGC ( mi, "MI %p RX TID %08x%08x ignored\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + return -ENOTSUP; +} + +/** + * Complete receive via management interface + * + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_mi_complete_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_address_vector *av, + struct io_buffer *iobuf, int rc ) { + struct ib_mad_interface *mi = ib_qp_get_ownerdata ( qp ); + union ib_mad *mad; + struct ib_mad_hdr *hdr; + + /* Ignore errors */ + if ( rc != 0 ) { + DBGC ( mi, "MI %p RX error: %s\n", mi, strerror ( rc ) ); + goto out; + } + + /* Sanity checks */ + if ( iob_len ( iobuf ) != sizeof ( *mad ) ) { + DBGC ( mi, "MI %p RX bad size (%zd bytes)\n", + mi, iob_len ( iobuf ) ); + DBGC_HDA ( mi, 0, iobuf->data, iob_len ( iobuf ) ); + goto out; + } + mad = iobuf->data; + hdr = &mad->hdr; + if ( hdr->base_version != IB_MGMT_BASE_VERSION ) { + DBGC ( mi, "MI %p RX unsupported base version %x\n", + mi, hdr->base_version ); + DBGC_HDA ( mi, 0, mad, sizeof ( *mad ) ); + goto out; + } + DBGC ( mi, "MI %p RX TID %08x%08x (%02x,%02x,%02x,%04x) status " + "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + hdr->mgmt_class, hdr->class_version, hdr->method, + ntohs ( hdr->attr_id ), ntohs ( hdr->status ) ); + DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) ); + + /* Handle MAD */ + if ( ( rc = ib_mi_handle ( ibdev, mi, mad, av ) ) != 0 ) + goto out; + + out: + free_iob ( iobuf ); +} + +/** Management interface completion operations */ +static struct ib_completion_queue_operations ib_mi_completion_ops = { + .complete_recv = ib_mi_complete_recv, +}; + +/** + * Transmit MAD + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad MAD + * @v av Destination address vector + * @ret rc Return status code + */ +int ib_mi_send ( struct ib_device *ibdev, struct ib_mad_interface *mi, + union ib_mad *mad, struct ib_address_vector *av ) { + struct ib_mad_hdr *hdr = &mad->hdr; + struct io_buffer *iobuf; + int rc; + + /* Set common fields */ + hdr->base_version = IB_MGMT_BASE_VERSION; + if ( ( hdr->tid[0] == 0 ) && ( hdr->tid[1] == 0 ) ) { + hdr->tid[0] = htonl ( IB_MI_TID_MAGIC ); + hdr->tid[1] = htonl ( ++next_tid ); + } + DBGC ( mi, "MI %p TX TID %08x%08x (%02x,%02x,%02x,%04x) status " + "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + hdr->mgmt_class, hdr->class_version, hdr->method, + ntohs ( hdr->attr_id ), ntohs ( hdr->status ) ); + DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) ); + + /* Construct directed route portion of response, if necessary */ + if ( hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ) { + struct ib_mad_smp *smp = &mad->smp; + unsigned int hop_pointer; + unsigned int hop_count; + + smp->mad_hdr.status |= htons ( IB_SMP_STATUS_D_INBOUND ); + hop_pointer = smp->mad_hdr.class_specific.smp.hop_pointer; + hop_count = smp->mad_hdr.class_specific.smp.hop_count; + assert ( hop_count == hop_pointer ); + if ( hop_pointer < ( sizeof ( smp->return_path.hops ) / + sizeof ( smp->return_path.hops[0] ) ) ) { + smp->return_path.hops[hop_pointer] = ibdev->port; + } else { + DBGC ( mi, "MI %p TX TID %08x%08x invalid hop pointer " + "%d\n", mi, ntohl ( hdr->tid[0] ), + ntohl ( hdr->tid[1] ), hop_pointer ); + return -EINVAL; + } + } + + /* Construct I/O buffer */ + iobuf = alloc_iob ( sizeof ( *mad ) ); + if ( ! iobuf ) { + DBGC ( mi, "MI %p could not allocate buffer for TID " + "%08x%08x\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + return -ENOMEM; + } + memcpy ( iob_put ( iobuf, sizeof ( *mad ) ), mad, sizeof ( *mad ) ); + + /* Send I/O buffer */ + if ( ( rc = ib_post_send ( ibdev, mi->qp, av, iobuf ) ) != 0 ) { + DBGC ( mi, "MI %p TX TID %08x%08x failed: %s\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + +/** + * Handle management transaction timer expiry + * + * @v timer Retry timer + * @v expired Failure indicator + */ +static void ib_mi_timer_expired ( struct retry_timer *timer, int expired ) { + struct ib_mad_transaction *madx = + container_of ( timer, struct ib_mad_transaction, timer ); + struct ib_mad_interface *mi = madx->mi; + struct ib_device *ibdev = mi->ibdev; + struct ib_mad_hdr *hdr = &madx->mad.hdr; + + /* Abandon transaction if we have tried too many times */ + if ( expired ) { + DBGC ( mi, "MI %p abandoning TID %08x%08x\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + madx->op->complete ( ibdev, mi, madx, -ETIMEDOUT, NULL, NULL ); + return; + } + + /* Restart retransmission timer */ + start_timer ( timer ); + + /* Resend MAD */ + ib_mi_send ( ibdev, mi, &madx->mad, &madx->av ); +} + +/** + * Create management transaction + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad MAD to send + * @v av Destination address, or NULL to use SM's GSI + * @v op Management transaction operations + * @ret madx Management transaction, or NULL + */ +struct ib_mad_transaction * +ib_create_madx ( struct ib_device *ibdev, struct ib_mad_interface *mi, + union ib_mad *mad, struct ib_address_vector *av, + struct ib_mad_transaction_operations *op ) { + struct ib_mad_transaction *madx; + + /* Allocate and initialise structure */ + madx = zalloc ( sizeof ( *madx ) ); + if ( ! madx ) + return NULL; + madx->mi = mi; + madx->timer.expired = ib_mi_timer_expired; + madx->op = op; + + /* Determine address vector */ + if ( av ) { + memcpy ( &madx->av, av, sizeof ( madx->av ) ); + } else { + madx->av.lid = ibdev->sm_lid; + madx->av.sl = ibdev->sm_sl; + madx->av.qpn = IB_QPN_GSI; + madx->av.qkey = IB_QKEY_GSI; + } + + /* Copy MAD */ + memcpy ( &madx->mad, mad, sizeof ( madx->mad ) ); + + /* Add to list and start timer to send initial MAD */ + list_add ( &madx->list, &mi->madx ); + start_timer_nodelay ( &madx->timer ); + + return madx; +} + +/** + * Destroy management transaction + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + */ +void ib_destroy_madx ( struct ib_device *ibdev __unused, + struct ib_mad_interface *mi __unused, + struct ib_mad_transaction *madx ) { + + /* Stop timer and remove from list */ + stop_timer ( &madx->timer ); + list_del ( &madx->list ); + + /* Free transaction */ + free ( madx ); +} + +/** + * Create management interface + * + * @v ibdev Infiniband device + * @v type Queue pair type + * @ret mi Management agent, or NULL + */ +struct ib_mad_interface * ib_create_mi ( struct ib_device *ibdev, + enum ib_queue_pair_type type ) { + struct ib_mad_interface *mi; + int rc; + + /* Allocate and initialise fields */ + mi = zalloc ( sizeof ( *mi ) ); + if ( ! mi ) + goto err_alloc; + mi->ibdev = ibdev; + INIT_LIST_HEAD ( &mi->madx ); + + /* Create completion queue */ + mi->cq = ib_create_cq ( ibdev, IB_MI_NUM_CQES, &ib_mi_completion_ops ); + if ( ! mi->cq ) { + DBGC ( mi, "MI %p could not allocate completion queue\n", mi ); + goto err_create_cq; + } + + /* Create queue pair */ + mi->qp = ib_create_qp ( ibdev, type, IB_MI_NUM_SEND_WQES, mi->cq, + IB_MI_NUM_RECV_WQES, mi->cq ); + if ( ! mi->qp ) { + DBGC ( mi, "MI %p could not allocate queue pair\n", mi ); + goto err_create_qp; + } + ib_qp_set_ownerdata ( mi->qp, mi ); + DBGC ( mi, "MI %p (%s) running on QPN %#lx\n", + mi, ( ( type == IB_QPT_SMI ) ? "SMI" : "GSI" ), mi->qp->qpn ); + + /* Set queue key */ + mi->qp->qkey = ( ( type == IB_QPT_SMI ) ? IB_QKEY_SMI : IB_QKEY_GSI ); + if ( ( rc = ib_modify_qp ( ibdev, mi->qp ) ) != 0 ) { + DBGC ( mi, "MI %p could not set queue key: %s\n", + mi, strerror ( rc ) ); + goto err_modify_qp; + } + + /* Fill receive ring */ + ib_refill_recv ( ibdev, mi->qp ); + return mi; + + err_modify_qp: + ib_destroy_qp ( ibdev, mi->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, mi->cq ); + err_create_cq: + free ( mi ); + err_alloc: + return NULL; +} + +/** + * Destroy management interface + * + * @v mi Management interface + */ +void ib_destroy_mi ( struct ib_device *ibdev, struct ib_mad_interface *mi ) { + struct ib_mad_transaction *madx; + struct ib_mad_transaction *tmp; + + /* Flush any outstanding requests */ + list_for_each_entry_safe ( madx, tmp, &mi->madx, list ) { + DBGC ( mi, "MI %p destroyed while TID %08x%08x in progress\n", + mi, ntohl ( madx->mad.hdr.tid[0] ), + ntohl ( madx->mad.hdr.tid[1] ) ); + madx->op->complete ( ibdev, mi, madx, -ECANCELED, NULL, NULL ); + } + + ib_destroy_qp ( ibdev, mi->qp ); + ib_destroy_cq ( ibdev, mi->cq ); + free ( mi ); +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_packet.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_packet.c new file mode 100644 index 0000000..08820ef --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_packet.c @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <gpxe/iobuf.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_packet.h> + +/** + * @file + * + * Infiniband Packet Formats + * + */ + +/** + * Add IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer to contain headers + * @v qp Queue pair + * @v payload_len Payload length + * @v av Address vector + */ +int ib_push ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair *qp, size_t payload_len, + const struct ib_address_vector *av ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + size_t pad_len; + size_t lrh_len; + size_t grh_len; + unsigned int vl; + unsigned int lnh; + + DBGC2 ( ibdev, "IBDEV %p TX %04x:%08lx => %04x:%08lx (key %08lx)\n", + ibdev, ibdev->lid, qp->ext_qpn, av->lid, av->qpn, av->qkey ); + + /* Calculate packet length */ + pad_len = ( (-payload_len) & 0x3 ); + payload_len += pad_len; + payload_len += 4; /* ICRC */ + + /* Reserve space for headers */ + orig_iob_len = iob_len ( iobuf ); + deth = iob_push ( iobuf, sizeof ( *deth ) ); + bth = iob_push ( iobuf, sizeof ( *bth ) ); + grh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + grh = ( av->gid_present ? + iob_push ( iobuf, sizeof ( *grh ) ) : NULL ); + lrh = iob_push ( iobuf, sizeof ( *lrh ) ); + lrh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + + /* Construct LRH */ + vl = ( ( qp->ext_qpn == IB_QPN_SMI ) ? IB_VL_SMP : IB_VL_DEFAULT ); + lrh->vl__lver = ( vl << 4 ); + lnh = ( grh ? IB_LNH_GRH : IB_LNH_BTH ); + lrh->sl__lnh = ( ( av->sl << 4 ) | lnh ); + lrh->dlid = htons ( av->lid ); + lrh->length = htons ( lrh_len >> 2 ); + lrh->slid = htons ( ibdev->lid ); + + /* Construct GRH, if required */ + if ( grh ) { + grh->ipver__tclass__flowlabel = + htonl ( IB_GRH_IPVER_IPv6 << 28 ); + grh->paylen = htons ( grh_len ); + grh->nxthdr = IB_GRH_NXTHDR_IBA; + grh->hoplmt = 0; + memcpy ( &grh->sgid, &ibdev->gid, sizeof ( grh->sgid ) ); + memcpy ( &grh->dgid, &av->gid, sizeof ( grh->dgid ) ); + } + + /* Construct BTH */ + bth->opcode = BTH_OPCODE_UD_SEND; + bth->se__m__padcnt__tver = ( pad_len << 4 ); + bth->pkey = htons ( ibdev->pkey ); + bth->dest_qp = htonl ( av->qpn ); + bth->ack__psn = htonl ( ( qp->send.psn++ ) & 0xffffffUL ); + + /* Construct DETH */ + deth->qkey = htonl ( av->qkey ); + deth->src_qp = htonl ( qp->ext_qpn ); + + DBGCP_HDA ( ibdev, 0, iobuf->data, + ( iob_len ( iobuf ) - orig_iob_len ) ); + + return 0; +} + +/** + * Remove IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer containing headers + * @v qp Queue pair to fill in, or NULL + * @v payload_len Payload length to fill in, or NULL + * @v av Address vector to fill in + */ +int ib_pull ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair **qp, size_t *payload_len, + struct ib_address_vector *av ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + unsigned int lnh; + size_t pad_len; + unsigned long qpn; + unsigned int lid; + + /* Clear return values */ + if ( qp ) + *qp = NULL; + if ( payload_len ) + *payload_len = 0; + memset ( av, 0, sizeof ( *av ) ); + + /* Extract LRH */ + if ( iob_len ( iobuf ) < sizeof ( *lrh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for LRH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + lrh = iobuf->data; + iob_pull ( iobuf, sizeof ( *lrh ) ); + av->lid = ntohs ( lrh->slid ); + av->sl = ( lrh->sl__lnh >> 4 ); + lnh = ( lrh->sl__lnh & 0x3 ); + lid = ntohs ( lrh->dlid ); + + /* Reject unsupported packets */ + if ( ! ( ( lnh == IB_LNH_BTH ) || ( lnh == IB_LNH_GRH ) ) ) { + DBGC ( ibdev, "IBDEV %p RX unsupported LNH %x\n", + ibdev, lnh ); + return -ENOTSUP; + } + + /* Extract GRH, if present */ + if ( lnh == IB_LNH_GRH ) { + if ( iob_len ( iobuf ) < sizeof ( *grh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) " + "for GRH\n", ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + grh = iobuf->data; + iob_pull ( iobuf, sizeof ( *grh ) ); + av->gid_present = 1; + memcpy ( &av->gid, &grh->sgid, sizeof ( av->gid ) ); + } else { + grh = NULL; + } + + /* Extract BTH */ + if ( iob_len ( iobuf ) < sizeof ( *bth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for BTH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + bth = iobuf->data; + iob_pull ( iobuf, sizeof ( *bth ) ); + if ( bth->opcode != BTH_OPCODE_UD_SEND ) { + DBGC ( ibdev, "IBDEV %p unsupported BTH opcode %x\n", + ibdev, bth->opcode ); + return -ENOTSUP; + } + qpn = ntohl ( bth->dest_qp ); + + /* Extract DETH */ + if ( iob_len ( iobuf ) < sizeof ( *deth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for DETH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + deth = iobuf->data; + iob_pull ( iobuf, sizeof ( *deth ) ); + av->qpn = ntohl ( deth->src_qp ); + av->qkey = ntohl ( deth->qkey ); + + /* Calculate payload length, if applicable */ + if ( payload_len ) { + pad_len = ( ( bth->se__m__padcnt__tver >> 4 ) & 0x3 ); + *payload_len = ( ( ntohs ( lrh->length ) << 2 ) + - ( orig_iob_len - iob_len ( iobuf ) ) + - pad_len - 4 /* ICRC */ ); + } + + /* Determine destination QP, if applicable */ + if ( qp ) { + if ( IB_LID_MULTICAST ( lid ) && grh ) { + if ( ! ( *qp = ib_find_qp_mgid ( ibdev, &grh->dgid ))){ + DBGC ( ibdev, "IBDEV %p RX for unknown MGID " + "%08x:%08x:%08x:%08x\n", ibdev, + ntohl ( grh->dgid.u.dwords[0] ), + ntohl ( grh->dgid.u.dwords[1] ), + ntohl ( grh->dgid.u.dwords[2] ), + ntohl ( grh->dgid.u.dwords[3] ) ); + return -ENODEV; + } + } else { + if ( ! ( *qp = ib_find_qp_qpn ( ibdev, qpn ) ) ) { + DBGC ( ibdev, "IBDEV %p RX for nonexistent " + "QPN %lx\n", ibdev, qpn ); + return -ENODEV; + } + } + assert ( *qp ); + } + + DBGC2 ( ibdev, "IBDEV %p RX %04x:%08lx <= %04x:%08lx (key %08x)\n", + ibdev, lid, ( IB_LID_MULTICAST( lid ) ? + ( qp ? (*qp)->ext_qpn : -1UL ) : qpn ), + av->lid, av->qpn, ntohl ( deth->qkey ) ); + DBGCP_HDA ( ibdev, 0, + ( iobuf->data - ( orig_iob_len - iob_len ( iobuf ) ) ), + ( orig_iob_len - iob_len ( iobuf ) ) ); + + return 0; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_pathrec.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_pathrec.c new file mode 100644 index 0000000..136e628 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_pathrec.c @@ -0,0 +1,296 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_pathrec.h> + +/** @file + * + * Infiniband path lookups + * + */ + +/** + * Handle path transaction completion + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_path_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av __unused ) { + struct ib_path *path = ib_madx_get_ownerdata ( madx ); + struct ib_gid *dgid = &path->av.gid; + struct ib_path_record *pathrec = &mad->sa.sa_data.path_record; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -ENETUNREACH; + if ( rc != 0 ) { + DBGC ( ibdev, "IBDEV %p path lookup for %08x:%08x:%08x:%08x " + "failed: %s\n", ibdev, htonl ( dgid->u.dwords[0] ), + htonl ( dgid->u.dwords[1] ), + htonl ( dgid->u.dwords[2] ), + htonl ( dgid->u.dwords[3] ), strerror ( rc ) ); + goto out; + } + + /* Extract values from MAD */ + path->av.lid = ntohs ( pathrec->dlid ); + path->av.sl = ( pathrec->reserved__sl & 0x0f ); + path->av.rate = ( pathrec->rate_selector__rate & 0x3f ); + DBGC ( ibdev, "IBDEV %p path to %08x:%08x:%08x:%08x is %04x sl %d " + "rate %d\n", ibdev, htonl ( dgid->u.dwords[0] ), + htonl ( dgid->u.dwords[1] ), htonl ( dgid->u.dwords[2] ), + htonl ( dgid->u.dwords[3] ), path->av.lid, path->av.sl, + path->av.rate ); + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, mi, madx ); + path->madx = NULL; + + /* Hand off to upper completion handler */ + path->op->complete ( ibdev, path, rc, &path->av ); +} + +/** Path transaction completion operations */ +static struct ib_mad_transaction_operations ib_path_op = { + .complete = ib_path_complete, +}; + +/** + * Create path + * + * @v ibdev Infiniband device + * @v av Address vector to complete + * @v op Path operations + * @ret path Path + */ +struct ib_path * +ib_create_path ( struct ib_device *ibdev, struct ib_address_vector *av, + struct ib_path_operations *op ) { + struct ib_path *path; + union ib_mad mad; + struct ib_mad_sa *sa = &mad.sa; + + /* Allocate and initialise structure */ + path = zalloc ( sizeof ( *path ) ); + if ( ! path ) + goto err_alloc_path; + path->ibdev = ibdev; + memcpy ( &path->av, av, sizeof ( path->av ) ); + path->op = op; + + /* Construct path request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = IB_MGMT_METHOD_GET; + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); + memcpy ( &sa->sa_data.path_record.dgid, &path->av.gid, + sizeof ( sa->sa_data.path_record.dgid ) ); + memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid, + sizeof ( sa->sa_data.path_record.sgid ) ); + + /* Create management transaction */ + path->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL, + &ib_path_op ); + if ( ! path->madx ) + goto err_create_madx; + ib_madx_set_ownerdata ( path->madx, path ); + + return path; + + ib_destroy_madx ( ibdev, ibdev->gsi, path->madx ); + err_create_madx: + free ( path ); + err_alloc_path: + return NULL; +} + +/** + * Destroy path + * + * @v ibdev Infiniband device + * @v path Path + */ +void ib_destroy_path ( struct ib_device *ibdev, struct ib_path *path ) { + + if ( path->madx ) + ib_destroy_madx ( ibdev, ibdev->gsi, path->madx ); + free ( path ); +} + +/** Number of path cache entries + * + * Must be a power of two. + */ +#define IB_NUM_CACHED_PATHS 4 + +/** A cached path */ +struct ib_cached_path { + /** Path */ + struct ib_path *path; +}; + +/** Path cache */ +static struct ib_cached_path ib_path_cache[IB_NUM_CACHED_PATHS]; + +/** Oldest path cache entry index */ +static unsigned int ib_path_cache_idx; + +/** + * Find path cache entry + * + * @v ibdev Infiniband device + * @v dgid Destination GID + * @ret path Path cache entry, or NULL + */ +static struct ib_cached_path * +ib_find_path_cache_entry ( struct ib_device *ibdev, struct ib_gid *dgid ) { + struct ib_cached_path *cached; + unsigned int i; + + for ( i = 0 ; i < IB_NUM_CACHED_PATHS ; i++ ) { + cached = &ib_path_cache[i]; + if ( ! cached->path ) + continue; + if ( cached->path->ibdev != ibdev ) + continue; + if ( memcmp ( &cached->path->av.gid, dgid, + sizeof ( cached->path->av.gid ) ) != 0 ) + continue; + return cached; + } + + return NULL; +} + +/** + * Handle cached path transaction completion + * + * @v ibdev Infiniband device + * @v path Path + * @v rc Status code + * @v av Address vector, or NULL on error + */ +static void ib_cached_path_complete ( struct ib_device *ibdev, + struct ib_path *path, int rc, + struct ib_address_vector *av __unused ) { + struct ib_cached_path *cached = ib_path_get_ownerdata ( path ); + + /* If the transaction failed, erase the cache entry */ + if ( rc != 0 ) { + /* Destroy the old cache entry */ + ib_destroy_path ( ibdev, path ); + memset ( cached, 0, sizeof ( *cached ) ); + return; + } + + /* Do not destroy the completed transaction; we still need to + * refer to the resolved path. + */ +} + +/** Cached path transaction completion operations */ +static struct ib_path_operations ib_cached_path_op = { + .complete = ib_cached_path_complete, +}; + +/** + * Resolve path + * + * @v ibdev Infiniband device + * @v av Address vector to complete + * @ret rc Return status code + * + * This provides a non-transactional way to resolve a path, via a + * cache similar to ARP. + */ +int ib_resolve_path ( struct ib_device *ibdev, struct ib_address_vector *av ) { + struct ib_gid *gid = &av->gid; + struct ib_cached_path *cached; + unsigned int cache_idx; + + /* Sanity check */ + if ( ! av->gid_present ) { + DBGC ( ibdev, "IBDEV %p attempt to look up path " + "without GID\n", ibdev ); + return -EINVAL; + } + + /* Look in cache for a matching entry */ + cached = ib_find_path_cache_entry ( ibdev, gid ); + if ( cached && cached->path->av.lid ) { + /* Populated entry found */ + av->lid = cached->path->av.lid; + av->rate = cached->path->av.rate; + av->sl = cached->path->av.sl; + DBGC2 ( ibdev, "IBDEV %p cache hit for %08x:%08x:%08x:%08x\n", + ibdev, htonl ( gid->u.dwords[0] ), + htonl ( gid->u.dwords[1] ), htonl ( gid->u.dwords[2] ), + htonl ( gid->u.dwords[3] ) ); + return 0; + } + DBGC ( ibdev, "IBDEV %p cache miss for %08x:%08x:%08x:%08x%s\n", + ibdev, htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ), + htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ), + ( cached ? " (in progress)" : "" ) ); + + /* If lookup is already in progress, do nothing */ + if ( cached ) + return -ENOENT; + + /* Locate a new cache entry to use */ + cache_idx = ( (ib_path_cache_idx++) % IB_NUM_CACHED_PATHS ); + cached = &ib_path_cache[cache_idx]; + + /* Destroy the old cache entry */ + if ( cached->path ) + ib_destroy_path ( ibdev, cached->path ); + memset ( cached, 0, sizeof ( *cached ) ); + + /* Create new path */ + cached->path = ib_create_path ( ibdev, av, &ib_cached_path_op ); + if ( ! cached->path ) { + DBGC ( ibdev, "IBDEV %p could not create path\n", + ibdev ); + return -ENOMEM; + } + ib_path_set_ownerdata ( cached->path, cached ); + + /* Not found yet */ + return -ENOENT; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_sma.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_sma.c new file mode 100644 index 0000000..eb4f987 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_sma.c @@ -0,0 +1,362 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <byteswap.h> +#include <gpxe/infiniband.h> +#include <gpxe/iobuf.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_sma.h> + +/** + * @file + * + * Infiniband Subnet Management Agent + * + */ + +/** + * Node information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_node_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_node_info *node_info = &mad->smp.smp_data.node_info; + int rc; + + /* Fill in information */ + memset ( node_info, 0, sizeof ( *node_info ) ); + node_info->base_version = IB_MGMT_BASE_VERSION; + node_info->class_version = IB_SMP_CLASS_VERSION; + node_info->node_type = IB_NODE_TYPE_HCA; + node_info->num_ports = ib_get_hca_info ( ibdev, &node_info->sys_guid ); + memcpy ( &node_info->node_guid, &node_info->sys_guid, + sizeof ( node_info->node_guid ) ); + memcpy ( &node_info->port_guid, &ibdev->gid.u.half[1], + sizeof ( node_info->port_guid ) ); + node_info->partition_cap = htons ( 1 ); + node_info->local_port_num = ibdev->port; + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send NodeInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Node description + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_node_desc ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_node_desc *node_desc = &mad->smp.smp_data.node_desc; + struct ib_gid_half *guid = &ibdev->gid.u.half[1]; + int rc; + + /* Fill in information */ + memset ( node_desc, 0, sizeof ( *node_desc ) ); + snprintf ( node_desc->node_string, sizeof ( node_desc->node_string ), + "gPXE %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x (%s)", + guid->u.bytes[0], guid->u.bytes[1], guid->u.bytes[2], + guid->u.bytes[3], guid->u.bytes[4], guid->u.bytes[5], + guid->u.bytes[6], guid->u.bytes[7], ibdev->dev->name ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send NodeDesc GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * GUID information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_guid_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_guid_info *guid_info = &mad->smp.smp_data.guid_info; + int rc; + + /* Fill in information */ + memset ( guid_info, 0, sizeof ( *guid_info ) ); + memcpy ( guid_info->guid[0], &ibdev->gid.u.half[1], + sizeof ( guid_info->guid[0] ) ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send GuidInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Set port information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @ret rc Return status code + */ +static int ib_sma_set_port_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad ) { + const struct ib_port_info *port_info = &mad->smp.smp_data.port_info; + unsigned int link_width_enabled; + unsigned int link_speed_enabled; + int rc; + + /* Set parameters */ + memcpy ( &ibdev->gid.u.half[0], port_info->gid_prefix, + sizeof ( ibdev->gid.u.half[0] ) ); + ibdev->lid = ntohs ( port_info->lid ); + ibdev->sm_lid = ntohs ( port_info->mastersm_lid ); + if ( ( link_width_enabled = port_info->link_width_enabled ) ) + ibdev->link_width_enabled = link_width_enabled; + if ( ( link_speed_enabled = + ( port_info->link_speed_active__link_speed_enabled & 0xf ) ) ) + ibdev->link_speed_enabled = link_speed_enabled; + ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf ); + DBGC ( mi, "SMA %p set LID %04x SMLID %04x link width %02x speed " + "%02x\n", mi, ibdev->lid, ibdev->sm_lid, + ibdev->link_width_enabled, ibdev->link_speed_enabled ); + + /* Update parameters on device */ + if ( ( rc = ib_set_port_info ( ibdev, mad ) ) != 0 ) { + DBGC ( mi, "SMA %p could not set port information: %s\n", + mi, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Port information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_port_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_port_info *port_info = &mad->smp.smp_data.port_info; + int rc; + + /* Set parameters if applicable */ + if ( mad->hdr.method == IB_MGMT_METHOD_SET ) { + if ( ( rc = ib_sma_set_port_info ( ibdev, mi, mad ) ) != 0 ) { + mad->hdr.status = + htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR ); + /* Fall through to generate GetResponse */ + } + } + + /* Fill in information */ + memset ( port_info, 0, sizeof ( *port_info ) ); + memcpy ( port_info->gid_prefix, &ibdev->gid.u.half[0], + sizeof ( port_info->gid_prefix ) ); + port_info->lid = ntohs ( ibdev->lid ); + port_info->mastersm_lid = ntohs ( ibdev->sm_lid ); + port_info->local_port_num = ibdev->port; + port_info->link_width_enabled = ibdev->link_width_enabled; + port_info->link_width_supported = ibdev->link_width_supported; + port_info->link_width_active = ibdev->link_width_active; + port_info->link_speed_supported__port_state = + ( ( ibdev->link_speed_supported << 4 ) | ibdev->port_state ); + port_info->port_phys_state__link_down_def_state = + ( ( IB_PORT_PHYS_STATE_POLLING << 4 ) | + IB_PORT_PHYS_STATE_POLLING ); + port_info->link_speed_active__link_speed_enabled = + ( ( ibdev->link_speed_active << 4 ) | + ibdev->link_speed_enabled ); + port_info->neighbour_mtu__mastersm_sl = + ( ( IB_MTU_2048 << 4 ) | ibdev->sm_sl ); + port_info->vl_cap__init_type = ( IB_VL_0 << 4 ); + port_info->init_type_reply__mtu_cap = IB_MTU_2048; + port_info->operational_vls__enforcement = ( IB_VL_0 << 4 ); + port_info->guid_cap = 1; + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send PortInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Set partition key table + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @ret rc Return status code + */ +static int ib_sma_set_pkey_table ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad ) { + struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table; + int rc; + + /* Set parameters */ + ibdev->pkey = ntohs ( pkey_table->pkey[0] ); + DBGC ( mi, "SMA %p set pkey %04x\n", mi, ibdev->pkey ); + + /* Update parameters on device */ + if ( ( rc = ib_set_pkey_table ( ibdev, mad ) ) != 0 ) { + DBGC ( mi, "SMA %p could not set pkey table: %s\n", + mi, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Partition key table + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_pkey_table ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table; + int rc; + + /* Set parameters, if applicable */ + if ( mad->hdr.method == IB_MGMT_METHOD_SET ) { + if ( ( rc = ib_sma_set_pkey_table ( ibdev, mi, mad ) ) != 0 ) { + mad->hdr.status = + htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR ); + /* Fall through to generate GetResponse */ + } + } + + /* Fill in information */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + memset ( pkey_table, 0, sizeof ( *pkey_table ) ); + pkey_table->pkey[0] = htons ( ibdev->pkey ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send PKeyTable GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** Subnet management agent */ +struct ib_mad_agent ib_sma_agent[] __ib_mad_agent = { + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_NODE_INFO ), + .handle = ib_sma_node_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_NODE_DESC ), + .handle = ib_sma_node_desc, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_GUID_INFO ), + .handle = ib_sma_guid_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_PORT_INFO ), + .handle = ib_sma_port_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ), + .handle = ib_sma_pkey_table, + }, +}; + +/** + * Create subnet management agent and interface + * + * @v ibdev Infiniband device + * @v mi Management interface + * @ret rc Return status code + */ +int ib_create_sma ( struct ib_device *ibdev, struct ib_mad_interface *mi ) { + + /* Nothing to do */ + DBGC ( ibdev, "IBDEV %p SMA using SMI %p\n", ibdev, mi ); + + return 0; +} + +/** + * Destroy subnet management agent and interface + * + * @v ibdev Infiniband device + * @v mi Management interface + */ +void ib_destroy_sma ( struct ib_device *ibdev __unused, + struct ib_mad_interface *mi __unused ) { + /* Nothing to do */ +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_smc.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_smc.c new file mode 100644 index 0000000..d308dd9 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_smc.c @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <byteswap.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_smc.h> + +/** + * @file + * + * Infiniband Subnet Management Client + * + */ + +/** + * Get port information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_port_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Construct MAD */ + memset ( mad, 0, sizeof ( *mad ) ); + mad->hdr.base_version = IB_MGMT_BASE_VERSION; + mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->hdr.class_version = 1; + mad->hdr.method = IB_MGMT_METHOD_GET; + mad->hdr.attr_id = htons ( IB_SMP_ATTR_PORT_INFO ); + mad->hdr.attr_mod = htonl ( ibdev->port ); + + if ( ( rc = local_mad ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get port info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get GUID information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_guid_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Construct MAD */ + memset ( mad, 0, sizeof ( *mad ) ); + mad->hdr.base_version = IB_MGMT_BASE_VERSION; + mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->hdr.class_version = 1; + mad->hdr.method = IB_MGMT_METHOD_GET; + mad->hdr.attr_id = htons ( IB_SMP_ATTR_GUID_INFO ); + + if ( ( rc = local_mad ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get partition key table + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_pkey_table ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Construct MAD */ + memset ( mad, 0, sizeof ( *mad ) ); + mad->hdr.base_version = IB_MGMT_BASE_VERSION; + mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->hdr.class_version = 1; + mad->hdr.method = IB_MGMT_METHOD_GET; + mad->hdr.attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ); + + if ( ( rc = local_mad ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get MAD parameters + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @ret rc Return status code + */ +int ib_smc_update ( struct ib_device *ibdev, ib_local_mad_t local_mad ) { + union ib_mad mad; + struct ib_port_info *port_info = &mad.smp.smp_data.port_info; + struct ib_guid_info *guid_info = &mad.smp.smp_data.guid_info; + struct ib_pkey_table *pkey_table = &mad.smp.smp_data.pkey_table; + int rc; + + /* Port info gives us the link state, the first half of the + * port GID and the SM LID. + */ + if ( ( rc = ib_smc_get_port_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->gid.u.half[0], port_info->gid_prefix, + sizeof ( ibdev->gid.u.half[0] ) ); + ibdev->lid = ntohs ( port_info->lid ); + ibdev->sm_lid = ntohs ( port_info->mastersm_lid ); + ibdev->link_width_enabled = port_info->link_width_enabled; + ibdev->link_width_supported = port_info->link_width_supported; + ibdev->link_width_active = port_info->link_width_active; + ibdev->link_speed_supported = + ( port_info->link_speed_supported__port_state >> 4 ); + ibdev->port_state = + ( port_info->link_speed_supported__port_state & 0xf ); + ibdev->link_speed_active = + ( port_info->link_speed_active__link_speed_enabled >> 4 ); + ibdev->link_speed_enabled = + ( port_info->link_speed_active__link_speed_enabled & 0xf ); + ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf ); + + /* GUID info gives us the second half of the port GID */ + if ( ( rc = ib_smc_get_guid_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->gid.u.half[1], guid_info->guid[0], + sizeof ( ibdev->gid.u.half[1] ) ); + + /* Get partition key */ + if ( ( rc = ib_smc_get_pkey_table ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + ibdev->pkey = ntohs ( pkey_table->pkey[0] ); + + DBGC ( ibdev, "IBDEV %p port GID is %08x:%08x:%08x:%08x\n", ibdev, + htonl ( ibdev->gid.u.dwords[0] ), + htonl ( ibdev->gid.u.dwords[1] ), + htonl ( ibdev->gid.u.dwords[2] ), + htonl ( ibdev->gid.u.dwords[3] ) ); + + return 0; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_srp.c b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_srp.c new file mode 100644 index 0000000..4191c86 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/infiniband/ib_srp.c @@ -0,0 +1,406 @@ +/* + * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +FILE_LICENCE ( BSD2 ); + +#include <stdlib.h> +#include <errno.h> +#include <gpxe/srp.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_cmrc.h> +#include <gpxe/ib_srp.h> + +/** + * @file + * + * SCSI RDMA Protocol over Infiniband + * + */ + +/* Disambiguate the various possible EINVALs */ +#define EINVAL_BYTE_STRING_LEN ( EINVAL | EUNIQ_01 ) +#define EINVAL_BYTE_STRING ( EINVAL | EUNIQ_02 ) +#define EINVAL_INTEGER ( EINVAL | EUNIQ_03 ) +#define EINVAL_RP_TOO_SHORT ( EINVAL | EUNIQ_04 ) + +/** IB SRP parse flags */ +enum ib_srp_parse_flags { + IB_SRP_PARSE_REQUIRED = 0x0000, + IB_SRP_PARSE_OPTIONAL = 0x8000, + IB_SRP_PARSE_FLAG_MASK = 0xf000, +}; + +/** IB SRP root path parameters */ +struct ib_srp_root_path { + /** SCSI LUN */ + struct scsi_lun *lun; + /** SRP port IDs */ + struct srp_port_ids *port_ids; + /** IB SRP parameters */ + struct ib_srp_parameters *ib; +}; + +/** + * Parse IB SRP root path byte-string value + * + * @v rp_comp Root path component string + * @v default_value Default value to use if component string is empty + * @ret value Value + */ +static int ib_srp_parse_byte_string ( const char *rp_comp, uint8_t *bytes, + unsigned int size_flags ) { + size_t size = ( size_flags & ~IB_SRP_PARSE_FLAG_MASK ); + size_t rp_comp_len = strlen ( rp_comp ); + char buf[3]; + char *buf_end; + + /* Allow optional components to be empty */ + if ( ( rp_comp_len == 0 ) && + ( size_flags & IB_SRP_PARSE_OPTIONAL ) ) + return 0; + + /* Check string length */ + if ( rp_comp_len != ( 2 * size ) ) + return -EINVAL_BYTE_STRING_LEN; + + /* Parse byte string */ + for ( ; size ; size--, rp_comp += 2, bytes++ ) { + memcpy ( buf, rp_comp, 2 ); + buf[2] = '\0'; + *bytes = strtoul ( buf, &buf_end, 16 ); + if ( buf_end != &buf[2] ) + return -EINVAL_BYTE_STRING; + } + return 0; +} + +/** + * Parse IB SRP root path integer value + * + * @v rp_comp Root path component string + * @v default_value Default value to use if component string is empty + * @ret value Value + */ +static int ib_srp_parse_integer ( const char *rp_comp, int default_value ) { + int value; + char *end; + + value = strtoul ( rp_comp, &end, 16 ); + if ( *end ) + return -EINVAL_INTEGER; + + if ( end == rp_comp ) + return default_value; + + return value; +} + +/** + * Parse IB SRP root path literal component + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_literal ( const char *rp_comp __unused, + struct ib_srp_root_path *rp __unused ) { + /* Ignore */ + return 0; +} + +/** + * Parse IB SRP root path source GID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_sgid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_device *ibdev; + + /* Default to the GID of the last opened Infiniband device */ + if ( ( ibdev = last_opened_ibdev() ) != NULL ) + memcpy ( &rp->ib->sgid, &ibdev->gid, sizeof ( rp->ib->sgid ) ); + + return ib_srp_parse_byte_string ( rp_comp, rp->ib->sgid.u.bytes, + ( sizeof ( rp->ib->sgid ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path initiator identifier extension + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_initiator_id_ext ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_srp_initiator_port_id *port_id = + ib_srp_initiator_port_id ( rp->port_ids ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->id_ext.u.bytes, + ( sizeof ( port_id->id_ext ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path initiator HCA GUID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_initiator_hca_guid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_srp_initiator_port_id *port_id = + ib_srp_initiator_port_id ( rp->port_ids ); + + /* Default to the GUID portion of the source GID */ + memcpy ( &port_id->hca_guid, &rp->ib->sgid.u.half[1], + sizeof ( port_id->hca_guid ) ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->hca_guid.u.bytes, + ( sizeof ( port_id->hca_guid ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path destination GID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_dgid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return ib_srp_parse_byte_string ( rp_comp, rp->ib->dgid.u.bytes, + ( sizeof ( rp->ib->dgid ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path partition key + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_pkey ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + int pkey; + + if ( ( pkey = ib_srp_parse_integer ( rp_comp, IB_PKEY_NONE ) ) < 0 ) + return pkey; + rp->ib->pkey = pkey; + return 0; +} + +/** + * Parse IB SRP root path service ID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_service_id ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return ib_srp_parse_byte_string ( rp_comp, rp->ib->service_id.u.bytes, + ( sizeof ( rp->ib->service_id ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path LUN + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_lun ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return scsi_parse_lun ( rp_comp, rp->lun ); +} + +/** + * Parse IB SRP root path target identifier extension + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_target_id_ext ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_srp_target_port_id *port_id = + ib_srp_target_port_id ( rp->port_ids ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->id_ext.u.bytes, + ( sizeof ( port_id->id_ext ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path target I/O controller GUID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_target_ioc_guid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_srp_target_port_id *port_id = + ib_srp_target_port_id ( rp->port_ids ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->ioc_guid.u.bytes, + ( sizeof ( port_id->ioc_guid ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** IB SRP root path component parser */ +struct ib_srp_root_path_parser { + /** + * Parse IB SRP root path component + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ + int ( * parse ) ( const char *rp_comp, struct ib_srp_root_path *rp ); +}; + +/** IB SRP root path components */ +static struct ib_srp_root_path_parser ib_srp_rp_parser[] = { + { ib_srp_parse_literal }, + { ib_srp_parse_sgid }, + { ib_srp_parse_initiator_id_ext }, + { ib_srp_parse_initiator_hca_guid }, + { ib_srp_parse_dgid }, + { ib_srp_parse_pkey }, + { ib_srp_parse_service_id }, + { ib_srp_parse_lun }, + { ib_srp_parse_target_id_ext }, + { ib_srp_parse_target_ioc_guid }, +}; + +/** Number of IB SRP root path components */ +#define IB_SRP_NUM_RP_COMPONENTS \ + ( sizeof ( ib_srp_rp_parser ) / sizeof ( ib_srp_rp_parser[0] ) ) + +/** + * Parse IB SRP root path + * + * @v srp SRP device + * @v rp_string Root path + * @ret rc Return status code + */ +static int ib_srp_parse_root_path ( struct srp_device *srp, + const char *rp_string ) { + struct ib_srp_parameters *ib_params = ib_srp_params ( srp ); + struct ib_srp_root_path rp = { + .lun = &srp->lun, + .port_ids = &srp->port_ids, + .ib = ib_params, + }; + char rp_string_copy[ strlen ( rp_string ) + 1 ]; + char *rp_comp[IB_SRP_NUM_RP_COMPONENTS]; + char *rp_string_tmp = rp_string_copy; + unsigned int i = 0; + int rc; + + /* Split root path into component parts */ + strcpy ( rp_string_copy, rp_string ); + while ( 1 ) { + rp_comp[i++] = rp_string_tmp; + if ( i == IB_SRP_NUM_RP_COMPONENTS ) + break; + for ( ; *rp_string_tmp != ':' ; rp_string_tmp++ ) { + if ( ! *rp_string_tmp ) { + DBGC ( srp, "SRP %p root path \"%s\" too " + "short\n", srp, rp_string ); + return -EINVAL_RP_TOO_SHORT; + } + } + *(rp_string_tmp++) = '\0'; + } + + /* Parse root path components */ + for ( i = 0 ; i < IB_SRP_NUM_RP_COMPONENTS ; i++ ) { + if ( ( rc = ib_srp_rp_parser[i].parse ( rp_comp[i], + &rp ) ) != 0 ) { + DBGC ( srp, "SRP %p could not parse \"%s\" in root " + "path \"%s\": %s\n", srp, rp_comp[i], + rp_string, strerror ( rc ) ); + return rc; + } + } + + return 0; +} + +/** + * Connect IB SRP session + * + * @v srp SRP device + * @ret rc Return status code + */ +static int ib_srp_connect ( struct srp_device *srp ) { + struct ib_srp_parameters *ib_params = ib_srp_params ( srp ); + struct ib_device *ibdev; + int rc; + + /* Identify Infiniband device */ + ibdev = find_ibdev ( &ib_params->sgid ); + if ( ! ibdev ) { + DBGC ( srp, "SRP %p could not identify Infiniband device\n", + srp ); + return -ENODEV; + } + + /* Configure remaining SRP parameters */ + srp->memory_handle = ibdev->rdma_key; + + /* Open CMRC socket */ + if ( ( rc = ib_cmrc_open ( &srp->socket, ibdev, &ib_params->dgid, + &ib_params->service_id ) ) != 0 ) { + DBGC ( srp, "SRP %p could not open CMRC socket: %s\n", + srp, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** IB SRP transport type */ +struct srp_transport_type ib_srp_transport = { + .priv_len = sizeof ( struct ib_srp_parameters ), + .parse_root_path = ib_srp_parse_root_path, + .connect = ib_srp_connect, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/iobpad.c b/debian/grub-extras/disabled/gpxe/src/net/iobpad.c new file mode 100644 index 0000000..cbae221 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/iobpad.c @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * @file + * + * I/O buffer padding + * + */ + +#include <string.h> +#include <gpxe/iobuf.h> + +/** + * Pad I/O buffer + * + * @v iobuf I/O buffer + * @v min_len Minimum length + * + * This function pads and aligns I/O buffers, for devices that + * aren't capable of padding in hardware, or that require specific + * alignment in TX buffers. The packet data will end up aligned to a + * multiple of @c IOB_ALIGN. + * + * @c min_len must not exceed @v IOB_ZLEN. + */ +void iob_pad ( struct io_buffer *iobuf, size_t min_len ) { + void *data; + size_t len; + size_t headroom; + signed int pad_len; + + assert ( min_len <= IOB_ZLEN ); + + /* Move packet data to start of I/O buffer. This will both + * align the data (since I/O buffers are aligned to + * IOB_ALIGN) and give us sufficient space for the + * zero-padding + */ + data = iobuf->data; + len = iob_len ( iobuf ); + headroom = iob_headroom ( iobuf ); + iob_push ( iobuf, headroom ); + memmove ( iobuf->data, data, len ); + iob_unput ( iobuf, headroom ); + + /* Pad to minimum packet length */ + pad_len = ( min_len - iob_len ( iobuf ) ); + if ( pad_len > 0 ) + memset ( iob_put ( iobuf, pad_len ), 0, pad_len ); +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/ipv4.c b/debian/grub-extras/disabled/gpxe/src/net/ipv4.c new file mode 100644 index 0000000..d0c2015 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/ipv4.c @@ -0,0 +1,635 @@ +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <byteswap.h> +#include <gpxe/list.h> +#include <gpxe/in.h> +#include <gpxe/arp.h> +#include <gpxe/if_ether.h> +#include <gpxe/iobuf.h> +#include <gpxe/netdevice.h> +#include <gpxe/ip.h> +#include <gpxe/tcpip.h> +#include <gpxe/dhcp.h> +#include <gpxe/settings.h> + +/** @file + * + * IPv4 protocol + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/* Unique IP datagram identification number */ +static uint16_t next_ident = 0; + +struct net_protocol ipv4_protocol; + +/** List of IPv4 miniroutes */ +struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes ); + +/** List of fragment reassembly buffers */ +static LIST_HEAD ( frag_buffers ); + +/** + * Add IPv4 minirouting table entry + * + * @v netdev Network device + * @v address IPv4 address + * @v netmask Subnet mask + * @v gateway Gateway address (or @c INADDR_NONE for no gateway) + * @ret miniroute Routing table entry, or NULL + */ +static struct ipv4_miniroute * __malloc +add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address, + struct in_addr netmask, struct in_addr gateway ) { + struct ipv4_miniroute *miniroute; + + DBG ( "IPv4 add %s", inet_ntoa ( address ) ); + DBG ( "/%s ", inet_ntoa ( netmask ) ); + if ( gateway.s_addr != INADDR_NONE ) + DBG ( "gw %s ", inet_ntoa ( gateway ) ); + DBG ( "via %s\n", netdev->name ); + + /* Allocate and populate miniroute structure */ + miniroute = malloc ( sizeof ( *miniroute ) ); + if ( ! miniroute ) { + DBG ( "IPv4 could not add miniroute\n" ); + return NULL; + } + + /* Record routing information */ + miniroute->netdev = netdev_get ( netdev ); + miniroute->address = address; + miniroute->netmask = netmask; + miniroute->gateway = gateway; + + /* Add to end of list if we have a gateway, otherwise + * to start of list. + */ + if ( gateway.s_addr != INADDR_NONE ) { + list_add_tail ( &miniroute->list, &ipv4_miniroutes ); + } else { + list_add ( &miniroute->list, &ipv4_miniroutes ); + } + + return miniroute; +} + +/** + * Delete IPv4 minirouting table entry + * + * @v miniroute Routing table entry + */ +static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) { + + DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) ); + DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) ); + if ( miniroute->gateway.s_addr != INADDR_NONE ) + DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) ); + DBG ( "via %s\n", miniroute->netdev->name ); + + netdev_put ( miniroute->netdev ); + list_del ( &miniroute->list ); + free ( miniroute ); +} + +/** + * Perform IPv4 routing + * + * @v dest Final destination address + * @ret dest Next hop destination address + * @ret miniroute Routing table entry to use, or NULL if no route + * + * If the route requires use of a gateway, the next hop destination + * address will be overwritten with the gateway address. + */ +static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) { + struct ipv4_miniroute *miniroute; + int local; + int has_gw; + + /* Never attempt to route the broadcast address */ + if ( dest->s_addr == INADDR_BROADCAST ) + return NULL; + + /* Find first usable route in routing table */ + list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) { + local = ( ( ( dest->s_addr ^ miniroute->address.s_addr ) + & miniroute->netmask.s_addr ) == 0 ); + has_gw = ( miniroute->gateway.s_addr != INADDR_NONE ); + if ( local || has_gw ) { + if ( ! local ) + *dest = miniroute->gateway; + return miniroute; + } + } + + return NULL; +} + +/** + * Fragment reassembly counter timeout + * + * @v timer Retry timer + * @v over If asserted, the timer is greater than @c MAX_TIMEOUT + */ +static void ipv4_frag_expired ( struct retry_timer *timer __unused, + int over ) { + if ( over ) { + DBG ( "Fragment reassembly timeout" ); + /* Free the fragment buffer */ + } +} + +/** + * Free fragment buffer + * + * @v fragbug Fragment buffer + */ +static void free_fragbuf ( struct frag_buffer *fragbuf ) { + free ( fragbuf ); +} + +/** + * Fragment reassembler + * + * @v iobuf I/O buffer, fragment of the datagram + * @ret frag_iob Reassembled packet, or NULL + */ +static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) { + struct iphdr *iphdr = iobuf->data; + struct frag_buffer *fragbuf; + + /** + * Check if the fragment belongs to any fragment series + */ + list_for_each_entry ( fragbuf, &frag_buffers, list ) { + if ( fragbuf->ident == iphdr->ident && + fragbuf->src.s_addr == iphdr->src.s_addr ) { + /** + * Check if the packet is the expected fragment + * + * The offset of the new packet must be equal to the + * length of the data accumulated so far (the length of + * the reassembled I/O buffer + */ + if ( iob_len ( fragbuf->frag_iob ) == + ( iphdr->frags & IP_MASK_OFFSET ) ) { + /** + * Append the contents of the fragment to the + * reassembled I/O buffer + */ + iob_pull ( iobuf, sizeof ( *iphdr ) ); + memcpy ( iob_put ( fragbuf->frag_iob, + iob_len ( iobuf ) ), + iobuf->data, iob_len ( iobuf ) ); + free_iob ( iobuf ); + + /** Check if the fragment series is over */ + if ( ! ( iphdr->frags & IP_MASK_MOREFRAGS ) ) { + iobuf = fragbuf->frag_iob; + free_fragbuf ( fragbuf ); + return iobuf; + } + + } else { + /* Discard the fragment series */ + free_fragbuf ( fragbuf ); + free_iob ( iobuf ); + } + return NULL; + } + } + + /** Check if the fragment is the first in the fragment series */ + if ( iphdr->frags & IP_MASK_MOREFRAGS && + ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) { + + /** Create a new fragment buffer */ + fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) ); + fragbuf->ident = iphdr->ident; + fragbuf->src = iphdr->src; + + /* Set up the reassembly I/O buffer */ + fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE ); + iob_pull ( iobuf, sizeof ( *iphdr ) ); + memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ), + iobuf->data, iob_len ( iobuf ) ); + free_iob ( iobuf ); + + /* Set the reassembly timer */ + fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT; + fragbuf->frag_timer.expired = ipv4_frag_expired; + start_timer ( &fragbuf->frag_timer ); + + /* Add the fragment buffer to the list of fragment buffers */ + list_add ( &fragbuf->list, &frag_buffers ); + } + + return NULL; +} + +/** + * Add IPv4 pseudo-header checksum to existing checksum + * + * @v iobuf I/O buffer + * @v csum Existing checksum + * @ret csum Updated checksum + */ +static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) { + struct ipv4_pseudo_header pshdr; + struct iphdr *iphdr = iobuf->data; + size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 ); + + /* Build pseudo-header */ + pshdr.src = iphdr->src; + pshdr.dest = iphdr->dest; + pshdr.zero_padding = 0x00; + pshdr.protocol = iphdr->protocol; + pshdr.len = htons ( iob_len ( iobuf ) - hdrlen ); + + /* Update the checksum value */ + return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) ); +} + +/** + * Determine link-layer address + * + * @v dest IPv4 destination address + * @v src IPv4 source address + * @v netdev Network device + * @v ll_dest Link-layer destination address buffer + * @ret rc Return status code + */ +static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src, + struct net_device *netdev, uint8_t *ll_dest ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + + if ( dest.s_addr == INADDR_BROADCAST ) { + /* Broadcast address */ + memcpy ( ll_dest, netdev->ll_broadcast, + ll_protocol->ll_addr_len ); + return 0; + } else if ( IN_MULTICAST ( ntohl ( dest.s_addr ) ) ) { + return ll_protocol->mc_hash ( AF_INET, &dest, ll_dest ); + } else { + /* Unicast address: resolve via ARP */ + return arp_resolve ( netdev, &ipv4_protocol, &dest, + &src, ll_dest ); + } +} + +/** + * Transmit IP packet + * + * @v iobuf I/O buffer + * @v tcpip Transport-layer protocol + * @v st_src Source network-layer address + * @v st_dest Destination network-layer address + * @v netdev Network device to use if no route found, or NULL + * @v trans_csum Transport-layer checksum to complete, or NULL + * @ret rc Status + * + * This function expects a transport-layer segment and prepends the IP header + */ +static int ipv4_tx ( struct io_buffer *iobuf, + struct tcpip_protocol *tcpip_protocol, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, + struct net_device *netdev, + uint16_t *trans_csum ) { + struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) ); + struct sockaddr_in *sin_src = ( ( struct sockaddr_in * ) st_src ); + struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest ); + struct ipv4_miniroute *miniroute; + struct in_addr next_hop; + uint8_t ll_dest[MAX_LL_ADDR_LEN]; + int rc; + + /* Fill up the IP header, except source address */ + memset ( iphdr, 0, sizeof ( *iphdr ) ); + iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) ); + iphdr->service = IP_TOS; + iphdr->len = htons ( iob_len ( iobuf ) ); + iphdr->ident = htons ( ++next_ident ); + iphdr->ttl = IP_TTL; + iphdr->protocol = tcpip_protocol->tcpip_proto; + iphdr->dest = sin_dest->sin_addr; + + /* Use routing table to identify next hop and transmitting netdev */ + next_hop = iphdr->dest; + if ( sin_src ) + iphdr->src = sin_src->sin_addr; + if ( ( next_hop.s_addr != INADDR_BROADCAST ) && + ( ! IN_MULTICAST ( ntohl ( next_hop.s_addr ) ) ) && + ( ( miniroute = ipv4_route ( &next_hop ) ) != NULL ) ) { + iphdr->src = miniroute->address; + netdev = miniroute->netdev; + } + if ( ! netdev ) { + DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) ); + rc = -ENETUNREACH; + goto err; + } + + /* Determine link-layer destination address */ + if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev, + ll_dest ) ) != 0 ) { + DBG ( "IPv4 has no link-layer address for %s: %s\n", + inet_ntoa ( next_hop ), strerror ( rc ) ); + goto err; + } + + /* Fix up checksums */ + if ( trans_csum ) + *trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum ); + iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) ); + + /* Print IP4 header for debugging */ + DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) ); + DBG ( "%s len %d proto %d id %04x csum %04x\n", + inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol, + ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) ); + + /* Hand off to link layer */ + if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest ) ) != 0 ) { + DBG ( "IPv4 could not transmit packet via %s: %s\n", + netdev->name, strerror ( rc ) ); + return rc; + } + + return 0; + + err: + free_iob ( iobuf ); + return rc; +} + +/** + * Process incoming packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_source Link-layer destination source + * + * This function expects an IP4 network datagram. It processes the headers + * and sends it to the transport layer. + */ +static int ipv4_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused, + const void *ll_source __unused ) { + struct iphdr *iphdr = iobuf->data; + size_t hdrlen; + size_t len; + union { + struct sockaddr_in sin; + struct sockaddr_tcpip st; + } src, dest; + uint16_t csum; + uint16_t pshdr_csum; + int rc; + + /* Sanity check the IPv4 header */ + if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) { + DBG ( "IPv4 packet too short at %zd bytes (min %zd bytes)\n", + iob_len ( iobuf ), sizeof ( *iphdr ) ); + goto err; + } + if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) { + DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen ); + goto err; + } + hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 ); + if ( hdrlen < sizeof ( *iphdr ) ) { + DBG ( "IPv4 header too short at %zd bytes (min %zd bytes)\n", + hdrlen, sizeof ( *iphdr ) ); + goto err; + } + if ( hdrlen > iob_len ( iobuf ) ) { + DBG ( "IPv4 header too long at %zd bytes " + "(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) ); + goto err; + } + if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) { + DBG ( "IPv4 checksum incorrect (is %04x including checksum " + "field, should be 0000)\n", csum ); + goto err; + } + len = ntohs ( iphdr->len ); + if ( len < hdrlen ) { + DBG ( "IPv4 length too short at %zd bytes " + "(header is %zd bytes)\n", len, hdrlen ); + goto err; + } + if ( len > iob_len ( iobuf ) ) { + DBG ( "IPv4 length too long at %zd bytes " + "(packet is %zd bytes)\n", len, iob_len ( iobuf ) ); + goto err; + } + + /* Print IPv4 header for debugging */ + DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) ); + DBG ( "%s len %d proto %d id %04x csum %04x\n", + inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol, + ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) ); + + /* Truncate packet to correct length, calculate pseudo-header + * checksum and then strip off the IPv4 header. + */ + iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) ); + pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM ); + iob_pull ( iobuf, hdrlen ); + + /* Fragment reassembly */ + if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) || + ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) { + /* Pass the fragment to ipv4_reassemble() which either + * returns a fully reassembled I/O buffer or NULL. + */ + iobuf = ipv4_reassemble ( iobuf ); + if ( ! iobuf ) + return 0; + } + + /* Construct socket addresses and hand off to transport layer */ + memset ( &src, 0, sizeof ( src ) ); + src.sin.sin_family = AF_INET; + src.sin.sin_addr = iphdr->src; + memset ( &dest, 0, sizeof ( dest ) ); + dest.sin.sin_family = AF_INET; + dest.sin.sin_addr = iphdr->dest; + if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st, + &dest.st, pshdr_csum ) ) != 0 ) { + DBG ( "IPv4 received packet rejected by stack: %s\n", + strerror ( rc ) ); + return rc; + } + + return 0; + + err: + free_iob ( iobuf ); + return -EINVAL; +} + +/** + * Check existence of IPv4 address for ARP + * + * @v netdev Network device + * @v net_addr Network-layer address + * @ret rc Return status code + */ +static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) { + const struct in_addr *address = net_addr; + struct ipv4_miniroute *miniroute; + + list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) { + if ( ( miniroute->netdev == netdev ) && + ( miniroute->address.s_addr == address->s_addr ) ) { + /* Found matching address */ + return 0; + } + } + return -ENOENT; +} + +/** + * Convert IPv4 address to dotted-quad notation + * + * @v in IP address + * @ret string IP address in dotted-quad notation + */ +char * inet_ntoa ( struct in_addr in ) { + static char buf[16]; /* "xxx.xxx.xxx.xxx" */ + uint8_t *bytes = ( uint8_t * ) ∈ + + snprintf ( buf, sizeof (buf), "%d.%d.%d.%d", + bytes[0], bytes[1], bytes[2], bytes[3] ); + return buf; +} + +/** + * Transcribe IP address + * + * @v net_addr IP address + * @ret string IP address in dotted-quad notation + * + */ +static const char * ipv4_ntoa ( const void *net_addr ) { + return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) ); +} + +/** IPv4 protocol */ +struct net_protocol ipv4_protocol __net_protocol = { + .name = "IP", + .net_proto = htons ( ETH_P_IP ), + .net_addr_len = sizeof ( struct in_addr ), + .rx = ipv4_rx, + .ntoa = ipv4_ntoa, +}; + +/** IPv4 TCPIP net protocol */ +struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = { + .name = "IPv4", + .sa_family = AF_INET, + .tx = ipv4_tx, +}; + +/** IPv4 ARP protocol */ +struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = { + .net_protocol = &ipv4_protocol, + .check = ipv4_arp_check, +}; + +/****************************************************************************** + * + * Settings + * + ****************************************************************************** + */ + +/** IPv4 address setting */ +struct setting ip_setting __setting = { + .name = "ip", + .description = "IPv4 address", + .tag = DHCP_EB_YIADDR, + .type = &setting_type_ipv4, +}; + +/** IPv4 subnet mask setting */ +struct setting netmask_setting __setting = { + .name = "netmask", + .description = "IPv4 subnet mask", + .tag = DHCP_SUBNET_MASK, + .type = &setting_type_ipv4, +}; + +/** Default gateway setting */ +struct setting gateway_setting __setting = { + .name = "gateway", + .description = "Default gateway", + .tag = DHCP_ROUTERS, + .type = &setting_type_ipv4, +}; + +/** + * Create IPv4 routing table based on configured settings + * + * @ret rc Return status code + */ +static int ipv4_create_routes ( void ) { + struct ipv4_miniroute *miniroute; + struct ipv4_miniroute *tmp; + struct net_device *netdev; + struct settings *settings; + struct in_addr address = { 0 }; + struct in_addr netmask = { 0 }; + struct in_addr gateway = { INADDR_NONE }; + + /* Delete all existing routes */ + list_for_each_entry_safe ( miniroute, tmp, &ipv4_miniroutes, list ) + del_ipv4_miniroute ( miniroute ); + + /* Create a route for each configured network device */ + for_each_netdev ( netdev ) { + settings = netdev_settings ( netdev ); + /* Get IPv4 address */ + address.s_addr = 0; + fetch_ipv4_setting ( settings, &ip_setting, &address ); + if ( ! address.s_addr ) + continue; + /* Calculate default netmask */ + if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) { + netmask.s_addr = htonl ( IN_CLASSA_NET ); + } else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) { + netmask.s_addr = htonl ( IN_CLASSB_NET ); + } else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) { + netmask.s_addr = htonl ( IN_CLASSC_NET ); + } else { + netmask.s_addr = 0; + } + /* Override with subnet mask, if present */ + fetch_ipv4_setting ( settings, &netmask_setting, &netmask ); + /* Get default gateway, if present */ + gateway.s_addr = INADDR_NONE; + fetch_ipv4_setting ( settings, &gateway_setting, &gateway ); + /* Configure route */ + miniroute = add_ipv4_miniroute ( netdev, address, + netmask, gateway ); + if ( ! miniroute ) + return -ENOMEM; + } + + return 0; +} + +/** IPv4 settings applicator */ +struct settings_applicator ipv4_settings_applicator __settings_applicator = { + .apply = ipv4_create_routes, +}; + +/* Drag in ICMP */ +REQUIRE_OBJECT ( icmp ); diff --git a/debian/grub-extras/disabled/gpxe/src/net/netdev_settings.c b/debian/grub-extras/disabled/gpxe/src/net/netdev_settings.c new file mode 100644 index 0000000..b9220f5 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/netdev_settings.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <errno.h> +#include <gpxe/dhcp.h> +#include <gpxe/settings.h> +#include <gpxe/netdevice.h> + +/** @file + * + * Network device configuration settings + * + */ + +/** Network device named settings */ +struct setting mac_setting __setting = { + .name = "mac", + .description = "MAC address", + .type = &setting_type_hex, +}; + +/** + * Store value of network device setting + * + * @v settings Settings block + * @v setting Setting to store + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +static int netdev_store ( struct settings *settings, struct setting *setting, + const void *data, size_t len ) { + struct net_device *netdev = container_of ( settings, struct net_device, + settings.settings ); + + if ( setting_cmp ( setting, &mac_setting ) == 0 ) { + if ( len != netdev->ll_protocol->ll_addr_len ) + return -EINVAL; + memcpy ( netdev->ll_addr, data, len ); + return 0; + } + + return generic_settings_store ( settings, setting, data, len ); +} + +/** + * Fetch value of network device setting + * + * @v settings Settings block + * @v setting Setting to fetch + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +static int netdev_fetch ( struct settings *settings, struct setting *setting, + void *data, size_t len ) { + struct net_device *netdev = container_of ( settings, struct net_device, + settings.settings ); + + if ( setting_cmp ( setting, &mac_setting ) == 0 ) { + if ( len > netdev->ll_protocol->ll_addr_len ) + len = netdev->ll_protocol->ll_addr_len; + memcpy ( data, netdev->ll_addr, len ); + return netdev->ll_protocol->ll_addr_len; + } + + return generic_settings_fetch ( settings, setting, data, len ); +} + +/** + * Clear network device settings + * + * @v settings Settings block + */ +static void netdev_clear ( struct settings *settings ) { + generic_settings_clear ( settings ); +} + +/** Network device configuration settings operations */ +struct settings_operations netdev_settings_operations = { + .store = netdev_store, + .fetch = netdev_fetch, + .clear = netdev_clear, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/netdevice.c b/debian/grub-extras/disabled/gpxe/src/net/netdevice.c new file mode 100644 index 0000000..ee0d0b7 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/netdevice.c @@ -0,0 +1,633 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <byteswap.h> +#include <string.h> +#include <errno.h> +#include <gpxe/if_ether.h> +#include <gpxe/iobuf.h> +#include <gpxe/tables.h> +#include <gpxe/process.h> +#include <gpxe/init.h> +#include <gpxe/device.h> +#include <gpxe/errortab.h> +#include <gpxe/netdevice.h> + +/** @file + * + * Network device management + * + */ + +/** List of network devices */ +struct list_head net_devices = LIST_HEAD_INIT ( net_devices ); + +/** List of open network devices, in reverse order of opening */ +static struct list_head open_net_devices = LIST_HEAD_INIT ( open_net_devices ); + +/** Default link status code */ +#define EUNKNOWN_LINK_STATUS EINPROGRESS + +/** Human-readable message for the default link status */ +struct errortab netdev_errors[] __errortab = { + { EUNKNOWN_LINK_STATUS, "Unknown" }, +}; + +/** + * Mark network device as having link down + * + * @v netdev Network device + */ +void netdev_link_down ( struct net_device *netdev ) { + + switch ( netdev->link_rc ) { + case 0: + case -EUNKNOWN_LINK_STATUS: + netdev->link_rc = -ENOTCONN; + break; + default: + /* Avoid clobbering a more detailed link status code, + * if one is already set. + */ + break; + } +} + +/** + * Record network device statistic + * + * @v stats Network device statistics + * @v rc Status code + */ +static void netdev_record_stat ( struct net_device_stats *stats, int rc ) { + struct net_device_error *error; + struct net_device_error *least_common_error; + unsigned int i; + + /* If this is not an error, just update the good counter */ + if ( rc == 0 ) { + stats->good++; + return; + } + + /* Update the bad counter */ + stats->bad++; + + /* Locate the appropriate error record */ + least_common_error = &stats->errors[0]; + for ( i = 0 ; i < ( sizeof ( stats->errors ) / + sizeof ( stats->errors[0] ) ) ; i++ ) { + error = &stats->errors[i]; + /* Update matching record, if found */ + if ( error->rc == rc ) { + error->count++; + return; + } + if ( error->count < least_common_error->count ) + least_common_error = error; + } + + /* Overwrite the least common error record */ + least_common_error->rc = rc; + least_common_error->count = 1; +} + +/** + * Transmit raw packet via network device + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + * + * Transmits the packet via the specified network device. This + * function takes ownership of the I/O buffer. + */ +int netdev_tx ( struct net_device *netdev, struct io_buffer *iobuf ) { + int rc; + + DBGC ( netdev, "NETDEV %p transmitting %p (%p+%zx)\n", + netdev, iobuf, iobuf->data, iob_len ( iobuf ) ); + + list_add_tail ( &iobuf->list, &netdev->tx_queue ); + + if ( ! ( netdev->state & NETDEV_OPEN ) ) { + rc = -ENETUNREACH; + goto err; + } + + if ( ( rc = netdev->op->transmit ( netdev, iobuf ) ) != 0 ) + goto err; + + return 0; + + err: + netdev_tx_complete_err ( netdev, iobuf, rc ); + return rc; +} + +/** + * Complete network transmission + * + * @v netdev Network device + * @v iobuf I/O buffer + * @v rc Packet status code + * + * The packet must currently be in the network device's TX queue. + */ +void netdev_tx_complete_err ( struct net_device *netdev, + struct io_buffer *iobuf, int rc ) { + + /* Update statistics counter */ + netdev_record_stat ( &netdev->tx_stats, rc ); + if ( rc == 0 ) { + DBGC ( netdev, "NETDEV %p transmission %p complete\n", + netdev, iobuf ); + } else { + DBGC ( netdev, "NETDEV %p transmission %p failed: %s\n", + netdev, iobuf, strerror ( rc ) ); + } + + /* Catch data corruption as early as possible */ + assert ( iobuf->list.next != NULL ); + assert ( iobuf->list.prev != NULL ); + + /* Dequeue and free I/O buffer */ + list_del ( &iobuf->list ); + free_iob ( iobuf ); +} + +/** + * Complete network transmission + * + * @v netdev Network device + * @v rc Packet status code + * + * Completes the oldest outstanding packet in the TX queue. + */ +void netdev_tx_complete_next_err ( struct net_device *netdev, int rc ) { + struct io_buffer *iobuf; + + list_for_each_entry ( iobuf, &netdev->tx_queue, list ) { + netdev_tx_complete_err ( netdev, iobuf, rc ); + return; + } +} + +/** + * Flush device's transmit queue + * + * @v netdev Network device + */ +static void netdev_tx_flush ( struct net_device *netdev ) { + + /* Discard any packets in the TX queue */ + while ( ! list_empty ( &netdev->tx_queue ) ) { + netdev_tx_complete_next_err ( netdev, -ECANCELED ); + } +} + +/** + * Add packet to receive queue + * + * @v netdev Network device + * @v iobuf I/O buffer, or NULL + * + * The packet is added to the network device's RX queue. This + * function takes ownership of the I/O buffer. + */ +void netdev_rx ( struct net_device *netdev, struct io_buffer *iobuf ) { + + DBGC ( netdev, "NETDEV %p received %p (%p+%zx)\n", + netdev, iobuf, iobuf->data, iob_len ( iobuf ) ); + + /* Enqueue packet */ + list_add_tail ( &iobuf->list, &netdev->rx_queue ); + + /* Update statistics counter */ + netdev_record_stat ( &netdev->rx_stats, 0 ); +} + +/** + * Discard received packet + * + * @v netdev Network device + * @v iobuf I/O buffer, or NULL + * @v rc Packet status code + * + * The packet is discarded and an RX error is recorded. This function + * takes ownership of the I/O buffer. @c iobuf may be NULL if, for + * example, the net device wishes to report an error due to being + * unable to allocate an I/O buffer. + */ +void netdev_rx_err ( struct net_device *netdev, + struct io_buffer *iobuf, int rc ) { + + DBGC ( netdev, "NETDEV %p failed to receive %p: %s\n", + netdev, iobuf, strerror ( rc ) ); + + /* Discard packet */ + free_iob ( iobuf ); + + /* Update statistics counter */ + netdev_record_stat ( &netdev->rx_stats, rc ); +} + +/** + * Poll for completed and received packets on network device + * + * @v netdev Network device + * + * Polls the network device for completed transmissions and received + * packets. Any received packets will be added to the RX packet queue + * via netdev_rx(). + */ +void netdev_poll ( struct net_device *netdev ) { + + if ( netdev->state & NETDEV_OPEN ) + netdev->op->poll ( netdev ); +} + +/** + * Remove packet from device's receive queue + * + * @v netdev Network device + * @ret iobuf I/O buffer, or NULL + * + * Removes the first packet from the device's RX queue and returns it. + * Ownership of the packet is transferred to the caller. + */ +struct io_buffer * netdev_rx_dequeue ( struct net_device *netdev ) { + struct io_buffer *iobuf; + + list_for_each_entry ( iobuf, &netdev->rx_queue, list ) { + list_del ( &iobuf->list ); + return iobuf; + } + return NULL; +} + +/** + * Flush device's receive queue + * + * @v netdev Network device + */ +static void netdev_rx_flush ( struct net_device *netdev ) { + struct io_buffer *iobuf; + + /* Discard any packets in the RX queue */ + while ( ( iobuf = netdev_rx_dequeue ( netdev ) ) ) { + netdev_rx_err ( netdev, iobuf, -ECANCELED ); + } +} + +/** + * Free network device + * + * @v refcnt Network device reference counter + */ +static void free_netdev ( struct refcnt *refcnt ) { + struct net_device *netdev = + container_of ( refcnt, struct net_device, refcnt ); + + netdev_tx_flush ( netdev ); + netdev_rx_flush ( netdev ); + clear_settings ( netdev_settings ( netdev ) ); + free ( netdev ); +} + +/** + * Allocate network device + * + * @v priv_size Size of private data area (net_device::priv) + * @ret netdev Network device, or NULL + * + * Allocates space for a network device and its private data area. + */ +struct net_device * alloc_netdev ( size_t priv_size ) { + struct net_device *netdev; + size_t total_len; + + total_len = ( sizeof ( *netdev ) + priv_size ); + netdev = zalloc ( total_len ); + if ( netdev ) { + netdev->refcnt.free = free_netdev; + netdev->link_rc = -EUNKNOWN_LINK_STATUS; + INIT_LIST_HEAD ( &netdev->tx_queue ); + INIT_LIST_HEAD ( &netdev->rx_queue ); + netdev_settings_init ( netdev ); + netdev->priv = ( ( ( void * ) netdev ) + sizeof ( *netdev ) ); + } + return netdev; +} + +/** + * Register network device + * + * @v netdev Network device + * @ret rc Return status code + * + * Gives the network device a name and adds it to the list of network + * devices. + */ +int register_netdev ( struct net_device *netdev ) { + static unsigned int ifindex = 0; + int rc; + + /* Create device name */ + snprintf ( netdev->name, sizeof ( netdev->name ), "net%d", + ifindex++ ); + + /* Set initial link-layer address */ + netdev->ll_protocol->init_addr ( netdev->hw_addr, netdev->ll_addr ); + + /* Register per-netdev configuration settings */ + if ( ( rc = register_settings ( netdev_settings ( netdev ), + NULL ) ) != 0 ) { + DBGC ( netdev, "NETDEV %p could not register settings: %s\n", + netdev, strerror ( rc ) ); + return rc; + } + + /* Add to device list */ + netdev_get ( netdev ); + list_add_tail ( &netdev->list, &net_devices ); + DBGC ( netdev, "NETDEV %p registered as %s (phys %s hwaddr %s)\n", + netdev, netdev->name, netdev->dev->name, + netdev_addr ( netdev ) ); + + return 0; +} + +/** + * Open network device + * + * @v netdev Network device + * @ret rc Return status code + */ +int netdev_open ( struct net_device *netdev ) { + int rc; + + /* Do nothing if device is already open */ + if ( netdev->state & NETDEV_OPEN ) + return 0; + + DBGC ( netdev, "NETDEV %p opening\n", netdev ); + + /* Open the device */ + if ( ( rc = netdev->op->open ( netdev ) ) != 0 ) + return rc; + + /* Mark as opened */ + netdev->state |= NETDEV_OPEN; + + /* Add to head of open devices list */ + list_add ( &netdev->open_list, &open_net_devices ); + + return 0; +} + +/** + * Close network device + * + * @v netdev Network device + */ +void netdev_close ( struct net_device *netdev ) { + + /* Do nothing if device is already closed */ + if ( ! ( netdev->state & NETDEV_OPEN ) ) + return; + + DBGC ( netdev, "NETDEV %p closing\n", netdev ); + + /* Close the device */ + netdev->op->close ( netdev ); + + /* Flush TX and RX queues */ + netdev_tx_flush ( netdev ); + netdev_rx_flush ( netdev ); + + /* Mark as closed */ + netdev->state &= ~NETDEV_OPEN; + + /* Remove from open devices list */ + list_del ( &netdev->open_list ); +} + +/** + * Unregister network device + * + * @v netdev Network device + * + * Removes the network device from the list of network devices. + */ +void unregister_netdev ( struct net_device *netdev ) { + + /* Ensure device is closed */ + netdev_close ( netdev ); + + /* Unregister per-netdev configuration settings */ + unregister_settings ( netdev_settings ( netdev ) ); + + /* Remove from device list */ + list_del ( &netdev->list ); + netdev_put ( netdev ); + DBGC ( netdev, "NETDEV %p unregistered\n", netdev ); +} + +/** Enable or disable interrupts + * + * @v netdev Network device + * @v enable Interrupts should be enabled + */ +void netdev_irq ( struct net_device *netdev, int enable ) { + netdev->op->irq ( netdev, enable ); +} + +/** + * Get network device by name + * + * @v name Network device name + * @ret netdev Network device, or NULL + */ +struct net_device * find_netdev ( const char *name ) { + struct net_device *netdev; + + list_for_each_entry ( netdev, &net_devices, list ) { + if ( strcmp ( netdev->name, name ) == 0 ) + return netdev; + } + + return NULL; +} + +/** + * Get network device by PCI bus:dev.fn address + * + * @v bus_type Bus type + * @v location Bus location + * @ret netdev Network device, or NULL + */ +struct net_device * find_netdev_by_location ( unsigned int bus_type, + unsigned int location ) { + struct net_device *netdev; + + list_for_each_entry ( netdev, &net_devices, list ) { + if ( ( netdev->dev->desc.bus_type == bus_type ) && + ( netdev->dev->desc.location == location ) ) + return netdev; + } + + return NULL; +} + +/** + * Get most recently opened network device + * + * @ret netdev Most recently opened network device, or NULL + */ +struct net_device * last_opened_netdev ( void ) { + struct net_device *netdev; + + list_for_each_entry ( netdev, &open_net_devices, open_list ) { + assert ( netdev->state & NETDEV_OPEN ); + return netdev; + } + + return NULL; +} + +/** + * Transmit network-layer packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v ll_dest Destination link-layer address + * @ret rc Return status code + * + * Prepends link-layer headers to the I/O buffer and transmits the + * packet via the specified network device. This function takes + * ownership of the I/O buffer. + */ +int net_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, const void *ll_dest ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + int rc; + + /* Force a poll on the netdevice to (potentially) clear any + * backed-up TX completions. This is needed on some network + * devices to avoid excessive losses due to small TX ring + * sizes. + */ + netdev_poll ( netdev ); + + /* Add link-layer header */ + if ( ( rc = ll_protocol->push ( netdev, iobuf, ll_dest, netdev->ll_addr, + net_protocol->net_proto ) ) != 0 ) { + free_iob ( iobuf ); + return rc; + } + + /* Transmit packet */ + return netdev_tx ( netdev, iobuf ); +} + +/** + * Process received network-layer packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_proto Network-layer protocol, in network-byte order + * @v ll_source Source link-layer address + * @ret rc Return status code + */ +int net_rx ( struct io_buffer *iobuf, struct net_device *netdev, + uint16_t net_proto, const void *ll_source ) { + struct net_protocol *net_protocol; + + /* Hand off to network-layer protocol, if any */ + for_each_table_entry ( net_protocol, NET_PROTOCOLS ) { + if ( net_protocol->net_proto == net_proto ) + return net_protocol->rx ( iobuf, netdev, ll_source ); + } + + DBGC ( netdev, "NETDEV %p unknown network protocol %04x\n", + netdev, ntohs ( net_proto ) ); + free_iob ( iobuf ); + return 0; +} + +/** + * Single-step the network stack + * + * @v process Network stack process + * + * This polls all interfaces for received packets, and processes + * packets from the RX queue. + */ +static void net_step ( struct process *process __unused ) { + struct net_device *netdev; + struct io_buffer *iobuf; + struct ll_protocol *ll_protocol; + const void *ll_dest; + const void *ll_source; + uint16_t net_proto; + int rc; + + /* Poll and process each network device */ + list_for_each_entry ( netdev, &net_devices, list ) { + + /* Poll for new packets */ + netdev_poll ( netdev ); + + /* Process at most one received packet. Give priority + * to getting packets out of the NIC over processing + * the received packets, because we advertise a window + * that assumes that we can receive packets from the + * NIC faster than they arrive. + */ + if ( ( iobuf = netdev_rx_dequeue ( netdev ) ) ) { + + DBGC ( netdev, "NETDEV %p processing %p (%p+%zx)\n", + netdev, iobuf, iobuf->data, + iob_len ( iobuf ) ); + + /* Remove link-layer header */ + ll_protocol = netdev->ll_protocol; + if ( ( rc = ll_protocol->pull ( netdev, iobuf, + &ll_dest, &ll_source, + &net_proto ) ) != 0 ) { + free_iob ( iobuf ); + continue; + } + + net_rx ( iobuf, netdev, net_proto, ll_source ); + } + } +} + +/** Networking stack process */ +struct process net_process __permanent_process = { + .list = LIST_HEAD_INIT ( net_process.list ), + .step = net_step, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/nullnet.c b/debian/grub-extras/disabled/gpxe/src/net/nullnet.c new file mode 100644 index 0000000..381f02a --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/nullnet.c @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <errno.h> +#include <gpxe/iobuf.h> +#include <gpxe/netdevice.h> + +/** @file + * + * Null network device + * + */ + +static int null_open ( struct net_device *netdev __unused ) { + return -ENODEV; +}; + +static void null_close ( struct net_device *netdev __unused ) { + /* Do nothing */ +}; + +static int null_transmit ( struct net_device *netdev __unused, + struct io_buffer *iobuf __unused ) { + return -ENODEV; +}; + +static void null_poll ( struct net_device *netdev __unused ) { + /* Do nothing */ +} + +static void null_irq ( struct net_device *netdev __unused, + int enable __unused ) { + /* Do nothing */ +} + +struct net_device_operations null_netdev_operations = { + .open = null_open, + .close = null_close, + .transmit = null_transmit, + .poll = null_poll, + .irq = null_irq, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/rarp.c b/debian/grub-extras/disabled/gpxe/src/net/rarp.c new file mode 100644 index 0000000..1d0dd96 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/rarp.c @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <byteswap.h> +#include <gpxe/netdevice.h> +#include <gpxe/iobuf.h> +#include <gpxe/if_ether.h> +#include <gpxe/rarp.h> + +/** @file + * + * Reverse Address Resolution Protocol + * + */ + +/** + * Process incoming ARP packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_source Link-layer source address + * @ret rc Return status code + * + * This is a dummy method which simply discards RARP packets. + */ +static int rarp_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + const void *ll_source __unused ) { + free_iob ( iobuf ); + return 0; +} + + +/** + * Transcribe RARP address + * + * @v net_addr RARP address + * @ret string "<RARP>" + * + * This operation is meaningless for the RARP protocol. + */ +static const char * rarp_ntoa ( const void *net_addr __unused ) { + return "<RARP>"; +} + +/** RARP protocol */ +struct net_protocol rarp_protocol __net_protocol = { + .name = "RARP", + .net_proto = htons ( ETH_P_RARP ), + .rx = rarp_rx, + .ntoa = rarp_ntoa, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/retry.c b/debian/grub-extras/disabled/gpxe/src/net/retry.c new file mode 100644 index 0000000..40f656f --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/retry.c @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <gpxe/timer.h> +#include <gpxe/list.h> +#include <gpxe/process.h> +#include <gpxe/init.h> +#include <gpxe/retry.h> + +/** @file + * + * Retry timers + * + * A retry timer is a binary exponential backoff timer. It can be + * used to build automatic retransmission into network protocols. + * + * This implementation of the timer is designed to satisfy RFC 2988 + * and therefore be usable as a TCP retransmission timer. + * + * + */ + +/* The theoretical minimum that the algorithm in stop_timer() can + * adjust the timeout back down to is seven ticks, so set the minimum + * timeout to at least that value for the sake of consistency. + */ +#define MIN_TIMEOUT 7 + +/** List of running timers */ +static LIST_HEAD ( timers ); + +/** + * Start timer + * + * @v timer Retry timer + * + * This starts the timer running with the current timeout value. If + * stop_timer() is not called before the timer expires, the timer will + * be stopped and the timer's callback function will be called. + */ +void start_timer ( struct retry_timer *timer ) { + if ( ! timer->running ) + list_add ( &timer->list, &timers ); + timer->start = currticks(); + timer->running = 1; + + /* 0 means "use default timeout" */ + if ( timer->min_timeout == 0 ) + timer->min_timeout = DEFAULT_MIN_TIMEOUT; + /* We must never be less than MIN_TIMEOUT under any circumstances */ + if ( timer->min_timeout < MIN_TIMEOUT ) + timer->min_timeout = MIN_TIMEOUT; + /* Honor user-specified minimum timeout */ + if ( timer->timeout < timer->min_timeout ) + timer->timeout = timer->min_timeout; + + DBG2 ( "Timer %p started at time %ld (expires at %ld)\n", + timer, timer->start, ( timer->start + timer->timeout ) ); +} + +/** + * Start timer with a specified fixed timeout + * + * @v timer Retry timer + * @v timeout Timeout, in ticks + */ +void start_timer_fixed ( struct retry_timer *timer, unsigned long timeout ) { + start_timer ( timer ); + timer->timeout = timeout; + DBG2 ( "Timer %p expiry time changed to %ld\n", + timer, ( timer->start + timer->timeout ) ); +} + +/** + * Stop timer + * + * @v timer Retry timer + * + * This stops the timer and updates the timer's timeout value. + */ +void stop_timer ( struct retry_timer *timer ) { + unsigned long old_timeout = timer->timeout; + unsigned long now = currticks(); + unsigned long runtime; + + /* If timer was already stopped, do nothing */ + if ( ! timer->running ) + return; + + list_del ( &timer->list ); + runtime = ( now - timer->start ); + timer->running = 0; + DBG2 ( "Timer %p stopped at time %ld (ran for %ld)\n", + timer, now, runtime ); + + /* Update timer. Variables are: + * + * r = round-trip time estimate (i.e. runtime) + * t = timeout value (i.e. timer->timeout) + * s = smoothed round-trip time + * + * By choice, we set t = 4s, i.e. allow for four times the + * normal round-trip time to pass before retransmitting. + * + * We want to smooth according to s := ( 7 s + r ) / 8 + * + * Since we don't actually store s, this reduces to + * t := ( 7 t / 8 ) + ( r / 2 ) + * + */ + if ( timer->count ) { + timer->count--; + } else { + timer->timeout -= ( timer->timeout >> 3 ); + timer->timeout += ( runtime >> 1 ); + if ( timer->timeout != old_timeout ) { + DBG ( "Timer %p timeout updated to %ld\n", + timer, timer->timeout ); + } + } +} + +/** + * Handle expired timer + * + * @v timer Retry timer + */ +static void timer_expired ( struct retry_timer *timer ) { + int fail; + + /* Stop timer without performing RTT calculations */ + DBG2 ( "Timer %p stopped at time %ld on expiry\n", + timer, currticks() ); + assert ( timer->running ); + list_del ( &timer->list ); + timer->running = 0; + timer->count++; + + /* Back off the timeout value */ + timer->timeout <<= 1; + if ( timer->max_timeout == 0 ) /* 0 means "use default timeout" */ + timer->max_timeout = DEFAULT_MAX_TIMEOUT; + if ( ( fail = ( timer->timeout > timer->max_timeout ) ) ) + timer->timeout = timer->max_timeout; + DBG ( "Timer %p timeout backed off to %ld\n", + timer, timer->timeout ); + + /* Call expiry callback */ + timer->expired ( timer, fail ); +} + +/** + * Single-step the retry timer list + * + * @v process Retry timer process + */ +static void retry_step ( struct process *process __unused ) { + struct retry_timer *timer; + struct retry_timer *tmp; + unsigned long now = currticks(); + unsigned long used; + + list_for_each_entry_safe ( timer, tmp, &timers, list ) { + used = ( now - timer->start ); + if ( used >= timer->timeout ) + timer_expired ( timer ); + } +} + +/** Retry timer process */ +struct process retry_process __permanent_process = { + .list = LIST_HEAD_INIT ( retry_process.list ), + .step = retry_step, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/tcp.c b/debian/grub-extras/disabled/gpxe/src/net/tcp.c new file mode 100644 index 0000000..1269579 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/tcp.c @@ -0,0 +1,1156 @@ +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <errno.h> +#include <byteswap.h> +#include <gpxe/timer.h> +#include <gpxe/iobuf.h> +#include <gpxe/malloc.h> +#include <gpxe/retry.h> +#include <gpxe/refcnt.h> +#include <gpxe/xfer.h> +#include <gpxe/open.h> +#include <gpxe/uri.h> +#include <gpxe/tcpip.h> +#include <gpxe/tcp.h> + +/** @file + * + * TCP protocol + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** A TCP connection */ +struct tcp_connection { + /** Reference counter */ + struct refcnt refcnt; + /** List of TCP connections */ + struct list_head list; + + /** Data transfer interface */ + struct xfer_interface xfer; + /** Data transfer interface closed flag */ + int xfer_closed; + + /** Remote socket address */ + struct sockaddr_tcpip peer; + /** Local port, in network byte order */ + unsigned int local_port; + + /** Current TCP state */ + unsigned int tcp_state; + /** Previous TCP state + * + * Maintained only for debug messages + */ + unsigned int prev_tcp_state; + /** Current sequence number + * + * Equivalent to SND.UNA in RFC 793 terminology. + */ + uint32_t snd_seq; + /** Unacknowledged sequence count + * + * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology. + */ + uint32_t snd_sent; + /** Send window + * + * Equivalent to SND.WND in RFC 793 terminology + */ + uint32_t snd_win; + /** Current acknowledgement number + * + * Equivalent to RCV.NXT in RFC 793 terminology. + */ + uint32_t rcv_ack; + /** Receive window + * + * Equivalent to RCV.WND in RFC 793 terminology. + */ + uint32_t rcv_win; + /** Most recent received timestamp + * + * Equivalent to TS.Recent in RFC 1323 terminology. + */ + uint32_t ts_recent; + /** Timestamps enabled */ + int timestamps; + + /** Transmit queue */ + struct list_head queue; + /** Retransmission timer */ + struct retry_timer timer; +}; + +/** + * List of registered TCP connections + */ +static LIST_HEAD ( tcp_conns ); + +/* Forward declarations */ +static struct xfer_interface_operations tcp_xfer_operations; +static void tcp_expired ( struct retry_timer *timer, int over ); +static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, + uint32_t win ); + +/** + * Name TCP state + * + * @v state TCP state + * @ret name Name of TCP state + */ +static inline __attribute__ (( always_inline )) const char * +tcp_state ( int state ) { + switch ( state ) { + case TCP_CLOSED: return "CLOSED"; + case TCP_LISTEN: return "LISTEN"; + case TCP_SYN_SENT: return "SYN_SENT"; + case TCP_SYN_RCVD: return "SYN_RCVD"; + case TCP_ESTABLISHED: return "ESTABLISHED"; + case TCP_FIN_WAIT_1: return "FIN_WAIT_1"; + case TCP_FIN_WAIT_2: return "FIN_WAIT_2"; + case TCP_CLOSING_OR_LAST_ACK: return "CLOSING/LAST_ACK"; + case TCP_TIME_WAIT: return "TIME_WAIT"; + case TCP_CLOSE_WAIT: return "CLOSE_WAIT"; + default: return "INVALID"; + } +} + +/** + * Dump TCP state transition + * + * @v tcp TCP connection + */ +static inline __attribute__ (( always_inline )) void +tcp_dump_state ( struct tcp_connection *tcp ) { + + if ( tcp->tcp_state != tcp->prev_tcp_state ) { + DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp, + tcp_state ( tcp->prev_tcp_state ), + tcp_state ( tcp->tcp_state ) ); + } + tcp->prev_tcp_state = tcp->tcp_state; +} + +/** + * Dump TCP flags + * + * @v flags TCP flags + */ +static inline __attribute__ (( always_inline )) void +tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) { + if ( flags & TCP_RST ) + DBGC2 ( tcp, " RST" ); + if ( flags & TCP_SYN ) + DBGC2 ( tcp, " SYN" ); + if ( flags & TCP_PSH ) + DBGC2 ( tcp, " PSH" ); + if ( flags & TCP_FIN ) + DBGC2 ( tcp, " FIN" ); + if ( flags & TCP_ACK ) + DBGC2 ( tcp, " ACK" ); +} + +/*************************************************************************** + * + * Open and close + * + *************************************************************************** + */ + +/** + * Bind TCP connection to local port + * + * @v tcp TCP connection + * @v port Local port number, in network-endian order + * @ret rc Return status code + * + * If the port is 0, the connection is assigned an available port + * between 1024 and 65535. + */ +static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) { + struct tcp_connection *existing; + static uint16_t try_port = 1023; + + /* If no port specified, find the first available port */ + if ( ! port ) { + while ( try_port ) { + try_port++; + if ( try_port < 1024 ) + continue; + if ( tcp_bind ( tcp, htons ( try_port ) ) == 0 ) + return 0; + } + DBGC ( tcp, "TCP %p could not bind: no free ports\n", tcp ); + return -EADDRINUSE; + } + + /* Attempt bind to local port */ + list_for_each_entry ( existing, &tcp_conns, list ) { + if ( existing->local_port == port ) { + DBGC ( tcp, "TCP %p could not bind: port %d in use\n", + tcp, ntohs ( port ) ); + return -EADDRINUSE; + } + } + tcp->local_port = port; + + DBGC ( tcp, "TCP %p bound to port %d\n", tcp, ntohs ( port ) ); + return 0; +} + +/** + * Open a TCP connection + * + * @v xfer Data transfer interface + * @v peer Peer socket address + * @v local Local socket address, or NULL + * @ret rc Return status code + */ +static int tcp_open ( struct xfer_interface *xfer, struct sockaddr *peer, + struct sockaddr *local ) { + struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer; + struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local; + struct tcp_connection *tcp; + unsigned int bind_port; + int rc; + + /* Allocate and initialise structure */ + tcp = zalloc ( sizeof ( *tcp ) ); + if ( ! tcp ) + return -ENOMEM; + DBGC ( tcp, "TCP %p allocated\n", tcp ); + xfer_init ( &tcp->xfer, &tcp_xfer_operations, &tcp->refcnt ); + tcp->prev_tcp_state = TCP_CLOSED; + tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN ); + tcp_dump_state ( tcp ); + tcp->snd_seq = random(); + INIT_LIST_HEAD ( &tcp->queue ); + tcp->timer.expired = tcp_expired; + memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) ); + + /* Bind to local port */ + bind_port = ( st_local ? st_local->st_port : 0 ); + if ( ( rc = tcp_bind ( tcp, bind_port ) ) != 0 ) + goto err; + + /* Start timer to initiate SYN */ + start_timer_nodelay ( &tcp->timer ); + + /* Attach parent interface, transfer reference to connection + * list and return + */ + xfer_plug_plug ( &tcp->xfer, xfer ); + list_add ( &tcp->list, &tcp_conns ); + return 0; + + err: + ref_put ( &tcp->refcnt ); + return rc; +} + +/** + * Close TCP connection + * + * @v tcp TCP connection + * @v rc Reason for close + * + * Closes the data transfer interface. If the TCP state machine is in + * a suitable state, the connection will be deleted. + */ +static void tcp_close ( struct tcp_connection *tcp, int rc ) { + struct io_buffer *iobuf; + struct io_buffer *tmp; + + /* Close data transfer interface */ + xfer_nullify ( &tcp->xfer ); + xfer_close ( &tcp->xfer, rc ); + tcp->xfer_closed = 1; + + /* If we are in CLOSED, or have otherwise not yet received a + * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the + * connection. + */ + if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { + + /* Transition to CLOSED for the sake of debugging messages */ + tcp->tcp_state = TCP_CLOSED; + tcp_dump_state ( tcp ); + + /* Free any unsent I/O buffers */ + list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) { + list_del ( &iobuf->list ); + free_iob ( iobuf ); + } + + /* Remove from list and drop reference */ + stop_timer ( &tcp->timer ); + list_del ( &tcp->list ); + ref_put ( &tcp->refcnt ); + DBGC ( tcp, "TCP %p connection deleted\n", tcp ); + return; + } + + /* If we have not had our SYN acknowledged (i.e. we are in + * SYN_RCVD), pretend that it has been acknowledged so that we + * can send a FIN without breaking things. + */ + if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) + tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 ); + + /* If we have no data remaining to send, start sending FIN */ + if ( list_empty ( &tcp->queue ) ) { + tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); + tcp_dump_state ( tcp ); + } +} + +/*************************************************************************** + * + * Transmit data path + * + *************************************************************************** + */ + +/** + * Calculate transmission window + * + * @v tcp TCP connection + * @ret len Maximum length that can be sent in a single packet + */ +static size_t tcp_xmit_win ( struct tcp_connection *tcp ) { + size_t len; + + /* Not ready if we're not in a suitable connection state */ + if ( ! TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) + return 0; + + /* Length is the minimum of the receiver's window and the path MTU */ + len = tcp->snd_win; + if ( len > TCP_PATH_MTU ) + len = TCP_PATH_MTU; + + return len; +} + +/** + * Process TCP transmit queue + * + * @v tcp TCP connection + * @v max_len Maximum length to process + * @v dest I/O buffer to fill with data, or NULL + * @v remove Remove data from queue + * @ret len Length of data processed + * + * This processes at most @c max_len bytes from the TCP connection's + * transmit queue. Data will be copied into the @c dest I/O buffer + * (if provided) and, if @c remove is true, removed from the transmit + * queue. + */ +static size_t tcp_process_queue ( struct tcp_connection *tcp, size_t max_len, + struct io_buffer *dest, int remove ) { + struct io_buffer *iobuf; + struct io_buffer *tmp; + size_t frag_len; + size_t len = 0; + + list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) { + frag_len = iob_len ( iobuf ); + if ( frag_len > max_len ) + frag_len = max_len; + if ( dest ) { + memcpy ( iob_put ( dest, frag_len ), iobuf->data, + frag_len ); + } + if ( remove ) { + iob_pull ( iobuf, frag_len ); + if ( ! iob_len ( iobuf ) ) { + list_del ( &iobuf->list ); + free_iob ( iobuf ); + } + } + len += frag_len; + max_len -= frag_len; + } + return len; +} + +/** + * Transmit any outstanding data + * + * @v tcp TCP connection + * @v force_send Force sending of packet + * + * Transmits any outstanding data on the connection. + * + * Note that even if an error is returned, the retransmission timer + * will have been started if necessary, and so the stack will + * eventually attempt to retransmit the failed packet. + */ +static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) { + struct io_buffer *iobuf; + struct tcp_header *tcphdr; + struct tcp_mss_option *mssopt; + struct tcp_timestamp_padded_option *tsopt; + void *payload; + unsigned int flags; + size_t len = 0; + uint32_t seq_len; + uint32_t app_win; + uint32_t max_rcv_win; + int rc; + + /* If retransmission timer is already running, do nothing */ + if ( timer_running ( &tcp->timer ) ) + return 0; + + /* Calculate both the actual (payload) and sequence space + * lengths that we wish to transmit. + */ + if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) { + len = tcp_process_queue ( tcp, tcp_xmit_win ( tcp ), + NULL, 0 ); + } + seq_len = len; + flags = TCP_FLAGS_SENDING ( tcp->tcp_state ); + if ( flags & ( TCP_SYN | TCP_FIN ) ) { + /* SYN or FIN consume one byte, and we can never send both */ + assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) ); + seq_len++; + } + tcp->snd_sent = seq_len; + + /* If we have nothing to transmit, stop now */ + if ( ( seq_len == 0 ) && ! force_send ) + return 0; + + /* If we are transmitting anything that requires + * acknowledgement (i.e. consumes sequence space), start the + * retransmission timer. Do this before attempting to + * allocate the I/O buffer, in case allocation itself fails. + */ + if ( seq_len ) + start_timer ( &tcp->timer ); + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( len + MAX_HDR_LEN ); + if ( ! iobuf ) { + DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x " + "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ), + tcp->rcv_ack ); + return -ENOMEM; + } + iob_reserve ( iobuf, MAX_HDR_LEN ); + + /* Fill data payload from transmit queue */ + tcp_process_queue ( tcp, len, iobuf, 0 ); + + /* Expand receive window if possible */ + // max_rcv_win = ( ( freemem * 3 ) / 4 ); + //if ( max_rcv_win > TCP_MAX_WINDOW_SIZE ) + max_rcv_win = TCP_MAX_WINDOW_SIZE; + app_win = xfer_window ( &tcp->xfer ); + if ( max_rcv_win > app_win ) + max_rcv_win = app_win; + max_rcv_win &= ~0x03; /* Keep everything dword-aligned */ + if ( tcp->rcv_win < max_rcv_win ) + tcp->rcv_win = max_rcv_win; + + /* Fill up the TCP header */ + payload = iobuf->data; + if ( flags & TCP_SYN ) { + mssopt = iob_push ( iobuf, sizeof ( *mssopt ) ); + mssopt->kind = TCP_OPTION_MSS; + mssopt->length = sizeof ( *mssopt ); + mssopt->mss = htons ( TCP_MSS ); + } + if ( ( flags & TCP_SYN ) || tcp->timestamps ) { + tsopt = iob_push ( iobuf, sizeof ( *tsopt ) ); + memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) ); + tsopt->tsopt.kind = TCP_OPTION_TS; + tsopt->tsopt.length = sizeof ( tsopt->tsopt ); + tsopt->tsopt.tsval = ntohl ( currticks() ); + tsopt->tsopt.tsecr = ntohl ( tcp->ts_recent ); + } + if ( ! ( flags & TCP_SYN ) ) + flags |= TCP_PSH; + tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); + memset ( tcphdr, 0, sizeof ( *tcphdr ) ); + tcphdr->src = tcp->local_port; + tcphdr->dest = tcp->peer.st_port; + tcphdr->seq = htonl ( tcp->snd_seq ); + tcphdr->ack = htonl ( tcp->rcv_ack ); + tcphdr->hlen = ( ( payload - iobuf->data ) << 2 ); + tcphdr->flags = flags; + tcphdr->win = htons ( tcp->rcv_win ); + tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); + + /* Dump header */ + DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4zd", + tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), + ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ), + ntohl ( tcphdr->ack ), len ); + tcp_dump_flags ( tcp, tcphdr->flags ); + DBGC2 ( tcp, "\n" ); + + /* Transmit packet */ + if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL, + &tcphdr->csum ) ) != 0 ) { + DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n", + tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), + tcp->rcv_ack, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Retransmission timer expired + * + * @v timer Retry timer + * @v over Failure indicator + */ +static void tcp_expired ( struct retry_timer *timer, int over ) { + struct tcp_connection *tcp = + container_of ( timer, struct tcp_connection, timer ); + int graceful_close = TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ); + + DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp, + ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ), + tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack ); + + assert ( ( tcp->tcp_state == TCP_SYN_SENT ) || + ( tcp->tcp_state == TCP_SYN_RCVD ) || + ( tcp->tcp_state == TCP_ESTABLISHED ) || + ( tcp->tcp_state == TCP_FIN_WAIT_1 ) || + ( tcp->tcp_state == TCP_TIME_WAIT ) || + ( tcp->tcp_state == TCP_CLOSE_WAIT ) || + ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) ); + + if ( over || graceful_close ) { + /* If we have finally timed out and given up, or if + * this is the result of a graceful close, terminate + * the connection + */ + tcp->tcp_state = TCP_CLOSED; + tcp_dump_state ( tcp ); + tcp_close ( tcp, -ETIMEDOUT ); + } else { + /* Otherwise, retransmit the packet */ + tcp_xmit ( tcp, 0 ); + } +} + +/** + * Send RST response to incoming packet + * + * @v in_tcphdr TCP header of incoming packet + * @ret rc Return status code + */ +static int tcp_xmit_reset ( struct tcp_connection *tcp, + struct sockaddr_tcpip *st_dest, + struct tcp_header *in_tcphdr ) { + struct io_buffer *iobuf; + struct tcp_header *tcphdr; + int rc; + + /* Allocate space for dataless TX buffer */ + iobuf = alloc_iob ( MAX_HDR_LEN ); + if ( ! iobuf ) { + DBGC ( tcp, "TCP %p could not allocate iobuf for RST " + "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ), + ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) ); + return -ENOMEM; + } + iob_reserve ( iobuf, MAX_HDR_LEN ); + + /* Construct RST response */ + tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); + memset ( tcphdr, 0, sizeof ( *tcphdr ) ); + tcphdr->src = in_tcphdr->dest; + tcphdr->dest = in_tcphdr->src; + tcphdr->seq = in_tcphdr->ack; + tcphdr->ack = in_tcphdr->seq; + tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 ); + tcphdr->flags = ( TCP_RST | TCP_ACK ); + tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE ); + tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); + + /* Dump header */ + DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d", + tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), + ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ), + ntohl ( tcphdr->ack ), 0 ); + tcp_dump_flags ( tcp, tcphdr->flags ); + DBGC2 ( tcp, "\n" ); + + /* Transmit packet */ + if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest, + NULL, &tcphdr->csum ) ) != 0 ) { + DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: " + "%s\n", tcp, ntohl ( in_tcphdr->ack ), + ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ), + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/*************************************************************************** + * + * Receive data path + * + *************************************************************************** + */ + +/** + * Identify TCP connection by local port number + * + * @v local_port Local port (in network-endian order) + * @ret tcp TCP connection, or NULL + */ +static struct tcp_connection * tcp_demux ( unsigned int local_port ) { + struct tcp_connection *tcp; + + list_for_each_entry ( tcp, &tcp_conns, list ) { + if ( tcp->local_port == local_port ) + return tcp; + } + return NULL; +} + +/** + * Parse TCP received options + * + * @v tcp TCP connection + * @v data Raw options data + * @v len Raw options length + * @v options Options structure to fill in + */ +static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data, + size_t len, struct tcp_options *options ) { + const void *end = ( data + len ); + const struct tcp_option *option; + unsigned int kind; + + memset ( options, 0, sizeof ( *options ) ); + while ( data < end ) { + option = data; + kind = option->kind; + if ( kind == TCP_OPTION_END ) + return; + if ( kind == TCP_OPTION_NOP ) { + data++; + continue; + } + switch ( kind ) { + case TCP_OPTION_MSS: + options->mssopt = data; + break; + case TCP_OPTION_TS: + options->tsopt = data; + break; + default: + DBGC ( tcp, "TCP %p received unknown option %d\n", + tcp, kind ); + break; + } + data += option->length; + } +} + +/** + * Consume received sequence space + * + * @v tcp TCP connection + * @v seq_len Sequence space length to consume + */ +static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) { + tcp->rcv_ack += seq_len; + if ( tcp->rcv_win > seq_len ) { + tcp->rcv_win -= seq_len; + } else { + tcp->rcv_win = 0; + } +} + +/** + * Handle TCP received SYN + * + * @v tcp TCP connection + * @v seq SEQ value (in host-endian order) + * @v options TCP options + * @ret rc Return status code + */ +static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq, + struct tcp_options *options ) { + + /* Synchronise sequence numbers on first SYN */ + if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { + tcp->rcv_ack = seq; + if ( options->tsopt ) + tcp->timestamps = 1; + } + + /* Ignore duplicate SYN */ + if ( ( tcp->rcv_ack - seq ) > 0 ) + return 0; + + /* Mark SYN as received and start sending ACKs with each packet */ + tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) | + TCP_STATE_RCVD ( TCP_SYN ) ); + + /* Acknowledge SYN */ + tcp_rx_seq ( tcp, 1 ); + + return 0; +} + +/** + * Handle TCP received ACK + * + * @v tcp TCP connection + * @v ack ACK value (in host-endian order) + * @v win WIN value (in host-endian order) + * @ret rc Return status code + */ +static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, + uint32_t win ) { + uint32_t ack_len = ( ack - tcp->snd_seq ); + size_t len; + unsigned int acked_flags; + + /* Check for out-of-range or old duplicate ACKs */ + if ( ack_len > tcp->snd_sent ) { + DBGC ( tcp, "TCP %p received ACK for %08x..%08x, " + "sent only %08x..%08x\n", tcp, tcp->snd_seq, + ( tcp->snd_seq + ack_len ), tcp->snd_seq, + ( tcp->snd_seq + tcp->snd_sent ) ); + + if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) { + /* Just ignore what might be old duplicate ACKs */ + return 0; + } else { + /* Send RST if an out-of-range ACK is received + * on a not-yet-established connection, as per + * RFC 793. + */ + return -EINVAL; + } + } + + /* Ignore ACKs that don't actually acknowledge any new data. + * (In particular, do not stop the retransmission timer; this + * avoids creating a sorceror's apprentice syndrome when a + * duplicate ACK is received and we still have data in our + * transmit queue.) + */ + if ( ack_len == 0 ) + return 0; + + /* Stop the retransmission timer */ + stop_timer ( &tcp->timer ); + + /* Determine acknowledged flags and data length */ + len = ack_len; + acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) & + ( TCP_SYN | TCP_FIN ) ); + if ( acked_flags ) + len--; + + /* Update SEQ and sent counters, and window size */ + tcp->snd_seq = ack; + tcp->snd_sent = 0; + tcp->snd_win = win; + + /* Remove any acknowledged data from transmit queue */ + tcp_process_queue ( tcp, len, NULL, 1 ); + + /* Mark SYN/FIN as acknowledged if applicable. */ + if ( acked_flags ) + tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags ); + + /* Start sending FIN if we've had all possible data ACKed */ + if ( list_empty ( &tcp->queue ) && tcp->xfer_closed ) + tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); + + return 0; +} + +/** + * Handle TCP received data + * + * @v tcp TCP connection + * @v seq SEQ value (in host-endian order) + * @v iobuf I/O buffer + * @ret rc Return status code + * + * This function takes ownership of the I/O buffer. + */ +static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq, + struct io_buffer *iobuf ) { + uint32_t already_rcvd; + uint32_t len; + int rc; + + /* Ignore duplicate or out-of-order data */ + already_rcvd = ( tcp->rcv_ack - seq ); + len = iob_len ( iobuf ); + if ( already_rcvd >= len ) { + free_iob ( iobuf ); + return 0; + } + iob_pull ( iobuf, already_rcvd ); + len -= already_rcvd; + + /* Deliver data to application */ + if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) { + DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n", + tcp, seq, ( seq + len ), strerror ( rc ) ); + return rc; + } + + /* Acknowledge new data */ + tcp_rx_seq ( tcp, len ); + + return 0; +} + +/** + * Handle TCP received FIN + * + * @v tcp TCP connection + * @v seq SEQ value (in host-endian order) + * @ret rc Return status code + */ +static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) { + + /* Ignore duplicate or out-of-order FIN */ + if ( ( tcp->rcv_ack - seq ) > 0 ) + return 0; + + /* Mark FIN as received and acknowledge it */ + tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN ); + tcp_rx_seq ( tcp, 1 ); + + /* Close connection */ + tcp_close ( tcp, 0 ); + + return 0; +} + +/** + * Handle TCP received RST + * + * @v tcp TCP connection + * @v seq SEQ value (in host-endian order) + * @ret rc Return status code + */ +static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) { + + /* Accept RST only if it falls within the window. If we have + * not yet received a SYN, then we have no window to test + * against, so fall back to checking that our SYN has been + * ACKed. + */ + if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) { + if ( ( seq - tcp->rcv_ack ) >= tcp->rcv_win ) + return 0; + } else { + if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) + return 0; + } + + /* Abort connection */ + tcp->tcp_state = TCP_CLOSED; + tcp_dump_state ( tcp ); + tcp_close ( tcp, -ECONNRESET ); + + DBGC ( tcp, "TCP %p connection reset by peer\n", tcp ); + return -ECONNRESET; +} + +/** + * Process received packet + * + * @v iobuf I/O buffer + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @ret rc Return status code + */ +static int tcp_rx ( struct io_buffer *iobuf, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest __unused, + uint16_t pshdr_csum ) { + struct tcp_header *tcphdr = iobuf->data; + struct tcp_connection *tcp; + struct tcp_options options; + size_t hlen; + uint16_t csum; + uint32_t start_seq; + uint32_t seq; + uint32_t ack; + uint32_t win; + unsigned int flags; + size_t len; + int rc; + + /* Sanity check packet */ + if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) { + DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n", + iob_len ( iobuf ), sizeof ( *tcphdr ) ); + rc = -EINVAL; + goto discard; + } + hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4; + if ( hlen < sizeof ( *tcphdr ) ) { + DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n", + hlen, sizeof ( *tcphdr ) ); + rc = -EINVAL; + goto discard; + } + if ( hlen > iob_len ( iobuf ) ) { + DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n", + hlen, iob_len ( iobuf ) ); + rc = -EINVAL; + goto discard; + } + csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, + iob_len ( iobuf ) ); + if ( csum != 0 ) { + DBG ( "TCP checksum incorrect (is %04x including checksum " + "field, should be 0000)\n", csum ); + rc = -EINVAL; + goto discard; + } + + /* Parse parameters from header and strip header */ + tcp = tcp_demux ( tcphdr->dest ); + start_seq = seq = ntohl ( tcphdr->seq ); + ack = ntohl ( tcphdr->ack ); + win = ntohs ( tcphdr->win ); + flags = tcphdr->flags; + tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ), + ( hlen - sizeof ( *tcphdr ) ), &options ); + iob_pull ( iobuf, hlen ); + len = iob_len ( iobuf ); + + /* Dump header */ + DBGC2 ( tcp, "TCP %p RX %d<-%d %08x %08x..%08zx %4zd", + tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ), + ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ), + ( ntohl ( tcphdr->seq ) + len + + ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 1 : 0 )), len); + tcp_dump_flags ( tcp, tcphdr->flags ); + DBGC2 ( tcp, "\n" ); + + /* If no connection was found, send RST */ + if ( ! tcp ) { + tcp_xmit_reset ( tcp, st_src, tcphdr ); + rc = -ENOTCONN; + goto discard; + } + + /* Handle ACK, if present */ + if ( flags & TCP_ACK ) { + if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) { + tcp_xmit_reset ( tcp, st_src, tcphdr ); + goto discard; + } + } + + /* Handle SYN, if present */ + if ( flags & TCP_SYN ) { + tcp_rx_syn ( tcp, seq, &options ); + seq++; + } + + /* Handle RST, if present */ + if ( flags & TCP_RST ) { + if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 ) + goto discard; + } + + /* Handle new data, if any */ + tcp_rx_data ( tcp, seq, iobuf ); + seq += len; + + /* Handle FIN, if present */ + if ( flags & TCP_FIN ) { + tcp_rx_fin ( tcp, seq ); + seq++; + } + + /* Update timestamp, if present and applicable */ + if ( ( seq == tcp->rcv_ack ) && options.tsopt ) + tcp->ts_recent = ntohl ( options.tsopt->tsval ); + + /* Dump out any state change as a result of the received packet */ + tcp_dump_state ( tcp ); + + /* Send out any pending data. We force sending a reply if either + * + * a) the peer is expecting an ACK (i.e. consumed sequence space), or + * b) either end of the packet was outside the receive window + * + * Case (b) enables us to support TCP keepalives using + * zero-length packets, which we would otherwise ignore. Note + * that for case (b), we need *only* consider zero-length + * packets, since non-zero-length packets will already be + * caught by case (a). + */ + tcp_xmit ( tcp, ( ( start_seq != seq ) || + ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) ); + + /* If this packet was the last we expect to receive, set up + * timer to expire and cause the connection to be freed. + */ + if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) { + tcp->timer.timeout = ( 2 * TCP_MSL ); + start_timer ( &tcp->timer ); + } + + return 0; + + discard: + /* Free received packet */ + free_iob ( iobuf ); + return rc; +} + +/** TCP protocol */ +struct tcpip_protocol tcp_protocol __tcpip_protocol = { + .name = "TCP", + .rx = tcp_rx, + .tcpip_proto = IP_TCP, +}; + +/*************************************************************************** + * + * Data transfer interface + * + *************************************************************************** + */ + +/** + * Close interface + * + * @v xfer Data transfer interface + * @v rc Reason for close + */ +static void tcp_xfer_close ( struct xfer_interface *xfer, int rc ) { + struct tcp_connection *tcp = + container_of ( xfer, struct tcp_connection, xfer ); + + /* Close data transfer interface */ + tcp_close ( tcp, rc ); + + /* Transmit FIN, if possible */ + tcp_xmit ( tcp, 0 ); +} + +/** + * Check flow control window + * + * @v xfer Data transfer interface + * @ret len Length of window + */ +static size_t tcp_xfer_window ( struct xfer_interface *xfer ) { + struct tcp_connection *tcp = + container_of ( xfer, struct tcp_connection, xfer ); + + /* Not ready if data queue is non-empty. This imposes a limit + * of only one unACKed packet in the TX queue at any time; we + * do this to conserve memory usage. + */ + if ( ! list_empty ( &tcp->queue ) ) + return 0; + + /* Return TCP window length */ + return tcp_xmit_win ( tcp ); +} + +/** + * Deliver datagram as I/O buffer + * + * @v xfer Data transfer interface + * @v iobuf Datagram I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int tcp_xfer_deliver_iob ( struct xfer_interface *xfer, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct tcp_connection *tcp = + container_of ( xfer, struct tcp_connection, xfer ); + + /* Enqueue packet */ + list_add_tail ( &iobuf->list, &tcp->queue ); + + /* Transmit data, if possible */ + tcp_xmit ( tcp, 0 ); + + return 0; +} + +/** TCP data transfer interface operations */ +static struct xfer_interface_operations tcp_xfer_operations = { + .close = tcp_xfer_close, + .vredirect = ignore_xfer_vredirect, + .window = tcp_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = tcp_xfer_deliver_iob, + .deliver_raw = xfer_deliver_as_iob, +}; + +/*************************************************************************** + * + * Openers + * + *************************************************************************** + */ + +/** TCP socket opener */ +struct socket_opener tcp_socket_opener __socket_opener = { + .semantics = TCP_SOCK_STREAM, + .family = AF_INET, + .open = tcp_open, +}; + +/** Linkage hack */ +int tcp_sock_stream = TCP_SOCK_STREAM; + +/** + * Open TCP URI + * + * @v xfer Data transfer interface + * @v uri URI + * @ret rc Return status code + */ +static int tcp_open_uri ( struct xfer_interface *xfer, struct uri *uri ) { + struct sockaddr_tcpip peer; + + /* Sanity check */ + if ( ! uri->host ) + return -EINVAL; + + memset ( &peer, 0, sizeof ( peer ) ); + peer.st_port = htons ( uri_port ( uri, 0 ) ); + return xfer_open_named_socket ( xfer, SOCK_STREAM, + ( struct sockaddr * ) &peer, + uri->host, NULL ); +} + +/** TCP URI opener */ +struct uri_opener tcp_uri_opener __uri_opener = { + .scheme = "tcp", + .open = tcp_open_uri, +}; + diff --git a/debian/grub-extras/disabled/gpxe/src/net/tcp/http.c b/debian/grub-extras/disabled/gpxe/src/net/tcp/http.c new file mode 100644 index 0000000..a02408a --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/tcp/http.c @@ -0,0 +1,603 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * @file + * + * Hyper Text Transfer Protocol (HTTP) + * + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <strings.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <gpxe/uri.h> +#include <gpxe/refcnt.h> +#include <gpxe/iobuf.h> +#include <gpxe/xfer.h> +#include <gpxe/open.h> +#include <gpxe/socket.h> +#include <gpxe/tcpip.h> +#include <gpxe/process.h> +#include <gpxe/linebuf.h> +#include <gpxe/features.h> +#include <gpxe/base64.h> +#include <gpxe/http.h> + +FEATURE ( FEATURE_PROTOCOL, "HTTP", DHCP_EB_FEATURE_HTTP, 1 ); + +/** HTTP receive state */ +enum http_rx_state { + HTTP_RX_RESPONSE = 0, + HTTP_RX_HEADER, + HTTP_RX_DATA, + HTTP_RX_DEAD, +}; + +/** + * An HTTP request + * + */ +struct http_request { + /** Reference count */ + struct refcnt refcnt; + /** Data transfer interface */ + struct xfer_interface xfer; + + /** URI being fetched */ + struct uri *uri; + /** Transport layer interface */ + struct xfer_interface socket; + + /** TX process */ + struct process process; + + /** HTTP response code */ + unsigned int response; + /** HTTP Content-Length */ + size_t content_length; + /** Received length */ + size_t rx_len; + /** RX state */ + enum http_rx_state rx_state; + /** Line buffer for received header lines */ + struct line_buffer linebuf; +}; + +/** + * Free HTTP request + * + * @v refcnt Reference counter + */ +static void http_free ( struct refcnt *refcnt ) { + struct http_request *http = + container_of ( refcnt, struct http_request, refcnt ); + + uri_put ( http->uri ); + empty_line_buffer ( &http->linebuf ); + free ( http ); +}; + +/** + * Mark HTTP request as complete + * + * @v http HTTP request + * @v rc Return status code + */ +static void http_done ( struct http_request *http, int rc ) { + + /* Prevent further processing of any current packet */ + http->rx_state = HTTP_RX_DEAD; + + /* If we had a Content-Length, and the received content length + * isn't correct, flag an error + */ + if ( http->content_length && + ( http->content_length != http->rx_len ) ) { + DBGC ( http, "HTTP %p incorrect length %zd, should be %zd\n", + http, http->rx_len, http->content_length ); + rc = -EIO; + } + + /* Remove process */ + process_del ( &http->process ); + + /* Close all data transfer interfaces */ + xfer_nullify ( &http->socket ); + xfer_close ( &http->socket, rc ); + xfer_nullify ( &http->xfer ); + xfer_close ( &http->xfer, rc ); +} + +/** + * Convert HTTP response code to return status code + * + * @v response HTTP response code + * @ret rc Return status code + */ +static int http_response_to_rc ( unsigned int response ) { + switch ( response ) { + case 200: + case 301: + case 302: + return 0; + case 404: + return -ENOENT; + case 403: + return -EPERM; + case 401: + return -EACCES; + default: + return -EIO; + } +} + +/** + * Handle HTTP response + * + * @v http HTTP request + * @v response HTTP response + * @ret rc Return status code + */ +static int http_rx_response ( struct http_request *http, char *response ) { + char *spc; + int rc; + + DBGC ( http, "HTTP %p response \"%s\"\n", http, response ); + + /* Check response starts with "HTTP/" */ + if ( strncmp ( response, "HTTP/", 5 ) != 0 ) + return -EIO; + + /* Locate and check response code */ + spc = strchr ( response, ' ' ); + if ( ! spc ) + return -EIO; + http->response = strtoul ( spc, NULL, 10 ); + if ( ( rc = http_response_to_rc ( http->response ) ) != 0 ) + return rc; + + /* Move to received headers */ + http->rx_state = HTTP_RX_HEADER; + return 0; +} + +/** + * Handle HTTP Location header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + */ +static int http_rx_location ( struct http_request *http, const char *value ) { + int rc; + + /* Redirect to new location */ + DBGC ( http, "HTTP %p redirecting to %s\n", http, value ); + if ( ( rc = xfer_redirect ( &http->xfer, LOCATION_URI_STRING, + value ) ) != 0 ) { + DBGC ( http, "HTTP %p could not redirect: %s\n", + http, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle HTTP Content-Length header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + */ +static int http_rx_content_length ( struct http_request *http, + const char *value ) { + char *endp; + + http->content_length = strtoul ( value, &endp, 10 ); + if ( *endp != '\0' ) { + DBGC ( http, "HTTP %p invalid Content-Length \"%s\"\n", + http, value ); + return -EIO; + } + + /* Use seek() to notify recipient of filesize */ + xfer_seek ( &http->xfer, http->content_length, SEEK_SET ); + xfer_seek ( &http->xfer, 0, SEEK_SET ); + + return 0; +} + +/** An HTTP header handler */ +struct http_header_handler { + /** Name (e.g. "Content-Length") */ + const char *header; + /** Handle received header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + * + * If an error is returned, the download will be aborted. + */ + int ( * rx ) ( struct http_request *http, const char *value ); +}; + +/** List of HTTP header handlers */ +static struct http_header_handler http_header_handlers[] = { + { + .header = "Location", + .rx = http_rx_location, + }, + { + .header = "Content-Length", + .rx = http_rx_content_length, + }, + { NULL, NULL } +}; + +/** + * Handle HTTP header + * + * @v http HTTP request + * @v header HTTP header + * @ret rc Return status code + */ +static int http_rx_header ( struct http_request *http, char *header ) { + struct http_header_handler *handler; + char *separator; + char *value; + int rc; + + /* An empty header line marks the transition to the data phase */ + if ( ! header[0] ) { + DBGC ( http, "HTTP %p start of data\n", http ); + empty_line_buffer ( &http->linebuf ); + http->rx_state = HTTP_RX_DATA; + return 0; + } + + DBGC ( http, "HTTP %p header \"%s\"\n", http, header ); + + /* Split header at the ": " */ + separator = strstr ( header, ": " ); + if ( ! separator ) { + DBGC ( http, "HTTP %p malformed header\n", http ); + return -EIO; + } + *separator = '\0'; + value = ( separator + 2 ); + + /* Hand off to header handler, if one exists */ + for ( handler = http_header_handlers ; handler->header ; handler++ ) { + if ( strcasecmp ( header, handler->header ) == 0 ) { + if ( ( rc = handler->rx ( http, value ) ) != 0 ) + return rc; + break; + } + } + return 0; +} + +/** An HTTP line-based data handler */ +struct http_line_handler { + /** Handle line + * + * @v http HTTP request + * @v line Line to handle + * @ret rc Return status code + */ + int ( * rx ) ( struct http_request *http, char *line ); +}; + +/** List of HTTP line-based data handlers */ +static struct http_line_handler http_line_handlers[] = { + [HTTP_RX_RESPONSE] = { .rx = http_rx_response }, + [HTTP_RX_HEADER] = { .rx = http_rx_header }, +}; + +/** + * Handle new data arriving via HTTP connection in the data phase + * + * @v http HTTP request + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int http_rx_data ( struct http_request *http, + struct io_buffer *iobuf ) { + int rc; + + /* Update received length */ + http->rx_len += iob_len ( iobuf ); + + /* Hand off data buffer */ + if ( ( rc = xfer_deliver_iob ( &http->xfer, iobuf ) ) != 0 ) + return rc; + + /* If we have reached the content-length, stop now */ + if ( http->content_length && + ( http->rx_len >= http->content_length ) ) { + http_done ( http, 0 ); + } + + return 0; +} + +/** + * Handle new data arriving via HTTP connection + * + * @v socket Transport layer interface + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int http_socket_deliver_iob ( struct xfer_interface *socket, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct http_request *http = + container_of ( socket, struct http_request, socket ); + struct http_line_handler *lh; + char *line; + ssize_t len; + int rc = 0; + + while ( iob_len ( iobuf ) ) { + switch ( http->rx_state ) { + case HTTP_RX_DEAD: + /* Do no further processing */ + goto done; + case HTTP_RX_DATA: + /* Once we're into the data phase, just fill + * the data buffer + */ + rc = http_rx_data ( http, iob_disown ( iobuf ) ); + goto done; + case HTTP_RX_RESPONSE: + case HTTP_RX_HEADER: + /* In the other phases, buffer and process a + * line at a time + */ + len = line_buffer ( &http->linebuf, iobuf->data, + iob_len ( iobuf ) ); + if ( len < 0 ) { + rc = len; + DBGC ( http, "HTTP %p could not buffer line: " + "%s\n", http, strerror ( rc ) ); + goto done; + } + iob_pull ( iobuf, len ); + line = buffered_line ( &http->linebuf ); + if ( line ) { + lh = &http_line_handlers[http->rx_state]; + if ( ( rc = lh->rx ( http, line ) ) != 0 ) + goto done; + } + break; + default: + assert ( 0 ); + break; + } + } + + done: + if ( rc ) + http_done ( http, rc ); + free_iob ( iobuf ); + return rc; +} + +/** + * HTTP process + * + * @v process Process + */ +static void http_step ( struct process *process ) { + struct http_request *http = + container_of ( process, struct http_request, process ); + const char *path = http->uri->path; + const char *host = http->uri->host; + const char *query = http->uri->query; + const char *user = http->uri->user; + const char *password = + ( http->uri->password ? http->uri->password : "" ); + size_t user_pw_len = ( user ? ( strlen ( user ) + 1 /* ":" */ + + strlen ( password ) ) : 0 ); + size_t user_pw_base64_len = base64_encoded_len ( user_pw_len ); + char user_pw[ user_pw_len + 1 /* NUL */ ]; + char user_pw_base64[ user_pw_base64_len + 1 /* NUL */ ]; + int rc; + + if ( xfer_window ( &http->socket ) ) { + + /* We want to execute only once */ + process_del ( &http->process ); + + /* Construct authorisation, if applicable */ + if ( user ) { + char *buf = user_pw; + ssize_t remaining = sizeof ( user_pw ); + size_t len; + + /* URI-decode the username and password */ + len = uri_decode ( user, buf, remaining ); + buf += len; + remaining -= len; + *(remaining--, buf++) = ':'; + len = uri_decode ( password, buf, remaining ); + buf += len; + remaining -= len; + assert ( remaining >= 0 ); + + /* Base64-encode the "user:password" string */ + base64_encode ( user_pw, user_pw_base64 ); + } + + /* Send GET request */ + if ( ( rc = xfer_printf ( &http->socket, + "GET %s%s%s HTTP/1.0\r\n" + "User-Agent: gPXE/" VERSION "\r\n" + "%s%s%s" + "Host: %s\r\n" + "\r\n", + ( path ? path : "/" ), + ( query ? "?" : "" ), + ( query ? query : "" ), + ( user ? + "Authorization: Basic " : "" ), + ( user ? user_pw_base64 : "" ), + ( user ? "\r\n" : "" ), + host ) ) != 0 ) { + http_done ( http, rc ); + } + } +} + +/** + * HTTP connection closed by network stack + * + * @v socket Transport layer interface + * @v rc Reason for close + */ +static void http_socket_close ( struct xfer_interface *socket, int rc ) { + struct http_request *http = + container_of ( socket, struct http_request, socket ); + + DBGC ( http, "HTTP %p socket closed: %s\n", + http, strerror ( rc ) ); + + http_done ( http, rc ); +} + +/** HTTP socket operations */ +static struct xfer_interface_operations http_socket_operations = { + .close = http_socket_close, + .vredirect = xfer_vreopen, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = http_socket_deliver_iob, + .deliver_raw = xfer_deliver_as_iob, +}; + +/** + * Close HTTP data transfer interface + * + * @v xfer Data transfer interface + * @v rc Reason for close + */ +static void http_xfer_close ( struct xfer_interface *xfer, int rc ) { + struct http_request *http = + container_of ( xfer, struct http_request, xfer ); + + DBGC ( http, "HTTP %p interface closed: %s\n", + http, strerror ( rc ) ); + + http_done ( http, rc ); +} + +/** HTTP data transfer interface operations */ +static struct xfer_interface_operations http_xfer_operations = { + .close = http_xfer_close, + .vredirect = ignore_xfer_vredirect, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = xfer_deliver_as_raw, + .deliver_raw = ignore_xfer_deliver_raw, +}; + +/** + * Initiate an HTTP connection, with optional filter + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @v default_port Default port number + * @v filter Filter to apply to socket, or NULL + * @ret rc Return status code + */ +int http_open_filter ( struct xfer_interface *xfer, struct uri *uri, + unsigned int default_port, + int ( * filter ) ( struct xfer_interface *xfer, + struct xfer_interface **next ) ) { + struct http_request *http; + struct sockaddr_tcpip server; + struct xfer_interface *socket; + int rc; + + /* Sanity checks */ + if ( ! uri->host ) + return -EINVAL; + + /* Allocate and populate HTTP structure */ + http = zalloc ( sizeof ( *http ) ); + if ( ! http ) + return -ENOMEM; + http->refcnt.free = http_free; + xfer_init ( &http->xfer, &http_xfer_operations, &http->refcnt ); + http->uri = uri_get ( uri ); + xfer_init ( &http->socket, &http_socket_operations, &http->refcnt ); + process_init ( &http->process, http_step, &http->refcnt ); + + /* Open socket */ + memset ( &server, 0, sizeof ( server ) ); + server.st_port = htons ( uri_port ( http->uri, default_port ) ); + socket = &http->socket; + if ( filter ) { + if ( ( rc = filter ( socket, &socket ) ) != 0 ) + goto err; + } + if ( ( rc = xfer_open_named_socket ( socket, SOCK_STREAM, + ( struct sockaddr * ) &server, + uri->host, NULL ) ) != 0 ) + goto err; + + /* Attach to parent interface, mortalise self, and return */ + xfer_plug_plug ( &http->xfer, xfer ); + ref_put ( &http->refcnt ); + return 0; + + err: + DBGC ( http, "HTTP %p could not create request: %s\n", + http, strerror ( rc ) ); + http_done ( http, rc ); + ref_put ( &http->refcnt ); + return rc; +} + +/** + * Initiate an HTTP connection + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int http_open ( struct xfer_interface *xfer, struct uri *uri ) { + return http_open_filter ( xfer, uri, HTTP_PORT, NULL ); +} + +/** HTTP URI opener */ +struct uri_opener http_uri_opener __uri_opener = { + .scheme = "http", + .open = http_open, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/tcp/https.c b/debian/grub-extras/disabled/gpxe/src/net/tcp/https.c new file mode 100644 index 0000000..7a2961f --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/tcp/https.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * @file + * + * Secure Hyper Text Transfer Protocol (HTTPS) + * + */ + +#include <stddef.h> +#include <gpxe/open.h> +#include <gpxe/tls.h> +#include <gpxe/http.h> +#include <gpxe/features.h> + +FEATURE ( FEATURE_PROTOCOL, "HTTPS", DHCP_EB_FEATURE_HTTPS, 1 ); + +/** + * Initiate an HTTPS connection + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int https_open ( struct xfer_interface *xfer, struct uri *uri ) { + return http_open_filter ( xfer, uri, HTTPS_PORT, add_tls ); +} + +/** HTTPS URI opener */ +struct uri_opener https_uri_opener __uri_opener = { + .scheme = "https", + .open = https_open, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/tcp/iscsi.c b/debian/grub-extras/disabled/gpxe/src/net/tcp/iscsi.c new file mode 100644 index 0000000..771384b --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/tcp/iscsi.c @@ -0,0 +1,1934 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <gpxe/vsprintf.h> +#include <gpxe/socket.h> +#include <gpxe/xfer.h> +#include <gpxe/open.h> +#include <gpxe/scsi.h> +#include <gpxe/process.h> +#include <gpxe/uaccess.h> +#include <gpxe/tcpip.h> +#include <gpxe/settings.h> +#include <gpxe/features.h> +#include <gpxe/iscsi.h> + +/** @file + * + * iSCSI protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "iSCSI", DHCP_EB_FEATURE_ISCSI, 1 ); + +/** iSCSI initiator name (explicitly specified) */ +static char *iscsi_explicit_initiator_iqn; + +/** Default iSCSI initiator name (constructed from hostname) */ +static char *iscsi_default_initiator_iqn; + +/** iSCSI initiator username */ +static char *iscsi_initiator_username; + +/** iSCSI initiator password */ +static char *iscsi_initiator_password; + +/** iSCSI target username */ +static char *iscsi_target_username; + +/** iSCSI target password */ +static char *iscsi_target_password; + +static void iscsi_start_tx ( struct iscsi_session *iscsi ); +static void iscsi_start_login ( struct iscsi_session *iscsi ); +static void iscsi_start_data_out ( struct iscsi_session *iscsi, + unsigned int datasn ); + +/** + * Finish receiving PDU data into buffer + * + * @v iscsi iSCSI session + */ +static void iscsi_rx_buffered_data_done ( struct iscsi_session *iscsi ) { + free ( iscsi->rx_buffer ); + iscsi->rx_buffer = NULL; +} + +/** + * Free iSCSI session + * + * @v refcnt Reference counter + */ +static void iscsi_free ( struct refcnt *refcnt ) { + struct iscsi_session *iscsi = + container_of ( refcnt, struct iscsi_session, refcnt ); + + free ( iscsi->target_address ); + free ( iscsi->target_iqn ); + free ( iscsi->initiator_username ); + free ( iscsi->initiator_password ); + free ( iscsi->target_username ); + free ( iscsi->target_password ); + chap_finish ( &iscsi->chap ); + iscsi_rx_buffered_data_done ( iscsi ); + free ( iscsi ); +} + +/** + * Open iSCSI transport-layer connection + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_open_connection ( struct iscsi_session *iscsi ) { + struct sockaddr_tcpip target; + int rc; + + assert ( iscsi->tx_state == ISCSI_TX_IDLE ); + assert ( iscsi->rx_state == ISCSI_RX_BHS ); + assert ( iscsi->rx_offset == 0 ); + + /* Open socket */ + memset ( &target, 0, sizeof ( target ) ); + target.st_port = htons ( iscsi->target_port ); + if ( ( rc = xfer_open_named_socket ( &iscsi->socket, SOCK_STREAM, + ( struct sockaddr * ) &target, + iscsi->target_address, + NULL ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not open socket: %s\n", + iscsi, strerror ( rc ) ); + return rc; + } + + /* Enter security negotiation phase */ + iscsi->status = ( ISCSI_STATUS_SECURITY_NEGOTIATION_PHASE | + ISCSI_STATUS_STRINGS_SECURITY ); + if ( iscsi->target_username ) + iscsi->status |= ISCSI_STATUS_AUTH_REVERSE_REQUIRED; + + /* Assign fresh initiator task tag */ + iscsi->itt++; + + /* Initiate login */ + iscsi_start_login ( iscsi ); + + return 0; +} + +/** + * Close iSCSI transport-layer connection + * + * @v iscsi iSCSI session + * @v rc Reason for close + * + * Closes the transport-layer connection and resets the session state + * ready to attempt a fresh login. + */ +static void iscsi_close_connection ( struct iscsi_session *iscsi, int rc ) { + + /* Close all data transfer interfaces */ + xfer_close ( &iscsi->socket, rc ); + + /* Clear connection status */ + iscsi->status = 0; + + /* Reset TX and RX state machines */ + iscsi->tx_state = ISCSI_TX_IDLE; + iscsi->rx_state = ISCSI_RX_BHS; + iscsi->rx_offset = 0; + + /* Free any temporary dynamically allocated memory */ + chap_finish ( &iscsi->chap ); + iscsi_rx_buffered_data_done ( iscsi ); +} + +/** + * Mark iSCSI SCSI operation as complete + * + * @v iscsi iSCSI session + * @v rc Return status code + * + * Note that iscsi_scsi_done() will not close the connection, and must + * therefore be called only when the internal state machines are in an + * appropriate state, otherwise bad things may happen on the next call + * to iscsi_issue(). The general rule is to call iscsi_scsi_done() + * only at the end of receiving a PDU; at this point the TX and RX + * engines should both be idle. + */ +static void iscsi_scsi_done ( struct iscsi_session *iscsi, int rc ) { + + assert ( iscsi->tx_state == ISCSI_TX_IDLE ); + assert ( iscsi->command != NULL ); + + iscsi->command->rc = rc; + iscsi->command = NULL; +} + +/**************************************************************************** + * + * iSCSI SCSI command issuing + * + */ + +/** + * Build iSCSI SCSI command BHS + * + * @v iscsi iSCSI session + * + * We don't currently support bidirectional commands (i.e. with both + * Data-In and Data-Out segments); these would require providing code + * to generate an AHS, and there doesn't seem to be any need for it at + * the moment. + */ +static void iscsi_start_command ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_scsi_command *command = &iscsi->tx_bhs.scsi_command; + + assert ( ! ( iscsi->command->data_in && iscsi->command->data_out ) ); + + /* Construct BHS and initiate transmission */ + iscsi_start_tx ( iscsi ); + command->opcode = ISCSI_OPCODE_SCSI_COMMAND; + command->flags = ( ISCSI_FLAG_FINAL | + ISCSI_COMMAND_ATTR_SIMPLE ); + if ( iscsi->command->data_in ) + command->flags |= ISCSI_COMMAND_FLAG_READ; + if ( iscsi->command->data_out ) + command->flags |= ISCSI_COMMAND_FLAG_WRITE; + /* lengths left as zero */ + command->lun = iscsi->lun; + command->itt = htonl ( ++iscsi->itt ); + command->exp_len = htonl ( iscsi->command->data_in_len | + iscsi->command->data_out_len ); + command->cmdsn = htonl ( iscsi->cmdsn ); + command->expstatsn = htonl ( iscsi->statsn + 1 ); + memcpy ( &command->cdb, &iscsi->command->cdb, sizeof ( command->cdb )); + DBGC2 ( iscsi, "iSCSI %p start " SCSI_CDB_FORMAT " %s %#zx\n", + iscsi, SCSI_CDB_DATA ( command->cdb ), + ( iscsi->command->data_in ? "in" : "out" ), + ( iscsi->command->data_in ? + iscsi->command->data_in_len : + iscsi->command->data_out_len ) ); +} + +/** + * Receive data segment of an iSCSI SCSI response PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + */ +static int iscsi_rx_scsi_response ( struct iscsi_session *iscsi, + const void *data, size_t len, + size_t remaining ) { + struct iscsi_bhs_scsi_response *response + = &iscsi->rx_bhs.scsi_response; + int sense_offset; + + /* Capture the sense response code as it floats past, if present */ + sense_offset = ISCSI_SENSE_RESPONSE_CODE_OFFSET - iscsi->rx_offset; + if ( ( sense_offset >= 0 ) && len ) { + iscsi->command->sense_response = + * ( ( char * ) data + sense_offset ); + } + + /* Wait for whole SCSI response to arrive */ + if ( remaining ) + return 0; + + /* Record SCSI status code */ + iscsi->command->status = response->status; + + /* Check for errors */ + if ( response->response != ISCSI_RESPONSE_COMMAND_COMPLETE ) + return -EIO; + + /* Mark as completed */ + iscsi_scsi_done ( iscsi, 0 ); + return 0; +} + +/** + * Receive data segment of an iSCSI data-in PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + */ +static int iscsi_rx_data_in ( struct iscsi_session *iscsi, + const void *data, size_t len, + size_t remaining ) { + struct iscsi_bhs_data_in *data_in = &iscsi->rx_bhs.data_in; + unsigned long offset; + + /* Copy data to data-in buffer */ + offset = ntohl ( data_in->offset ) + iscsi->rx_offset; + assert ( iscsi->command != NULL ); + assert ( iscsi->command->data_in ); + assert ( ( offset + len ) <= iscsi->command->data_in_len ); + copy_to_user ( iscsi->command->data_in, offset, data, len ); + + /* Wait for whole SCSI response to arrive */ + if ( remaining ) + return 0; + + /* Mark as completed if status is present */ + if ( data_in->flags & ISCSI_DATA_FLAG_STATUS ) { + assert ( ( offset + len ) == iscsi->command->data_in_len ); + assert ( data_in->flags & ISCSI_FLAG_FINAL ); + iscsi->command->status = data_in->status; + /* iSCSI cannot return an error status via a data-in */ + iscsi_scsi_done ( iscsi, 0 ); + } + + return 0; +} + +/** + * Receive data segment of an iSCSI R2T PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + */ +static int iscsi_rx_r2t ( struct iscsi_session *iscsi, + const void *data __unused, size_t len __unused, + size_t remaining __unused ) { + struct iscsi_bhs_r2t *r2t = &iscsi->rx_bhs.r2t; + + /* Record transfer parameters and trigger first data-out */ + iscsi->ttt = ntohl ( r2t->ttt ); + iscsi->transfer_offset = ntohl ( r2t->offset ); + iscsi->transfer_len = ntohl ( r2t->len ); + iscsi_start_data_out ( iscsi, 0 ); + + return 0; +} + +/** + * Build iSCSI data-out BHS + * + * @v iscsi iSCSI session + * @v datasn Data sequence number within the transfer + * + */ +static void iscsi_start_data_out ( struct iscsi_session *iscsi, + unsigned int datasn ) { + struct iscsi_bhs_data_out *data_out = &iscsi->tx_bhs.data_out; + unsigned long offset; + unsigned long remaining; + unsigned long len; + + /* We always send 512-byte Data-Out PDUs; this removes the + * need to worry about the target's MaxRecvDataSegmentLength. + */ + offset = datasn * 512; + remaining = iscsi->transfer_len - offset; + len = remaining; + if ( len > 512 ) + len = 512; + + /* Construct BHS and initiate transmission */ + iscsi_start_tx ( iscsi ); + data_out->opcode = ISCSI_OPCODE_DATA_OUT; + if ( len == remaining ) + data_out->flags = ( ISCSI_FLAG_FINAL ); + ISCSI_SET_LENGTHS ( data_out->lengths, 0, len ); + data_out->lun = iscsi->lun; + data_out->itt = htonl ( iscsi->itt ); + data_out->ttt = htonl ( iscsi->ttt ); + data_out->expstatsn = htonl ( iscsi->statsn + 1 ); + data_out->datasn = htonl ( datasn ); + data_out->offset = htonl ( iscsi->transfer_offset + offset ); + DBGC ( iscsi, "iSCSI %p start data out DataSN %#x len %#lx\n", + iscsi, datasn, len ); +} + +/** + * Complete iSCSI data-out PDU transmission + * + * @v iscsi iSCSI session + * + */ +static void iscsi_data_out_done ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_data_out *data_out = &iscsi->tx_bhs.data_out; + + /* If we haven't reached the end of the sequence, start + * sending the next data-out PDU. + */ + if ( ! ( data_out->flags & ISCSI_FLAG_FINAL ) ) + iscsi_start_data_out ( iscsi, ntohl ( data_out->datasn ) + 1 ); +} + +/** + * Send iSCSI data-out data segment + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_tx_data_out ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_data_out *data_out = &iscsi->tx_bhs.data_out; + struct io_buffer *iobuf; + unsigned long offset; + size_t len; + + offset = ntohl ( data_out->offset ); + len = ISCSI_DATA_LEN ( data_out->lengths ); + + assert ( iscsi->command != NULL ); + assert ( iscsi->command->data_out ); + assert ( ( offset + len ) <= iscsi->command->data_out_len ); + + iobuf = xfer_alloc_iob ( &iscsi->socket, len ); + if ( ! iobuf ) + return -ENOMEM; + + copy_from_user ( iob_put ( iobuf, len ), + iscsi->command->data_out, offset, len ); + + return xfer_deliver_iob ( &iscsi->socket, iobuf ); +} + +/**************************************************************************** + * + * iSCSI login + * + */ + +/** + * Build iSCSI login request strings + * + * @v iscsi iSCSI session + * + * These are the initial set of strings sent in the first login + * request PDU. We want the following settings: + * + * HeaderDigest=None + * DataDigest=None + * MaxConnections is irrelevant; we make only one connection anyway + * InitialR2T=Yes [1] + * ImmediateData is irrelevant; we never send immediate data + * MaxRecvDataSegmentLength=8192 (default; we don't care) [3] + * MaxBurstLength=262144 (default; we don't care) [3] + * FirstBurstLength=262144 (default; we don't care) + * DefaultTime2Wait=0 [2] + * DefaultTime2Retain=0 [2] + * MaxOutstandingR2T=1 + * DataPDUInOrder=Yes + * DataSequenceInOrder=Yes + * ErrorRecoveryLevel=0 + * + * [1] InitialR2T has an OR resolution function, so the target may + * force us to use it. We therefore simplify our logic by always + * using it. + * + * [2] These ensure that we can safely start a new task once we have + * reconnected after a failure, without having to manually tidy up + * after the old one. + * + * [3] We are quite happy to use the RFC-defined default values for + * these parameters, but some targets (notably OpenSolaris) + * incorrectly assume a default value of zero, so we explicitly + * specify the default values. + */ +static int iscsi_build_login_request_strings ( struct iscsi_session *iscsi, + void *data, size_t len ) { + unsigned int used = 0; + unsigned int i; + const char *auth_method; + + if ( iscsi->status & ISCSI_STATUS_STRINGS_SECURITY ) { + /* Default to allowing no authentication */ + auth_method = "None"; + /* If we have a credential to supply, permit CHAP */ + if ( iscsi->initiator_username ) + auth_method = "CHAP,None"; + /* If we have a credential to check, force CHAP */ + if ( iscsi->target_username ) + auth_method = "CHAP"; + used += ssnprintf ( data + used, len - used, + "InitiatorName=%s%c" + "TargetName=%s%c" + "SessionType=Normal%c" + "AuthMethod=%s%c", + iscsi_initiator_iqn(), 0, + iscsi->target_iqn, 0, 0, + auth_method, 0 ); + } + + if ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_ALGORITHM ) { + used += ssnprintf ( data + used, len - used, "CHAP_A=5%c", 0 ); + } + + if ( ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_RESPONSE ) ) { + assert ( iscsi->initiator_username != NULL ); + used += ssnprintf ( data + used, len - used, + "CHAP_N=%s%cCHAP_R=0x", + iscsi->initiator_username, 0 ); + for ( i = 0 ; i < iscsi->chap.response_len ; i++ ) { + used += ssnprintf ( data + used, len - used, "%02x", + iscsi->chap.response[i] ); + } + used += ssnprintf ( data + used, len - used, "%c", 0 ); + } + + if ( ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_CHALLENGE ) ) { + used += ssnprintf ( data + used, len - used, + "CHAP_I=%d%cCHAP_C=0x", + iscsi->chap_challenge[0], 0 ); + for ( i = 1 ; i < sizeof ( iscsi->chap_challenge ) ; i++ ) { + used += ssnprintf ( data + used, len - used, "%02x", + iscsi->chap_challenge[i] ); + } + used += ssnprintf ( data + used, len - used, "%c", 0 ); + } + + if ( iscsi->status & ISCSI_STATUS_STRINGS_OPERATIONAL ) { + used += ssnprintf ( data + used, len - used, + "HeaderDigest=None%c" + "DataDigest=None%c" + "InitialR2T=Yes%c" + "MaxRecvDataSegmentLength=8192%c" + "MaxBurstLength=262144%c" + "DefaultTime2Wait=0%c" + "DefaultTime2Retain=0%c" + "MaxOutstandingR2T=1%c" + "DataPDUInOrder=Yes%c" + "DataSequenceInOrder=Yes%c" + "ErrorRecoveryLevel=0%c", + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ); + } + + return used; +} + +/** + * Build iSCSI login request BHS + * + * @v iscsi iSCSI session + */ +static void iscsi_start_login ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_login_request *request = &iscsi->tx_bhs.login_request; + int len; + + /* Construct BHS and initiate transmission */ + iscsi_start_tx ( iscsi ); + request->opcode = ( ISCSI_OPCODE_LOGIN_REQUEST | + ISCSI_FLAG_IMMEDIATE ); + request->flags = ( ( iscsi->status & ISCSI_STATUS_PHASE_MASK ) | + ISCSI_LOGIN_FLAG_TRANSITION ); + /* version_max and version_min left as zero */ + len = iscsi_build_login_request_strings ( iscsi, NULL, 0 ); + ISCSI_SET_LENGTHS ( request->lengths, 0, len ); + request->isid_iana_en = htonl ( ISCSI_ISID_IANA | + IANA_EN_FEN_SYSTEMS ); + /* isid_iana_qual left as zero */ + request->tsih = htons ( iscsi->tsih ); + request->itt = htonl ( iscsi->itt ); + /* cid left as zero */ + request->cmdsn = htonl ( iscsi->cmdsn ); + request->expstatsn = htonl ( iscsi->statsn + 1 ); +} + +/** + * Complete iSCSI login request PDU transmission + * + * @v iscsi iSCSI session + * + */ +static void iscsi_login_request_done ( struct iscsi_session *iscsi ) { + + /* Clear any "strings to send" flags */ + iscsi->status &= ~ISCSI_STATUS_STRINGS_MASK; + + /* Free any dynamically allocated storage used for login */ + chap_finish ( &iscsi->chap ); +} + +/** + * Transmit data segment of an iSCSI login request PDU + * + * @v iscsi iSCSI session + * @ret rc Return status code + * + * For login requests, the data segment consists of the login strings. + */ +static int iscsi_tx_login_request ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_login_request *request = &iscsi->tx_bhs.login_request; + struct io_buffer *iobuf; + size_t len; + + len = ISCSI_DATA_LEN ( request->lengths ); + iobuf = xfer_alloc_iob ( &iscsi->socket, len ); + if ( ! iobuf ) + return -ENOMEM; + iob_put ( iobuf, len ); + iscsi_build_login_request_strings ( iscsi, iobuf->data, len ); + return xfer_deliver_iob ( &iscsi->socket, iobuf ); +} + +/** + * Handle iSCSI TargetAddress text value + * + * @v iscsi iSCSI session + * @v value TargetAddress value + * @ret rc Return status code + */ +static int iscsi_handle_targetaddress_value ( struct iscsi_session *iscsi, + const char *value ) { + char *separator; + + DBGC ( iscsi, "iSCSI %p will redirect to %s\n", iscsi, value ); + + /* Replace target address */ + free ( iscsi->target_address ); + iscsi->target_address = strdup ( value ); + if ( ! iscsi->target_address ) + return -ENOMEM; + + /* Replace target port */ + iscsi->target_port = htons ( ISCSI_PORT ); + separator = strchr ( iscsi->target_address, ':' ); + if ( separator ) { + *separator = '\0'; + iscsi->target_port = strtoul ( ( separator + 1 ), NULL, 0 ); + } + + return 0; +} + +/** + * Handle iSCSI AuthMethod text value + * + * @v iscsi iSCSI session + * @v value AuthMethod value + * @ret rc Return status code + */ +static int iscsi_handle_authmethod_value ( struct iscsi_session *iscsi, + const char *value ) { + + /* If server requests CHAP, send the CHAP_A string */ + if ( strcmp ( value, "CHAP" ) == 0 ) { + DBGC ( iscsi, "iSCSI %p initiating CHAP authentication\n", + iscsi ); + iscsi->status |= ( ISCSI_STATUS_STRINGS_CHAP_ALGORITHM | + ISCSI_STATUS_AUTH_FORWARD_REQUIRED ); + } + + return 0; +} + +/** + * Handle iSCSI CHAP_A text value + * + * @v iscsi iSCSI session + * @v value CHAP_A value + * @ret rc Return status code + */ +static int iscsi_handle_chap_a_value ( struct iscsi_session *iscsi, + const char *value ) { + + /* We only ever offer "5" (i.e. MD5) as an algorithm, so if + * the server responds with anything else it is a protocol + * violation. + */ + if ( strcmp ( value, "5" ) != 0 ) { + DBGC ( iscsi, "iSCSI %p got invalid CHAP algorithm \"%s\"\n", + iscsi, value ); + return -EPROTO; + } + + return 0; +} + +/** + * Handle iSCSI CHAP_I text value + * + * @v iscsi iSCSI session + * @v value CHAP_I value + * @ret rc Return status code + */ +static int iscsi_handle_chap_i_value ( struct iscsi_session *iscsi, + const char *value ) { + unsigned int identifier; + char *endp; + int rc; + + /* The CHAP identifier is an integer value */ + identifier = strtoul ( value, &endp, 0 ); + if ( *endp != '\0' ) { + DBGC ( iscsi, "iSCSI %p saw invalid CHAP identifier \"%s\"\n", + iscsi, value ); + return -EPROTO; + } + + /* Prepare for CHAP with MD5 */ + chap_finish ( &iscsi->chap ); + if ( ( rc = chap_init ( &iscsi->chap, &md5_algorithm ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not initialise CHAP: %s\n", + iscsi, strerror ( rc ) ); + return rc; + } + + /* Identifier and secret are the first two components of the + * challenge. + */ + chap_set_identifier ( &iscsi->chap, identifier ); + if ( iscsi->initiator_password ) { + chap_update ( &iscsi->chap, iscsi->initiator_password, + strlen ( iscsi->initiator_password ) ); + } + + return 0; +} + +/** + * Handle iSCSI CHAP_C text value + * + * @v iscsi iSCSI session + * @v value CHAP_C value + * @ret rc Return status code + */ +static int iscsi_handle_chap_c_value ( struct iscsi_session *iscsi, + const char *value ) { + char buf[3]; + char *endp; + uint8_t byte; + unsigned int i; + + /* Check and strip leading "0x" */ + if ( ( value[0] != '0' ) || ( value[1] != 'x' ) ) { + DBGC ( iscsi, "iSCSI %p saw invalid CHAP challenge \"%s\"\n", + iscsi, value ); + return -EPROTO; + } + value += 2; + + /* Process challenge an octet at a time */ + for ( ; ( value[0] && value[1] ) ; value += 2 ) { + memcpy ( buf, value, 2 ); + buf[2] = 0; + byte = strtoul ( buf, &endp, 16 ); + if ( *endp != '\0' ) { + DBGC ( iscsi, "iSCSI %p saw invalid CHAP challenge " + "byte \"%s\"\n", iscsi, buf ); + return -EPROTO; + } + chap_update ( &iscsi->chap, &byte, sizeof ( byte ) ); + } + + /* Build CHAP response */ + DBGC ( iscsi, "iSCSI %p sending CHAP response\n", iscsi ); + chap_respond ( &iscsi->chap ); + iscsi->status |= ISCSI_STATUS_STRINGS_CHAP_RESPONSE; + + /* Send CHAP challenge, if applicable */ + if ( iscsi->target_username ) { + iscsi->status |= ISCSI_STATUS_STRINGS_CHAP_CHALLENGE; + /* Generate CHAP challenge data */ + for ( i = 0 ; i < sizeof ( iscsi->chap_challenge ) ; i++ ) { + iscsi->chap_challenge[i] = random(); + } + } + + return 0; +} + +/** + * Handle iSCSI CHAP_N text value + * + * @v iscsi iSCSI session + * @v value CHAP_N value + * @ret rc Return status code + */ +static int iscsi_handle_chap_n_value ( struct iscsi_session *iscsi, + const char *value ) { + + /* The target username isn't actually involved at any point in + * the authentication process; it merely serves to identify + * which password the target is using to generate the CHAP + * response. We unnecessarily verify that the username is as + * expected, in order to provide mildly helpful diagnostics if + * the target is supplying the wrong username/password + * combination. + */ + if ( iscsi->target_username && + ( strcmp ( iscsi->target_username, value ) != 0 ) ) { + DBGC ( iscsi, "iSCSI %p target username \"%s\" incorrect " + "(wanted \"%s\")\n", + iscsi, value, iscsi->target_username ); + return -EACCES; + } + + return 0; +} + +/** + * Handle iSCSI CHAP_R text value + * + * @v iscsi iSCSI session + * @v value CHAP_R value + * @ret rc Return status code + */ +static int iscsi_handle_chap_r_value ( struct iscsi_session *iscsi, + const char *value ) { + char buf[3]; + char *endp; + uint8_t byte; + unsigned int i; + int rc; + + /* Generate CHAP response for verification */ + chap_finish ( &iscsi->chap ); + if ( ( rc = chap_init ( &iscsi->chap, &md5_algorithm ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not initialise CHAP: %s\n", + iscsi, strerror ( rc ) ); + return rc; + } + chap_set_identifier ( &iscsi->chap, iscsi->chap_challenge[0] ); + if ( iscsi->target_password ) { + chap_update ( &iscsi->chap, iscsi->target_password, + strlen ( iscsi->target_password ) ); + } + chap_update ( &iscsi->chap, &iscsi->chap_challenge[1], + ( sizeof ( iscsi->chap_challenge ) - 1 ) ); + chap_respond ( &iscsi->chap ); + + /* Check and strip leading "0x" */ + if ( ( value[0] != '0' ) || ( value[1] != 'x' ) ) { + DBGC ( iscsi, "iSCSI %p saw invalid CHAP response \"%s\"\n", + iscsi, value ); + return -EPROTO; + } + value += 2; + + /* Check CHAP response length */ + if ( strlen ( value ) != ( 2 * iscsi->chap.response_len ) ) { + DBGC ( iscsi, "iSCSI %p invalid CHAP response length\n", + iscsi ); + return -EPROTO; + } + + /* Process response an octet at a time */ + for ( i = 0 ; ( value[0] && value[1] ) ; value += 2, i++ ) { + memcpy ( buf, value, 2 ); + buf[2] = 0; + byte = strtoul ( buf, &endp, 16 ); + if ( *endp != '\0' ) { + DBGC ( iscsi, "iSCSI %p saw invalid CHAP response " + "byte \"%s\"\n", iscsi, buf ); + return -EPROTO; + } + if ( byte != iscsi->chap.response[i] ) { + DBGC ( iscsi, "iSCSI %p saw incorrect CHAP " + "response\n", iscsi ); + return -EACCES; + } + } + assert ( i == iscsi->chap.response_len ); + + /* Mark session as authenticated */ + iscsi->status |= ISCSI_STATUS_AUTH_REVERSE_OK; + + return 0; +} + +/** An iSCSI text string that we want to handle */ +struct iscsi_string_type { + /** String key + * + * This is the portion up to and including the "=" sign, + * e.g. "InitiatorName=", "CHAP_A=", etc. + */ + const char *key; + /** Handle iSCSI string value + * + * @v iscsi iSCSI session + * @v value iSCSI string value + * @ret rc Return status code + */ + int ( * handle ) ( struct iscsi_session *iscsi, const char *value ); +}; + +/** iSCSI text strings that we want to handle */ +static struct iscsi_string_type iscsi_string_types[] = { + { "TargetAddress=", iscsi_handle_targetaddress_value }, + { "AuthMethod=", iscsi_handle_authmethod_value }, + { "CHAP_A=", iscsi_handle_chap_a_value }, + { "CHAP_I=", iscsi_handle_chap_i_value }, + { "CHAP_C=", iscsi_handle_chap_c_value }, + { "CHAP_N=", iscsi_handle_chap_n_value }, + { "CHAP_R=", iscsi_handle_chap_r_value }, + { NULL, NULL } +}; + +/** + * Handle iSCSI string + * + * @v iscsi iSCSI session + * @v string iSCSI string (in "key=value" format) + * @ret rc Return status code + */ +static int iscsi_handle_string ( struct iscsi_session *iscsi, + const char *string ) { + struct iscsi_string_type *type; + size_t key_len; + int rc; + + for ( type = iscsi_string_types ; type->key ; type++ ) { + key_len = strlen ( type->key ); + if ( strncmp ( string, type->key, key_len ) != 0 ) + continue; + DBGC ( iscsi, "iSCSI %p handling %s\n", iscsi, string ); + if ( ( rc = type->handle ( iscsi, + ( string + key_len ) ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not handle %s: %s\n", + iscsi, string, strerror ( rc ) ); + return rc; + } + return 0; + } + DBGC ( iscsi, "iSCSI %p ignoring %s\n", iscsi, string ); + return 0; +} + +/** + * Handle iSCSI strings + * + * @v iscsi iSCSI session + * @v string iSCSI string buffer + * @v len Length of string buffer + * @ret rc Return status code + */ +static int iscsi_handle_strings ( struct iscsi_session *iscsi, + const char *strings, size_t len ) { + size_t string_len; + int rc; + + /* Handle each string in turn, taking care not to overrun the + * data buffer in case of badly-terminated data. + */ + while ( 1 ) { + string_len = ( strnlen ( strings, len ) + 1 ); + if ( string_len > len ) + break; + if ( ( rc = iscsi_handle_string ( iscsi, strings ) ) != 0 ) + return rc; + strings += string_len; + len -= string_len; + } + return 0; +} + +/** + * Receive PDU data into buffer + * + * @v iscsi iSCSI session + * @v data Data to receive + * @v len Length of data + * @ret rc Return status code + * + * This can be used when the RX PDU type handler wishes to buffer up + * all received data and process the PDU as a single unit. The caller + * is repsonsible for calling iscsi_rx_buffered_data_done() after + * processing the data. + */ +static int iscsi_rx_buffered_data ( struct iscsi_session *iscsi, + const void *data, size_t len ) { + + /* Allocate buffer on first call */ + if ( ! iscsi->rx_buffer ) { + iscsi->rx_buffer = malloc ( iscsi->rx_len ); + if ( ! iscsi->rx_buffer ) + return -ENOMEM; + } + + /* Copy data to buffer */ + assert ( ( iscsi->rx_offset + len ) <= iscsi->rx_len ); + memcpy ( ( iscsi->rx_buffer + iscsi->rx_offset ), data, len ); + + return 0; +} + +/** + * Convert iSCSI response status to return status code + * + * @v status_class iSCSI status class + * @v status_detail iSCSI status detail + * @ret rc Return status code + */ +static int iscsi_status_to_rc ( unsigned int status_class, + unsigned int status_detail ) { + switch ( status_class ) { + case ISCSI_STATUS_INITIATOR_ERROR : + switch ( status_detail ) { + case ISCSI_STATUS_INITIATOR_ERROR_AUTHENTICATION : + return -EACCES; + case ISCSI_STATUS_INITIATOR_ERROR_AUTHORISATION : + return -EPERM; + case ISCSI_STATUS_INITIATOR_ERROR_NOT_FOUND : + case ISCSI_STATUS_INITIATOR_ERROR_REMOVED : + return -ENODEV; + default : + return -ENOTSUP; + } + case ISCSI_STATUS_TARGET_ERROR : + return -EIO; + default : + return -EINVAL; + } +} + +/** + * Receive data segment of an iSCSI login response PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + */ +static int iscsi_rx_login_response ( struct iscsi_session *iscsi, + const void *data, size_t len, + size_t remaining ) { + struct iscsi_bhs_login_response *response + = &iscsi->rx_bhs.login_response; + int rc; + + /* Buffer up the PDU data */ + if ( ( rc = iscsi_rx_buffered_data ( iscsi, data, len ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not buffer login response: %s\n", + iscsi, strerror ( rc ) ); + return rc; + } + if ( remaining ) + return 0; + + /* Process string data and discard string buffer */ + if ( ( rc = iscsi_handle_strings ( iscsi, iscsi->rx_buffer, + iscsi->rx_len ) ) != 0 ) + return rc; + iscsi_rx_buffered_data_done ( iscsi ); + + /* Check for login redirection */ + if ( response->status_class == ISCSI_STATUS_REDIRECT ) { + DBGC ( iscsi, "iSCSI %p redirecting to new server\n", iscsi ); + iscsi_close_connection ( iscsi, 0 ); + if ( ( rc = iscsi_open_connection ( iscsi ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not redirect: %s\n ", + iscsi, strerror ( rc ) ); + return rc; + } + return 0; + } + + /* Check for fatal errors */ + if ( response->status_class != 0 ) { + DBGC ( iscsi, "iSCSI login failure: class %02x detail %02x\n", + response->status_class, response->status_detail ); + rc = iscsi_status_to_rc ( response->status_class, + response->status_detail ); + iscsi->instant_rc = rc; + return rc; + } + + /* Handle login transitions */ + if ( response->flags & ISCSI_LOGIN_FLAG_TRANSITION ) { + iscsi->status &= ~( ISCSI_STATUS_PHASE_MASK | + ISCSI_STATUS_STRINGS_MASK ); + switch ( response->flags & ISCSI_LOGIN_NSG_MASK ) { + case ISCSI_LOGIN_NSG_OPERATIONAL_NEGOTIATION: + iscsi->status |= + ( ISCSI_STATUS_OPERATIONAL_NEGOTIATION_PHASE | + ISCSI_STATUS_STRINGS_OPERATIONAL ); + break; + case ISCSI_LOGIN_NSG_FULL_FEATURE_PHASE: + iscsi->status |= ISCSI_STATUS_FULL_FEATURE_PHASE; + break; + default: + DBGC ( iscsi, "iSCSI %p got invalid response flags " + "%02x\n", iscsi, response->flags ); + return -EIO; + } + } + + /* Send next login request PDU if we haven't reached the full + * feature phase yet. + */ + if ( ( iscsi->status & ISCSI_STATUS_PHASE_MASK ) != + ISCSI_STATUS_FULL_FEATURE_PHASE ) { + iscsi_start_login ( iscsi ); + return 0; + } + + /* Check that target authentication was successful (if required) */ + if ( ( iscsi->status & ISCSI_STATUS_AUTH_REVERSE_REQUIRED ) && + ! ( iscsi->status & ISCSI_STATUS_AUTH_REVERSE_OK ) ) { + DBGC ( iscsi, "iSCSI %p nefarious target tried to bypass " + "authentication\n", iscsi ); + return -EPROTO; + } + + /* Reset retry count */ + iscsi->retry_count = 0; + + /* Record TSIH for future reference */ + iscsi->tsih = ntohl ( response->tsih ); + + /* Send the actual SCSI command */ + iscsi_start_command ( iscsi ); + + return 0; +} + +/**************************************************************************** + * + * iSCSI to socket interface + * + */ + +/** + * Start up a new TX PDU + * + * @v iscsi iSCSI session + * + * This initiates the process of sending a new PDU. Only one PDU may + * be in transit at any one time. + */ +static void iscsi_start_tx ( struct iscsi_session *iscsi ) { + assert ( iscsi->tx_state == ISCSI_TX_IDLE ); + + /* Initialise TX BHS */ + memset ( &iscsi->tx_bhs, 0, sizeof ( iscsi->tx_bhs ) ); + + /* Flag TX engine to start transmitting */ + iscsi->tx_state = ISCSI_TX_BHS; +} + +/** + * Transmit nothing + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_tx_nothing ( struct iscsi_session *iscsi __unused ) { + return 0; +} + +/** + * Transmit basic header segment of an iSCSI PDU + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_tx_bhs ( struct iscsi_session *iscsi ) { + return xfer_deliver_raw ( &iscsi->socket, &iscsi->tx_bhs, + sizeof ( iscsi->tx_bhs ) ); +} + +/** + * Transmit data segment of an iSCSI PDU + * + * @v iscsi iSCSI session + * @ret rc Return status code + * + * Handle transmission of part of a PDU data segment. iscsi::tx_bhs + * will be valid when this is called. + */ +static int iscsi_tx_data ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_common *common = &iscsi->tx_bhs.common; + + switch ( common->opcode & ISCSI_OPCODE_MASK ) { + case ISCSI_OPCODE_DATA_OUT: + return iscsi_tx_data_out ( iscsi ); + case ISCSI_OPCODE_LOGIN_REQUEST: + return iscsi_tx_login_request ( iscsi ); + default: + /* Nothing to send in other states */ + return 0; + } +} + +/** + * Transmit data padding of an iSCSI PDU + * + * @v iscsi iSCSI session + * @ret rc Return status code + * + * Handle transmission of any data padding in a PDU data segment. + * iscsi::tx_bhs will be valid when this is called. + */ +static int iscsi_tx_data_padding ( struct iscsi_session *iscsi ) { + static const char pad[] = { '\0', '\0', '\0' }; + struct iscsi_bhs_common *common = &iscsi->tx_bhs.common; + size_t pad_len; + + pad_len = ISCSI_DATA_PAD_LEN ( common->lengths ); + if ( ! pad_len ) + return 0; + + return xfer_deliver_raw ( &iscsi->socket, pad, pad_len ); +} + +/** + * Complete iSCSI PDU transmission + * + * @v iscsi iSCSI session + * + * Called when a PDU has been completely transmitted and the TX state + * machine is about to enter the idle state. iscsi::tx_bhs will be + * valid for the just-completed PDU when this is called. + */ +static void iscsi_tx_done ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_common *common = &iscsi->tx_bhs.common; + + switch ( common->opcode & ISCSI_OPCODE_MASK ) { + case ISCSI_OPCODE_DATA_OUT: + iscsi_data_out_done ( iscsi ); + case ISCSI_OPCODE_LOGIN_REQUEST: + iscsi_login_request_done ( iscsi ); + default: + /* No action */ + break; + } +} + +/** + * Transmit iSCSI PDU + * + * @v iscsi iSCSI session + * @v buf Temporary data buffer + * @v len Length of temporary data buffer + * + * Constructs data to be sent for the current TX state + */ +static void iscsi_tx_step ( struct process *process ) { + struct iscsi_session *iscsi = + container_of ( process, struct iscsi_session, process ); + struct iscsi_bhs_common *common = &iscsi->tx_bhs.common; + int ( * tx ) ( struct iscsi_session *iscsi ); + enum iscsi_tx_state next_state; + size_t tx_len; + int rc; + + /* Select fragment to transmit */ + while ( 1 ) { + switch ( iscsi->tx_state ) { + case ISCSI_TX_IDLE: + /* Stop processing */ + return; + case ISCSI_TX_BHS: + tx = iscsi_tx_bhs; + tx_len = sizeof ( iscsi->tx_bhs ); + next_state = ISCSI_TX_AHS; + break; + case ISCSI_TX_AHS: + tx = iscsi_tx_nothing; + tx_len = 0; + next_state = ISCSI_TX_DATA; + break; + case ISCSI_TX_DATA: + tx = iscsi_tx_data; + tx_len = ISCSI_DATA_LEN ( common->lengths ); + next_state = ISCSI_TX_DATA_PADDING; + break; + case ISCSI_TX_DATA_PADDING: + tx = iscsi_tx_data_padding; + tx_len = ISCSI_DATA_PAD_LEN ( common->lengths ); + next_state = ISCSI_TX_IDLE; + break; + default: + assert ( 0 ); + return; + } + + /* Check for window availability, if needed */ + if ( tx_len && ( xfer_window ( &iscsi->socket ) == 0 ) ) { + /* Cannot transmit at this point; stop processing */ + return; + } + + /* Transmit data */ + if ( ( rc = tx ( iscsi ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not transmit: %s\n", + iscsi, strerror ( rc ) ); + return; + } + + /* Move to next state */ + iscsi->tx_state = next_state; + if ( next_state == ISCSI_TX_IDLE ) + iscsi_tx_done ( iscsi ); + } +} + +/** + * Receive basic header segment of an iSCSI PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + * + * This fills in iscsi::rx_bhs with the data from the BHS portion of + * the received PDU. + */ +static int iscsi_rx_bhs ( struct iscsi_session *iscsi, const void *data, + size_t len, size_t remaining __unused ) { + memcpy ( &iscsi->rx_bhs.bytes[iscsi->rx_offset], data, len ); + if ( ( iscsi->rx_offset + len ) >= sizeof ( iscsi->rx_bhs ) ) { + DBGC2 ( iscsi, "iSCSI %p received PDU opcode %#x len %#x\n", + iscsi, iscsi->rx_bhs.common.opcode, + ISCSI_DATA_LEN ( iscsi->rx_bhs.common.lengths ) ); + } + return 0; +} + +/** + * Discard portion of an iSCSI PDU. + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + * + * This discards data from a portion of a received PDU. + */ +static int iscsi_rx_discard ( struct iscsi_session *iscsi __unused, + const void *data __unused, size_t len __unused, + size_t remaining __unused ) { + /* Do nothing */ + return 0; +} + +/** + * Receive data segment of an iSCSI PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + * + * Handle processing of part of a PDU data segment. iscsi::rx_bhs + * will be valid when this is called. + */ +static int iscsi_rx_data ( struct iscsi_session *iscsi, const void *data, + size_t len, size_t remaining ) { + struct iscsi_bhs_common_response *response + = &iscsi->rx_bhs.common_response; + + /* Update cmdsn and statsn */ + iscsi->cmdsn = ntohl ( response->expcmdsn ); + iscsi->statsn = ntohl ( response->statsn ); + + switch ( response->opcode & ISCSI_OPCODE_MASK ) { + case ISCSI_OPCODE_LOGIN_RESPONSE: + return iscsi_rx_login_response ( iscsi, data, len, remaining ); + case ISCSI_OPCODE_SCSI_RESPONSE: + return iscsi_rx_scsi_response ( iscsi, data, len, remaining ); + case ISCSI_OPCODE_DATA_IN: + return iscsi_rx_data_in ( iscsi, data, len, remaining ); + case ISCSI_OPCODE_R2T: + return iscsi_rx_r2t ( iscsi, data, len, remaining ); + default: + if ( remaining ) + return 0; + DBGC ( iscsi, "iSCSI %p unknown opcode %02x\n", iscsi, + response->opcode ); + return -ENOTSUP; + } +} + +/** + * Receive new data + * + * @v socket Transport layer interface + * @v data Received data + * @v len Length of received data + * @ret rc Return status code + * + * This handles received PDUs. The receive strategy is to fill in + * iscsi::rx_bhs with the contents of the BHS portion of the PDU, + * throw away any AHS portion, and then process each part of the data + * portion as it arrives. The data processing routine therefore + * always has a full copy of the BHS available, even for portions of + * the data in different packets to the BHS. + */ +static int iscsi_socket_deliver_raw ( struct xfer_interface *socket, + const void *data, size_t len ) { + struct iscsi_session *iscsi = + container_of ( socket, struct iscsi_session, socket ); + struct iscsi_bhs_common *common = &iscsi->rx_bhs.common; + int ( * rx ) ( struct iscsi_session *iscsi, const void *data, + size_t len, size_t remaining ); + enum iscsi_rx_state next_state; + size_t frag_len; + size_t remaining; + int rc; + + while ( 1 ) { + switch ( iscsi->rx_state ) { + case ISCSI_RX_BHS: + rx = iscsi_rx_bhs; + iscsi->rx_len = sizeof ( iscsi->rx_bhs ); + next_state = ISCSI_RX_AHS; + break; + case ISCSI_RX_AHS: + rx = iscsi_rx_discard; + iscsi->rx_len = 4 * ISCSI_AHS_LEN ( common->lengths ); + next_state = ISCSI_RX_DATA; + break; + case ISCSI_RX_DATA: + rx = iscsi_rx_data; + iscsi->rx_len = ISCSI_DATA_LEN ( common->lengths ); + next_state = ISCSI_RX_DATA_PADDING; + break; + case ISCSI_RX_DATA_PADDING: + rx = iscsi_rx_discard; + iscsi->rx_len = ISCSI_DATA_PAD_LEN ( common->lengths ); + next_state = ISCSI_RX_BHS; + break; + default: + assert ( 0 ); + return -EINVAL; + } + + frag_len = iscsi->rx_len - iscsi->rx_offset; + if ( frag_len > len ) + frag_len = len; + remaining = iscsi->rx_len - iscsi->rx_offset - frag_len; + if ( ( rc = rx ( iscsi, data, frag_len, remaining ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not process received " + "data: %s\n", iscsi, strerror ( rc ) ); + iscsi_close_connection ( iscsi, rc ); + iscsi_scsi_done ( iscsi, rc ); + return rc; + } + + iscsi->rx_offset += frag_len; + data += frag_len; + len -= frag_len; + + /* If all the data for this state has not yet been + * received, stay in this state for now. + */ + if ( iscsi->rx_offset != iscsi->rx_len ) + return 0; + + iscsi->rx_state = next_state; + iscsi->rx_offset = 0; + } + + return 0; +} + +/** + * Handle stream connection closure + * + * @v socket Transport layer interface + * @v rc Reason for close + * + */ +static void iscsi_socket_close ( struct xfer_interface *socket, int rc ) { + struct iscsi_session *iscsi = + container_of ( socket, struct iscsi_session, socket ); + + /* Even a graceful close counts as an error for iSCSI */ + if ( ! rc ) + rc = -ECONNRESET; + + /* Close session cleanly */ + iscsi_close_connection ( iscsi, rc ); + + /* Retry connection if within the retry limit, otherwise fail */ + if ( ++iscsi->retry_count <= ISCSI_MAX_RETRIES ) { + DBGC ( iscsi, "iSCSI %p retrying connection (retry #%d)\n", + iscsi, iscsi->retry_count ); + if ( ( rc = iscsi_open_connection ( iscsi ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not reconnect: %s\n", + iscsi, strerror ( rc ) ); + iscsi_scsi_done ( iscsi, rc ); + } + } else { + DBGC ( iscsi, "iSCSI %p retry count exceeded\n", iscsi ); + iscsi->instant_rc = rc; + iscsi_scsi_done ( iscsi, rc ); + } +} + +/** + * Handle redirection event + * + * @v socket Transport layer interface + * @v type Location type + * @v args Remaining arguments depend upon location type + * @ret rc Return status code + */ +static int iscsi_vredirect ( struct xfer_interface *socket, int type, + va_list args ) { + struct iscsi_session *iscsi = + container_of ( socket, struct iscsi_session, socket ); + va_list tmp; + struct sockaddr *peer; + + /* Intercept redirects to a LOCATION_SOCKET and record the IP + * address for the iBFT. This is a bit of a hack, but avoids + * inventing an ioctl()-style call to retrieve the socket + * address from a data-xfer interface. + */ + if ( type == LOCATION_SOCKET ) { + va_copy ( tmp, args ); + ( void ) va_arg ( tmp, int ); /* Discard "semantics" */ + peer = va_arg ( tmp, struct sockaddr * ); + memcpy ( &iscsi->target_sockaddr, peer, + sizeof ( iscsi->target_sockaddr ) ); + va_end ( tmp ); + } + + return xfer_vreopen ( socket, type, args ); +} + + +/** iSCSI socket operations */ +static struct xfer_interface_operations iscsi_socket_operations = { + .close = iscsi_socket_close, + .vredirect = iscsi_vredirect, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = xfer_deliver_as_raw, + .deliver_raw = iscsi_socket_deliver_raw, +}; + + +/**************************************************************************** + * + * iSCSI command issuing + * + */ + +/** + * Issue SCSI command + * + * @v scsi SCSI device + * @v command SCSI command + * @ret rc Return status code + */ +static int iscsi_command ( struct scsi_device *scsi, + struct scsi_command *command ) { + struct iscsi_session *iscsi = + container_of ( scsi->backend, struct iscsi_session, refcnt ); + int rc; + + /* Abort immediately if we have a recorded permanent failure */ + if ( iscsi->instant_rc ) + return iscsi->instant_rc; + + /* Record SCSI command */ + iscsi->command = command; + + /* Issue command or open connection as appropriate */ + if ( iscsi->status ) { + iscsi_start_command ( iscsi ); + } else { + if ( ( rc = iscsi_open_connection ( iscsi ) ) != 0 ) { + iscsi->command = NULL; + return rc; + } + } + + return 0; +} + +/** + * Shut down iSCSI interface + * + * @v scsi SCSI device + */ +void iscsi_detach ( struct scsi_device *scsi ) { + struct iscsi_session *iscsi = + container_of ( scsi->backend, struct iscsi_session, refcnt ); + + xfer_nullify ( &iscsi->socket ); + iscsi_close_connection ( iscsi, 0 ); + process_del ( &iscsi->process ); + scsi->command = scsi_detached_command; + ref_put ( scsi->backend ); + scsi->backend = NULL; +} + +/**************************************************************************** + * + * Instantiator + * + */ + +/** iSCSI root path components (as per RFC4173) */ +enum iscsi_root_path_component { + RP_LITERAL = 0, + RP_SERVERNAME, + RP_PROTOCOL, + RP_PORT, + RP_LUN, + RP_TARGETNAME, + NUM_RP_COMPONENTS +}; + +/** + * Parse iSCSI root path + * + * @v iscsi iSCSI session + * @v root_path iSCSI root path (as per RFC4173) + * @ret rc Return status code + */ +static int iscsi_parse_root_path ( struct iscsi_session *iscsi, + const char *root_path ) { + char rp_copy[ strlen ( root_path ) + 1 ]; + char *rp_comp[NUM_RP_COMPONENTS]; + char *rp = rp_copy; + int i = 0; + int rc; + + /* Split root path into component parts */ + strcpy ( rp_copy, root_path ); + while ( 1 ) { + rp_comp[i++] = rp; + if ( i == NUM_RP_COMPONENTS ) + break; + for ( ; *rp != ':' ; rp++ ) { + if ( ! *rp ) { + DBGC ( iscsi, "iSCSI %p root path \"%s\" " + "too short\n", iscsi, root_path ); + return -EINVAL; + } + } + *(rp++) = '\0'; + } + + /* Use root path components to configure iSCSI session */ + iscsi->target_address = strdup ( rp_comp[RP_SERVERNAME] ); + if ( ! iscsi->target_address ) + return -ENOMEM; + iscsi->target_port = strtoul ( rp_comp[RP_PORT], NULL, 10 ); + if ( ! iscsi->target_port ) + iscsi->target_port = ISCSI_PORT; + if ( ( rc = scsi_parse_lun ( rp_comp[RP_LUN], &iscsi->lun ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p invalid LUN \"%s\"\n", + iscsi, rp_comp[RP_LUN] ); + return rc; + } + iscsi->target_iqn = strdup ( rp_comp[RP_TARGETNAME] ); + if ( ! iscsi->target_iqn ) + return -ENOMEM; + + return 0; +} + +/** + * Set iSCSI authentication details + * + * @v iscsi iSCSI session + * @v initiator_username Initiator username, if any + * @v initiator_password Initiator password, if any + * @v target_username Target username, if any + * @v target_password Target password, if any + * @ret rc Return status code + */ +static int iscsi_set_auth ( struct iscsi_session *iscsi, + const char *initiator_username, + const char *initiator_password, + const char *target_username, + const char *target_password ) { + + /* Check for initiator or target credentials */ + if ( initiator_username || initiator_password || + target_username || target_password ) { + + /* We must have at least an initiator username+password */ + if ( ! ( initiator_username && initiator_password ) ) + goto invalid_auth; + + /* Store initiator credentials */ + iscsi->initiator_username = strdup ( initiator_username ); + if ( ! iscsi->initiator_username ) + return -ENOMEM; + iscsi->initiator_password = strdup ( initiator_password ); + if ( ! iscsi->initiator_password ) + return -ENOMEM; + + /* Check for target credentials */ + if ( target_username || target_password ) { + + /* We must have target username+password */ + if ( ! ( target_username && target_password ) ) + goto invalid_auth; + + /* Store target credentials */ + iscsi->target_username = strdup ( target_username ); + if ( ! iscsi->target_username ) + return -ENOMEM; + iscsi->target_password = strdup ( target_password ); + if ( ! iscsi->target_password ) + return -ENOMEM; + } + } + + return 0; + + invalid_auth: + DBGC ( iscsi, "iSCSI %p invalid credentials: initiator " + "%sname,%spw, target %sname,%spw\n", iscsi, + ( initiator_username ? "" : "no " ), + ( initiator_password ? "" : "no " ), + ( target_username ? "" : "no " ), + ( target_password ? "" : "no " ) ); + return -EINVAL; +} + +/** + * Attach iSCSI interface + * + * @v scsi SCSI device + * @v root_path iSCSI root path (as per RFC4173) + * @ret rc Return status code + */ +int iscsi_attach ( struct scsi_device *scsi, const char *root_path ) { + struct iscsi_session *iscsi; + int rc; + + /* Allocate and initialise structure */ + iscsi = zalloc ( sizeof ( *iscsi ) ); + if ( ! iscsi ) + return -ENOMEM; + iscsi->refcnt.free = iscsi_free; + xfer_init ( &iscsi->socket, &iscsi_socket_operations, &iscsi->refcnt ); + process_init ( &iscsi->process, iscsi_tx_step, &iscsi->refcnt ); + + /* Parse root path */ + if ( ( rc = iscsi_parse_root_path ( iscsi, root_path ) ) != 0 ) + goto err; + /* Set fields not specified by root path */ + if ( ( rc = iscsi_set_auth ( iscsi, + iscsi_initiator_username, + iscsi_initiator_password, + iscsi_target_username, + iscsi_target_password ) ) != 0 ) + goto err; + + /* Sanity checks */ + if ( ! iscsi->target_address ) { + DBGC ( iscsi, "iSCSI %p does not yet support discovery\n", + iscsi ); + rc = -ENOTSUP; + goto err; + } + if ( ! iscsi->target_iqn ) { + DBGC ( iscsi, "iSCSI %p no target address supplied in %s\n", + iscsi, root_path ); + rc = -EINVAL; + goto err; + } + + /* Attach parent interface, mortalise self, and return */ + scsi->backend = ref_get ( &iscsi->refcnt ); + scsi->command = iscsi_command; + ref_put ( &iscsi->refcnt ); + return 0; + + err: + ref_put ( &iscsi->refcnt ); + return rc; +} + +/**************************************************************************** + * + * Settings + * + */ + +/** iSCSI initiator IQN setting */ +struct setting initiator_iqn_setting __setting = { + .name = "initiator-iqn", + .description = "iSCSI initiator name", + .tag = DHCP_ISCSI_INITIATOR_IQN, + .type = &setting_type_string, +}; + +/** iSCSI reverse username setting */ +struct setting reverse_username_setting __setting = { + .name = "reverse-username", + .description = "Reverse user name", + .tag = DHCP_EB_REVERSE_USERNAME, + .type = &setting_type_string, +}; + +/** iSCSI reverse password setting */ +struct setting reverse_password_setting __setting = { + .name = "reverse-password", + .description = "Reverse password", + .tag = DHCP_EB_REVERSE_PASSWORD, + .type = &setting_type_string, +}; + +/** An iSCSI string setting */ +struct iscsi_string_setting { + /** Setting */ + struct setting *setting; + /** String to update */ + char **string; + /** String prefix */ + const char *prefix; +}; + +/** iSCSI string settings */ +static struct iscsi_string_setting iscsi_string_settings[] = { + { + .setting = &initiator_iqn_setting, + .string = &iscsi_explicit_initiator_iqn, + .prefix = "", + }, + { + .setting = &username_setting, + .string = &iscsi_initiator_username, + .prefix = "", + }, + { + .setting = &password_setting, + .string = &iscsi_initiator_password, + .prefix = "", + }, + { + .setting = &reverse_username_setting, + .string = &iscsi_target_username, + .prefix = "", + }, + { + .setting = &reverse_password_setting, + .string = &iscsi_target_password, + .prefix = "", + }, + { + .setting = &hostname_setting, + .string = &iscsi_default_initiator_iqn, + .prefix = "iqn.2000-01.org.etherboot:", + }, +}; + +/** + * Apply iSCSI setting + * + * @v setting iSCSI string setting + * @ret rc Return status code + */ +static int apply_iscsi_string_setting ( struct iscsi_string_setting *setting ){ + size_t prefix_len; + int setting_len; + size_t len; + int check_len; + char *p; + + /* Free old string */ + free ( *setting->string ); + *setting->string = NULL; + + /* Allocate new string */ + prefix_len = strlen ( setting->prefix ); + setting_len = fetch_setting_len ( NULL, setting->setting ); + if ( setting_len < 0 ) { + /* Missing settings are not errors; leave strings as NULL */ + return 0; + } + len = ( prefix_len + setting_len + 1 ); + p = *setting->string = malloc ( len ); + if ( ! p ) + return -ENOMEM; + + /* Fill new string */ + strcpy ( p, setting->prefix ); + check_len = fetch_string_setting ( NULL, setting->setting, + ( p + prefix_len ), + ( len - prefix_len ) ); + assert ( check_len == setting_len ); + + return 0; +} + +/** + * Apply iSCSI settings + * + * @ret rc Return status code + */ +static int apply_iscsi_settings ( void ) { + struct iscsi_string_setting *setting; + unsigned int i; + int rc; + + for ( i = 0 ; i < ( sizeof ( iscsi_string_settings ) / + sizeof ( iscsi_string_settings[0] ) ) ; i++ ) { + setting = &iscsi_string_settings[i]; + if ( ( rc = apply_iscsi_string_setting ( setting ) ) != 0 ) { + DBG ( "iSCSI could not apply setting %s\n", + setting->setting->name ); + return rc; + } + } + + return 0; +} + +/** iSCSI settings applicator */ +struct settings_applicator iscsi_settings_applicator __settings_applicator = { + .apply = apply_iscsi_settings, +}; + +/**************************************************************************** + * + * Initiator name + * + */ + +/** + * Get iSCSI initiator IQN + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +const char * iscsi_initiator_iqn ( void ) { + + if ( iscsi_explicit_initiator_iqn ) + return iscsi_explicit_initiator_iqn; + if ( iscsi_default_initiator_iqn ) + return iscsi_default_initiator_iqn; + return "iqn.2000-09.org.etherboot:UNKNOWN"; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/tcpip.c b/debian/grub-extras/disabled/gpxe/src/net/tcpip.c new file mode 100644 index 0000000..932fd48 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/tcpip.c @@ -0,0 +1,135 @@ +#include <stdint.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <gpxe/iobuf.h> +#include <gpxe/tables.h> +#include <gpxe/tcpip.h> + +/** @file + * + * Transport-network layer interface + * + * This file contains functions and utilities for the + * TCP/IP transport-network layer interface + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** Process a received TCP/IP packet + * + * @v iobuf I/O buffer + * @v tcpip_proto Transport-layer protocol number + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @ret rc Return status code + * + * This function expects a transport-layer segment from the network + * layer. The network layer should fill in as much as it can of the + * source and destination addresses (i.e. it should fill in the + * address family and the network-layer addresses, but leave the ports + * and the rest of the structures as zero). + */ +int tcpip_rx ( struct io_buffer *iobuf, uint8_t tcpip_proto, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, + uint16_t pshdr_csum ) { + struct tcpip_protocol *tcpip; + + /* Hand off packet to the appropriate transport-layer protocol */ + for_each_table_entry ( tcpip, TCPIP_PROTOCOLS ) { + if ( tcpip->tcpip_proto == tcpip_proto ) { + DBG ( "TCP/IP received %s packet\n", tcpip->name ); + return tcpip->rx ( iobuf, st_src, st_dest, pshdr_csum ); + } + } + + DBG ( "Unrecognised TCP/IP protocol %d\n", tcpip_proto ); + free_iob ( iobuf ); + return -EPROTONOSUPPORT; +} + +/** Transmit a TCP/IP packet + * + * @v iobuf I/O buffer + * @v tcpip_protocol Transport-layer protocol + * @v st_src Source address, or NULL to use route default + * @v st_dest Destination address + * @v netdev Network device to use if no route found, or NULL + * @v trans_csum Transport-layer checksum to complete, or NULL + * @ret rc Return status code + */ +int tcpip_tx ( struct io_buffer *iobuf, struct tcpip_protocol *tcpip_protocol, + struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, + struct net_device *netdev, uint16_t *trans_csum ) { + struct tcpip_net_protocol *tcpip_net; + + /* Hand off packet to the appropriate network-layer protocol */ + for_each_table_entry ( tcpip_net, TCPIP_NET_PROTOCOLS ) { + if ( tcpip_net->sa_family == st_dest->st_family ) { + DBG ( "TCP/IP sending %s packet\n", tcpip_net->name ); + return tcpip_net->tx ( iobuf, tcpip_protocol, st_src, + st_dest, netdev, trans_csum ); + } + } + + DBG ( "Unrecognised TCP/IP address family %d\n", st_dest->st_family ); + free_iob ( iobuf ); + return -EAFNOSUPPORT; +} + +/** + * Calculate continued TCP/IP checkum + * + * @v partial Checksum of already-summed data, in network byte order + * @v data Data buffer + * @v len Length of data buffer + * @ret cksum Updated checksum, in network byte order + * + * Calculates a TCP/IP-style 16-bit checksum over the data block. The + * checksum is returned in network byte order. + * + * This function may be used to add new data to an existing checksum. + * The function assumes that both the old data and the new data start + * on even byte offsets; if this is not the case then you will need to + * byte-swap either the input partial checksum, the output checksum, + * or both. Deciding which to swap is left as an exercise for the + * interested reader. + */ +uint16_t tcpip_continue_chksum ( uint16_t partial, const void *data, + size_t len ) { + unsigned int cksum = ( ( ~partial ) & 0xffff ); + unsigned int value; + unsigned int i; + + for ( i = 0 ; i < len ; i++ ) { + value = * ( ( uint8_t * ) data + i ); + if ( i & 1 ) { + /* Odd bytes: swap on little-endian systems */ + value = be16_to_cpu ( value ); + } else { + /* Even bytes: swap on big-endian systems */ + value = le16_to_cpu ( value ); + } + cksum += value; + if ( cksum > 0xffff ) + cksum -= 0xffff; + } + + return ( ~cksum ); +} + +/** + * Calculate TCP/IP checkum + * + * @v data Data buffer + * @v len Length of data buffer + * @ret cksum Checksum, in network byte order + * + * Calculates a TCP/IP-style 16-bit checksum over the data block. The + * checksum is returned in network byte order. + */ +uint16_t tcpip_chksum ( const void *data, size_t len ) { + return tcpip_continue_chksum ( TCPIP_EMPTY_CSUM, data, len ); +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/tls.c b/debian/grub-extras/disabled/gpxe/src/net/tls.c new file mode 100644 index 0000000..a5b126e --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/tls.c @@ -0,0 +1,1759 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * @file + * + * Transport Layer Security Protocol + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <gpxe/hmac.h> +#include <gpxe/md5.h> +#include <gpxe/sha1.h> +#include <gpxe/aes.h> +#include <gpxe/rsa.h> +#include <gpxe/xfer.h> +#include <gpxe/open.h> +#include <gpxe/filter.h> +#include <gpxe/asn1.h> +#include <gpxe/x509.h> +#include <gpxe/tls.h> + +static int tls_send_plaintext ( struct tls_session *tls, unsigned int type, + const void *data, size_t len ); +static void tls_clear_cipher ( struct tls_session *tls, + struct tls_cipherspec *cipherspec ); + +/****************************************************************************** + * + * Utility functions + * + ****************************************************************************** + */ + +/** + * Extract 24-bit field value + * + * @v field24 24-bit field + * @ret value Field value + * + * TLS uses 24-bit integers in several places, which are awkward to + * parse in C. + */ +static unsigned long tls_uint24 ( uint8_t field24[3] ) { + return ( ( field24[0] << 16 ) + ( field24[1] << 8 ) + field24[2] ); +} + +/****************************************************************************** + * + * Cleanup functions + * + ****************************************************************************** + */ + +/** + * Free TLS session + * + * @v refcnt Reference counter + */ +static void free_tls ( struct refcnt *refcnt ) { + struct tls_session *tls = + container_of ( refcnt, struct tls_session, refcnt ); + + /* Free dynamically-allocated resources */ + tls_clear_cipher ( tls, &tls->tx_cipherspec ); + tls_clear_cipher ( tls, &tls->tx_cipherspec_pending ); + tls_clear_cipher ( tls, &tls->rx_cipherspec ); + tls_clear_cipher ( tls, &tls->rx_cipherspec_pending ); + x509_free_rsa_public_key ( &tls->rsa ); + free ( tls->rx_data ); + + /* Free TLS structure itself */ + free ( tls ); +} + +/** + * Finish with TLS session + * + * @v tls TLS session + * @v rc Status code + */ +static void tls_close ( struct tls_session *tls, int rc ) { + + /* Remove process */ + process_del ( &tls->process ); + + /* Close ciphertext and plaintext streams */ + xfer_nullify ( &tls->cipherstream.xfer ); + xfer_close ( &tls->cipherstream.xfer, rc ); + xfer_nullify ( &tls->plainstream.xfer ); + xfer_close ( &tls->plainstream.xfer, rc ); +} + +/****************************************************************************** + * + * Random number generation + * + ****************************************************************************** + */ + +/** + * Generate random data + * + * @v data Buffer to fill + * @v len Length of buffer + */ +static void tls_generate_random ( void *data, size_t len ) { + /* FIXME: Some real random data source would be nice... */ + memset ( data, 0x01, len ); +} + +/** + * Update HMAC with a list of ( data, len ) pairs + * + * @v digest Hash function to use + * @v digest_ctx Digest context + * @v args ( data, len ) pairs of data, terminated by NULL + */ +static void tls_hmac_update_va ( struct digest_algorithm *digest, + void *digest_ctx, va_list args ) { + void *data; + size_t len; + + while ( ( data = va_arg ( args, void * ) ) ) { + len = va_arg ( args, size_t ); + hmac_update ( digest, digest_ctx, data, len ); + } +} + +/** + * Generate secure pseudo-random data using a single hash function + * + * @v tls TLS session + * @v digest Hash function to use + * @v secret Secret + * @v secret_len Length of secret + * @v out Output buffer + * @v out_len Length of output buffer + * @v seeds ( data, len ) pairs of seed data, terminated by NULL + */ +static void tls_p_hash_va ( struct tls_session *tls, + struct digest_algorithm *digest, + void *secret, size_t secret_len, + void *out, size_t out_len, + va_list seeds ) { + uint8_t secret_copy[secret_len]; + uint8_t digest_ctx[digest->ctxsize]; + uint8_t digest_ctx_partial[digest->ctxsize]; + uint8_t a[digest->digestsize]; + uint8_t out_tmp[digest->digestsize]; + size_t frag_len = digest->digestsize; + va_list tmp; + + /* Copy the secret, in case HMAC modifies it */ + memcpy ( secret_copy, secret, secret_len ); + secret = secret_copy; + DBGC2 ( tls, "TLS %p %s secret:\n", tls, digest->name ); + DBGC2_HD ( tls, secret, secret_len ); + + /* Calculate A(1) */ + hmac_init ( digest, digest_ctx, secret, &secret_len ); + va_copy ( tmp, seeds ); + tls_hmac_update_va ( digest, digest_ctx, tmp ); + va_end ( tmp ); + hmac_final ( digest, digest_ctx, secret, &secret_len, a ); + DBGC2 ( tls, "TLS %p %s A(1):\n", tls, digest->name ); + DBGC2_HD ( tls, &a, sizeof ( a ) ); + + /* Generate as much data as required */ + while ( out_len ) { + /* Calculate output portion */ + hmac_init ( digest, digest_ctx, secret, &secret_len ); + hmac_update ( digest, digest_ctx, a, sizeof ( a ) ); + memcpy ( digest_ctx_partial, digest_ctx, digest->ctxsize ); + va_copy ( tmp, seeds ); + tls_hmac_update_va ( digest, digest_ctx, tmp ); + va_end ( tmp ); + hmac_final ( digest, digest_ctx, + secret, &secret_len, out_tmp ); + + /* Copy output */ + if ( frag_len > out_len ) + frag_len = out_len; + memcpy ( out, out_tmp, frag_len ); + DBGC2 ( tls, "TLS %p %s output:\n", tls, digest->name ); + DBGC2_HD ( tls, out, frag_len ); + + /* Calculate A(i) */ + hmac_final ( digest, digest_ctx_partial, + secret, &secret_len, a ); + DBGC2 ( tls, "TLS %p %s A(n):\n", tls, digest->name ); + DBGC2_HD ( tls, &a, sizeof ( a ) ); + + out += frag_len; + out_len -= frag_len; + } +} + +/** + * Generate secure pseudo-random data + * + * @v tls TLS session + * @v secret Secret + * @v secret_len Length of secret + * @v out Output buffer + * @v out_len Length of output buffer + * @v ... ( data, len ) pairs of seed data, terminated by NULL + */ +static void tls_prf ( struct tls_session *tls, void *secret, size_t secret_len, + void *out, size_t out_len, ... ) { + va_list seeds; + va_list tmp; + size_t subsecret_len; + void *md5_secret; + void *sha1_secret; + uint8_t out_md5[out_len]; + uint8_t out_sha1[out_len]; + unsigned int i; + + va_start ( seeds, out_len ); + + /* Split secret into two, with an overlap of up to one byte */ + subsecret_len = ( ( secret_len + 1 ) / 2 ); + md5_secret = secret; + sha1_secret = ( secret + secret_len - subsecret_len ); + + /* Calculate MD5 portion */ + va_copy ( tmp, seeds ); + tls_p_hash_va ( tls, &md5_algorithm, md5_secret, subsecret_len, + out_md5, out_len, seeds ); + va_end ( tmp ); + + /* Calculate SHA1 portion */ + va_copy ( tmp, seeds ); + tls_p_hash_va ( tls, &sha1_algorithm, sha1_secret, subsecret_len, + out_sha1, out_len, seeds ); + va_end ( tmp ); + + /* XOR the two portions together into the final output buffer */ + for ( i = 0 ; i < out_len ; i++ ) { + *( ( uint8_t * ) out + i ) = ( out_md5[i] ^ out_sha1[i] ); + } + + va_end ( seeds ); +} + +/** + * Generate secure pseudo-random data + * + * @v secret Secret + * @v secret_len Length of secret + * @v out Output buffer + * @v out_len Length of output buffer + * @v label String literal label + * @v ... ( data, len ) pairs of seed data + */ +#define tls_prf_label( tls, secret, secret_len, out, out_len, label, ... ) \ + tls_prf ( (tls), (secret), (secret_len), (out), (out_len), \ + label, ( sizeof ( label ) - 1 ), __VA_ARGS__, NULL ) + +/****************************************************************************** + * + * Secret management + * + ****************************************************************************** + */ + +/** + * Generate master secret + * + * @v tls TLS session + * + * The pre-master secret and the client and server random values must + * already be known. + */ +static void tls_generate_master_secret ( struct tls_session *tls ) { + DBGC ( tls, "TLS %p pre-master-secret:\n", tls ); + DBGC_HD ( tls, &tls->pre_master_secret, + sizeof ( tls->pre_master_secret ) ); + DBGC ( tls, "TLS %p client random bytes:\n", tls ); + DBGC_HD ( tls, &tls->client_random, sizeof ( tls->client_random ) ); + DBGC ( tls, "TLS %p server random bytes:\n", tls ); + DBGC_HD ( tls, &tls->server_random, sizeof ( tls->server_random ) ); + + tls_prf_label ( tls, &tls->pre_master_secret, + sizeof ( tls->pre_master_secret ), + &tls->master_secret, sizeof ( tls->master_secret ), + "master secret", + &tls->client_random, sizeof ( tls->client_random ), + &tls->server_random, sizeof ( tls->server_random ) ); + + DBGC ( tls, "TLS %p generated master secret:\n", tls ); + DBGC_HD ( tls, &tls->master_secret, sizeof ( tls->master_secret ) ); +} + +/** + * Generate key material + * + * @v tls TLS session + * + * The master secret must already be known. + */ +static int tls_generate_keys ( struct tls_session *tls ) { + struct tls_cipherspec *tx_cipherspec = &tls->tx_cipherspec_pending; + struct tls_cipherspec *rx_cipherspec = &tls->rx_cipherspec_pending; + size_t hash_size = tx_cipherspec->digest->digestsize; + size_t key_size = tx_cipherspec->key_len; + size_t iv_size = tx_cipherspec->cipher->blocksize; + size_t total = ( 2 * ( hash_size + key_size + iv_size ) ); + uint8_t key_block[total]; + uint8_t *key; + int rc; + + /* Generate key block */ + tls_prf_label ( tls, &tls->master_secret, sizeof ( tls->master_secret ), + key_block, sizeof ( key_block ), "key expansion", + &tls->server_random, sizeof ( tls->server_random ), + &tls->client_random, sizeof ( tls->client_random ) ); + + /* Split key block into portions */ + key = key_block; + + /* TX MAC secret */ + memcpy ( tx_cipherspec->mac_secret, key, hash_size ); + DBGC ( tls, "TLS %p TX MAC secret:\n", tls ); + DBGC_HD ( tls, key, hash_size ); + key += hash_size; + + /* RX MAC secret */ + memcpy ( rx_cipherspec->mac_secret, key, hash_size ); + DBGC ( tls, "TLS %p RX MAC secret:\n", tls ); + DBGC_HD ( tls, key, hash_size ); + key += hash_size; + + /* TX key */ + if ( ( rc = cipher_setkey ( tx_cipherspec->cipher, + tx_cipherspec->cipher_ctx, + key, key_size ) ) != 0 ) { + DBGC ( tls, "TLS %p could not set TX key: %s\n", + tls, strerror ( rc ) ); + return rc; + } + DBGC ( tls, "TLS %p TX key:\n", tls ); + DBGC_HD ( tls, key, key_size ); + key += key_size; + + /* RX key */ + if ( ( rc = cipher_setkey ( rx_cipherspec->cipher, + rx_cipherspec->cipher_ctx, + key, key_size ) ) != 0 ) { + DBGC ( tls, "TLS %p could not set TX key: %s\n", + tls, strerror ( rc ) ); + return rc; + } + DBGC ( tls, "TLS %p RX key:\n", tls ); + DBGC_HD ( tls, key, key_size ); + key += key_size; + + /* TX initialisation vector */ + cipher_setiv ( tx_cipherspec->cipher, tx_cipherspec->cipher_ctx, key ); + DBGC ( tls, "TLS %p TX IV:\n", tls ); + DBGC_HD ( tls, key, iv_size ); + key += iv_size; + + /* RX initialisation vector */ + cipher_setiv ( rx_cipherspec->cipher, rx_cipherspec->cipher_ctx, key ); + DBGC ( tls, "TLS %p RX IV:\n", tls ); + DBGC_HD ( tls, key, iv_size ); + key += iv_size; + + assert ( ( key_block + total ) == key ); + + return 0; +} + +/****************************************************************************** + * + * Cipher suite management + * + ****************************************************************************** + */ + +/** + * Clear cipher suite + * + * @v cipherspec TLS cipher specification + */ +static void tls_clear_cipher ( struct tls_session *tls __unused, + struct tls_cipherspec *cipherspec ) { + free ( cipherspec->dynamic ); + memset ( cipherspec, 0, sizeof ( cipherspec ) ); + cipherspec->pubkey = &pubkey_null; + cipherspec->cipher = &cipher_null; + cipherspec->digest = &digest_null; +} + +/** + * Set cipher suite + * + * @v tls TLS session + * @v cipherspec TLS cipher specification + * @v pubkey Public-key encryption elgorithm + * @v cipher Bulk encryption cipher algorithm + * @v digest MAC digest algorithm + * @v key_len Key length + * @ret rc Return status code + */ +static int tls_set_cipher ( struct tls_session *tls, + struct tls_cipherspec *cipherspec, + struct pubkey_algorithm *pubkey, + struct cipher_algorithm *cipher, + struct digest_algorithm *digest, + size_t key_len ) { + size_t total; + void *dynamic; + + /* Clear out old cipher contents, if any */ + tls_clear_cipher ( tls, cipherspec ); + + /* Allocate dynamic storage */ + total = ( pubkey->ctxsize + 2 * cipher->ctxsize + digest->digestsize ); + dynamic = malloc ( total ); + if ( ! dynamic ) { + DBGC ( tls, "TLS %p could not allocate %zd bytes for crypto " + "context\n", tls, total ); + return -ENOMEM; + } + memset ( dynamic, 0, total ); + + /* Assign storage */ + cipherspec->dynamic = dynamic; + cipherspec->pubkey_ctx = dynamic; dynamic += pubkey->ctxsize; + cipherspec->cipher_ctx = dynamic; dynamic += cipher->ctxsize; + cipherspec->cipher_next_ctx = dynamic; dynamic += cipher->ctxsize; + cipherspec->mac_secret = dynamic; dynamic += digest->digestsize; + assert ( ( cipherspec->dynamic + total ) == dynamic ); + + /* Store parameters */ + cipherspec->pubkey = pubkey; + cipherspec->cipher = cipher; + cipherspec->digest = digest; + cipherspec->key_len = key_len; + + return 0; +} + +/** + * Select next cipher suite + * + * @v tls TLS session + * @v cipher_suite Cipher suite specification + * @ret rc Return status code + */ +static int tls_select_cipher ( struct tls_session *tls, + unsigned int cipher_suite ) { + struct pubkey_algorithm *pubkey = &pubkey_null; + struct cipher_algorithm *cipher = &cipher_null; + struct digest_algorithm *digest = &digest_null; + unsigned int key_len = 0; + int rc; + + switch ( cipher_suite ) { + case htons ( TLS_RSA_WITH_AES_128_CBC_SHA ): + key_len = ( 128 / 8 ); + cipher = &aes_cbc_algorithm; + digest = &sha1_algorithm; + break; + case htons ( TLS_RSA_WITH_AES_256_CBC_SHA ): + key_len = ( 256 / 8 ); + cipher = &aes_cbc_algorithm; + digest = &sha1_algorithm; + break; + default: + DBGC ( tls, "TLS %p does not support cipher %04x\n", + tls, ntohs ( cipher_suite ) ); + return -ENOTSUP; + } + + /* Set ciphers */ + if ( ( rc = tls_set_cipher ( tls, &tls->tx_cipherspec_pending, pubkey, + cipher, digest, key_len ) ) != 0 ) + return rc; + if ( ( rc = tls_set_cipher ( tls, &tls->rx_cipherspec_pending, pubkey, + cipher, digest, key_len ) ) != 0 ) + return rc; + + DBGC ( tls, "TLS %p selected %s-%s-%d-%s\n", tls, + pubkey->name, cipher->name, ( key_len * 8 ), digest->name ); + + return 0; +} + +/** + * Activate next cipher suite + * + * @v tls TLS session + * @v pending Pending cipher specification + * @v active Active cipher specification to replace + * @ret rc Return status code + */ +static int tls_change_cipher ( struct tls_session *tls, + struct tls_cipherspec *pending, + struct tls_cipherspec *active ) { + + /* Sanity check */ + if ( /* FIXME (when pubkey is not hard-coded to RSA): + * ( pending->pubkey == &pubkey_null ) || */ + ( pending->cipher == &cipher_null ) || + ( pending->digest == &digest_null ) ) { + DBGC ( tls, "TLS %p refusing to use null cipher\n", tls ); + return -ENOTSUP; + } + + tls_clear_cipher ( tls, active ); + memswap ( active, pending, sizeof ( *active ) ); + return 0; +} + +/****************************************************************************** + * + * Handshake verification + * + ****************************************************************************** + */ + +/** + * Add handshake record to verification hash + * + * @v tls TLS session + * @v data Handshake record + * @v len Length of handshake record + */ +static void tls_add_handshake ( struct tls_session *tls, + const void *data, size_t len ) { + + digest_update ( &md5_algorithm, tls->handshake_md5_ctx, data, len ); + digest_update ( &sha1_algorithm, tls->handshake_sha1_ctx, data, len ); +} + +/** + * Calculate handshake verification hash + * + * @v tls TLS session + * @v out Output buffer + * + * Calculates the MD5+SHA1 digest over all handshake messages seen so + * far. + */ +static void tls_verify_handshake ( struct tls_session *tls, void *out ) { + struct digest_algorithm *md5 = &md5_algorithm; + struct digest_algorithm *sha1 = &sha1_algorithm; + uint8_t md5_ctx[md5->ctxsize]; + uint8_t sha1_ctx[sha1->ctxsize]; + void *md5_digest = out; + void *sha1_digest = ( out + md5->digestsize ); + + memcpy ( md5_ctx, tls->handshake_md5_ctx, sizeof ( md5_ctx ) ); + memcpy ( sha1_ctx, tls->handshake_sha1_ctx, sizeof ( sha1_ctx ) ); + digest_final ( md5, md5_ctx, md5_digest ); + digest_final ( sha1, sha1_ctx, sha1_digest ); +} + +/****************************************************************************** + * + * Record handling + * + ****************************************************************************** + */ + +/** + * Transmit Handshake record + * + * @v tls TLS session + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_send_handshake ( struct tls_session *tls, + void *data, size_t len ) { + + /* Add to handshake digest */ + tls_add_handshake ( tls, data, len ); + + /* Send record */ + return tls_send_plaintext ( tls, TLS_TYPE_HANDSHAKE, data, len ); +} + +/** + * Transmit Client Hello record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_client_hello ( struct tls_session *tls ) { + struct { + uint32_t type_length; + uint16_t version; + uint8_t random[32]; + uint8_t session_id_len; + uint16_t cipher_suite_len; + uint16_t cipher_suites[2]; + uint8_t compression_methods_len; + uint8_t compression_methods[1]; + } __attribute__ (( packed )) hello; + + memset ( &hello, 0, sizeof ( hello ) ); + hello.type_length = ( cpu_to_le32 ( TLS_CLIENT_HELLO ) | + htonl ( sizeof ( hello ) - + sizeof ( hello.type_length ) ) ); + hello.version = htons ( TLS_VERSION_TLS_1_0 ); + memcpy ( &hello.random, &tls->client_random, sizeof ( hello.random ) ); + hello.cipher_suite_len = htons ( sizeof ( hello.cipher_suites ) ); + hello.cipher_suites[0] = htons ( TLS_RSA_WITH_AES_128_CBC_SHA ); + hello.cipher_suites[1] = htons ( TLS_RSA_WITH_AES_256_CBC_SHA ); + hello.compression_methods_len = sizeof ( hello.compression_methods ); + + return tls_send_handshake ( tls, &hello, sizeof ( hello ) ); +} + +/** + * Transmit Client Key Exchange record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_client_key_exchange ( struct tls_session *tls ) { + /* FIXME: Hack alert */ + RSA_CTX *rsa_ctx; + RSA_pub_key_new ( &rsa_ctx, tls->rsa.modulus, tls->rsa.modulus_len, + tls->rsa.exponent, tls->rsa.exponent_len ); + struct { + uint32_t type_length; + uint16_t encrypted_pre_master_secret_len; + uint8_t encrypted_pre_master_secret[rsa_ctx->num_octets]; + } __attribute__ (( packed )) key_xchg; + + memset ( &key_xchg, 0, sizeof ( key_xchg ) ); + key_xchg.type_length = ( cpu_to_le32 ( TLS_CLIENT_KEY_EXCHANGE ) | + htonl ( sizeof ( key_xchg ) - + sizeof ( key_xchg.type_length ) ) ); + key_xchg.encrypted_pre_master_secret_len + = htons ( sizeof ( key_xchg.encrypted_pre_master_secret ) ); + + /* FIXME: Hack alert */ + DBGC ( tls, "RSA encrypting plaintext, modulus, exponent:\n" ); + DBGC_HD ( tls, &tls->pre_master_secret, + sizeof ( tls->pre_master_secret ) ); + DBGC_HD ( tls, tls->rsa.modulus, tls->rsa.modulus_len ); + DBGC_HD ( tls, tls->rsa.exponent, tls->rsa.exponent_len ); + RSA_encrypt ( rsa_ctx, ( const uint8_t * ) &tls->pre_master_secret, + sizeof ( tls->pre_master_secret ), + key_xchg.encrypted_pre_master_secret, 0 ); + DBGC ( tls, "RSA encrypt done. Ciphertext:\n" ); + DBGC_HD ( tls, &key_xchg.encrypted_pre_master_secret, + sizeof ( key_xchg.encrypted_pre_master_secret ) ); + RSA_free ( rsa_ctx ); + + + return tls_send_handshake ( tls, &key_xchg, sizeof ( key_xchg ) ); +} + +/** + * Transmit Change Cipher record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_change_cipher ( struct tls_session *tls ) { + static const uint8_t change_cipher[1] = { 1 }; + return tls_send_plaintext ( tls, TLS_TYPE_CHANGE_CIPHER, + change_cipher, sizeof ( change_cipher ) ); +} + +/** + * Transmit Finished record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_finished ( struct tls_session *tls ) { + struct { + uint32_t type_length; + uint8_t verify_data[12]; + } __attribute__ (( packed )) finished; + uint8_t digest[MD5_DIGEST_SIZE + SHA1_DIGEST_SIZE]; + + memset ( &finished, 0, sizeof ( finished ) ); + finished.type_length = ( cpu_to_le32 ( TLS_FINISHED ) | + htonl ( sizeof ( finished ) - + sizeof ( finished.type_length ) ) ); + tls_verify_handshake ( tls, digest ); + tls_prf_label ( tls, &tls->master_secret, sizeof ( tls->master_secret ), + finished.verify_data, sizeof ( finished.verify_data ), + "client finished", digest, sizeof ( digest ) ); + + return tls_send_handshake ( tls, &finished, sizeof ( finished ) ); +} + +/** + * Receive new Change Cipher record + * + * @v tls TLS session + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_new_change_cipher ( struct tls_session *tls, + void *data, size_t len ) { + int rc; + + if ( ( len != 1 ) || ( *( ( uint8_t * ) data ) != 1 ) ) { + DBGC ( tls, "TLS %p received invalid Change Cipher\n", tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL; + } + + if ( ( rc = tls_change_cipher ( tls, &tls->rx_cipherspec_pending, + &tls->rx_cipherspec ) ) != 0 ) { + DBGC ( tls, "TLS %p could not activate RX cipher: %s\n", + tls, strerror ( rc ) ); + return rc; + } + tls->rx_seq = ~( ( uint64_t ) 0 ); + + return 0; +} + +/** + * Receive new Alert record + * + * @v tls TLS session + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_new_alert ( struct tls_session *tls, void *data, size_t len ) { + struct { + uint8_t level; + uint8_t description; + char next[0]; + } __attribute__ (( packed )) *alert = data; + void *end = alert->next; + + /* Sanity check */ + if ( end != ( data + len ) ) { + DBGC ( tls, "TLS %p received overlength Alert\n", tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL; + } + + switch ( alert->level ) { + case TLS_ALERT_WARNING: + DBGC ( tls, "TLS %p received warning alert %d\n", + tls, alert->description ); + return 0; + case TLS_ALERT_FATAL: + DBGC ( tls, "TLS %p received fatal alert %d\n", + tls, alert->description ); + return -EPERM; + default: + DBGC ( tls, "TLS %p received unknown alert level %d" + "(alert %d)\n", tls, alert->level, alert->description ); + return -EIO; + } +} + +/** + * Receive new Server Hello handshake record + * + * @v tls TLS session + * @v data Plaintext handshake record + * @v len Length of plaintext handshake record + * @ret rc Return status code + */ +static int tls_new_server_hello ( struct tls_session *tls, + void *data, size_t len ) { + struct { + uint16_t version; + uint8_t random[32]; + uint8_t session_id_len; + char next[0]; + } __attribute__ (( packed )) *hello_a = data; + struct { + uint8_t session_id[hello_a->session_id_len]; + uint16_t cipher_suite; + uint8_t compression_method; + char next[0]; + } __attribute__ (( packed )) *hello_b = ( void * ) &hello_a->next; + void *end = hello_b->next; + int rc; + + /* Sanity check */ + if ( end != ( data + len ) ) { + DBGC ( tls, "TLS %p received overlength Server Hello\n", tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL; + } + + /* Check protocol version */ + if ( ntohs ( hello_a->version ) < TLS_VERSION_TLS_1_0 ) { + DBGC ( tls, "TLS %p does not support protocol version %d.%d\n", + tls, ( ntohs ( hello_a->version ) >> 8 ), + ( ntohs ( hello_a->version ) & 0xff ) ); + return -ENOTSUP; + } + + /* Copy out server random bytes */ + memcpy ( &tls->server_random, &hello_a->random, + sizeof ( tls->server_random ) ); + + /* Select cipher suite */ + if ( ( rc = tls_select_cipher ( tls, hello_b->cipher_suite ) ) != 0 ) + return rc; + + /* Generate secrets */ + tls_generate_master_secret ( tls ); + if ( ( rc = tls_generate_keys ( tls ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Receive new Certificate handshake record + * + * @v tls TLS session + * @v data Plaintext handshake record + * @v len Length of plaintext handshake record + * @ret rc Return status code + */ +static int tls_new_certificate ( struct tls_session *tls, + void *data, size_t len ) { + struct { + uint8_t length[3]; + uint8_t certificates[0]; + } __attribute__ (( packed )) *certificate = data; + struct { + uint8_t length[3]; + uint8_t certificate[0]; + } __attribute__ (( packed )) *element = + ( ( void * ) certificate->certificates ); + size_t elements_len = tls_uint24 ( certificate->length ); + void *end = ( certificate->certificates + elements_len ); + struct asn1_cursor cursor; + int rc; + + /* Sanity check */ + if ( end != ( data + len ) ) { + DBGC ( tls, "TLS %p received overlength Server Certificate\n", + tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL; + } + + /* Traverse certificate chain */ + do { + cursor.data = element->certificate; + cursor.len = tls_uint24 ( element->length ); + if ( ( cursor.data + cursor.len ) > end ) { + DBGC ( tls, "TLS %p received corrupt Server " + "Certificate\n", tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL; + } + + // HACK + if ( ( rc = x509_rsa_public_key ( &cursor, + &tls->rsa ) ) != 0 ) { + DBGC ( tls, "TLS %p cannot determine RSA public key: " + "%s\n", tls, strerror ( rc ) ); + return rc; + } + return 0; + + element = ( cursor.data + cursor.len ); + } while ( element != end ); + + return -EINVAL; +} + +/** + * Receive new Server Hello Done handshake record + * + * @v tls TLS session + * @v data Plaintext handshake record + * @v len Length of plaintext handshake record + * @ret rc Return status code + */ +static int tls_new_server_hello_done ( struct tls_session *tls, + void *data, size_t len ) { + struct { + char next[0]; + } __attribute__ (( packed )) *hello_done = data; + void *end = hello_done->next; + + /* Sanity check */ + if ( end != ( data + len ) ) { + DBGC ( tls, "TLS %p received overlength Server Hello Done\n", + tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL; + } + + /* Check that we are ready to send the Client Key Exchange */ + if ( tls->tx_state != TLS_TX_NONE ) { + DBGC ( tls, "TLS %p received Server Hello Done while in " + "TX state %d\n", tls, tls->tx_state ); + return -EIO; + } + + /* Start sending the Client Key Exchange */ + tls->tx_state = TLS_TX_CLIENT_KEY_EXCHANGE; + + return 0; +} + +/** + * Receive new Finished handshake record + * + * @v tls TLS session + * @v data Plaintext handshake record + * @v len Length of plaintext handshake record + * @ret rc Return status code + */ +static int tls_new_finished ( struct tls_session *tls, + void *data, size_t len ) { + + /* FIXME: Handle this properly */ + tls->tx_state = TLS_TX_DATA; + ( void ) data; + ( void ) len; + return 0; +} + +/** + * Receive new Handshake record + * + * @v tls TLS session + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_new_handshake ( struct tls_session *tls, + void *data, size_t len ) { + struct { + uint8_t type; + uint8_t length[3]; + uint8_t payload[0]; + } __attribute__ (( packed )) *handshake = data; + void *payload = &handshake->payload; + size_t payload_len = tls_uint24 ( handshake->length ); + void *end = ( payload + payload_len ); + int rc; + + /* Sanity check */ + if ( end != ( data + len ) ) { + DBGC ( tls, "TLS %p received overlength Handshake\n", tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL; + } + + switch ( handshake->type ) { + case TLS_SERVER_HELLO: + rc = tls_new_server_hello ( tls, payload, payload_len ); + break; + case TLS_CERTIFICATE: + rc = tls_new_certificate ( tls, payload, payload_len ); + break; + case TLS_SERVER_HELLO_DONE: + rc = tls_new_server_hello_done ( tls, payload, payload_len ); + break; + case TLS_FINISHED: + rc = tls_new_finished ( tls, payload, payload_len ); + break; + default: + DBGC ( tls, "TLS %p ignoring handshake type %d\n", + tls, handshake->type ); + rc = 0; + break; + } + + /* Add to handshake digest (except for Hello Requests, which + * are explicitly excluded). + */ + if ( handshake->type != TLS_HELLO_REQUEST ) + tls_add_handshake ( tls, data, len ); + + return rc; +} + +/** + * Receive new record + * + * @v tls TLS session + * @v type Record type + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_new_record ( struct tls_session *tls, + unsigned int type, void *data, size_t len ) { + + switch ( type ) { + case TLS_TYPE_CHANGE_CIPHER: + return tls_new_change_cipher ( tls, data, len ); + case TLS_TYPE_ALERT: + return tls_new_alert ( tls, data, len ); + case TLS_TYPE_HANDSHAKE: + return tls_new_handshake ( tls, data, len ); + case TLS_TYPE_DATA: + return xfer_deliver_raw ( &tls->plainstream.xfer, data, len ); + default: + /* RFC4346 says that we should just ignore unknown + * record types. + */ + DBGC ( tls, "TLS %p ignoring record type %d\n", tls, type ); + return 0; + } +} + +/****************************************************************************** + * + * Record encryption/decryption + * + ****************************************************************************** + */ + +/** + * Calculate HMAC + * + * @v tls TLS session + * @v cipherspec Cipher specification + * @v seq Sequence number + * @v tlshdr TLS header + * @v data Data + * @v len Length of data + * @v mac HMAC to fill in + */ +static void tls_hmac ( struct tls_session *tls __unused, + struct tls_cipherspec *cipherspec, + uint64_t seq, struct tls_header *tlshdr, + const void *data, size_t len, void *hmac ) { + struct digest_algorithm *digest = cipherspec->digest; + uint8_t digest_ctx[digest->ctxsize]; + + hmac_init ( digest, digest_ctx, cipherspec->mac_secret, + &digest->digestsize ); + seq = cpu_to_be64 ( seq ); + hmac_update ( digest, digest_ctx, &seq, sizeof ( seq ) ); + hmac_update ( digest, digest_ctx, tlshdr, sizeof ( *tlshdr ) ); + hmac_update ( digest, digest_ctx, data, len ); + hmac_final ( digest, digest_ctx, cipherspec->mac_secret, + &digest->digestsize, hmac ); +} + +/** + * Allocate and assemble stream-ciphered record from data and MAC portions + * + * @v tls TLS session + * @ret data Data + * @ret len Length of data + * @ret digest MAC digest + * @ret plaintext_len Length of plaintext record + * @ret plaintext Allocated plaintext record + */ +static void * __malloc tls_assemble_stream ( struct tls_session *tls, + const void *data, size_t len, + void *digest, size_t *plaintext_len ) { + size_t mac_len = tls->tx_cipherspec.digest->digestsize; + void *plaintext; + void *content; + void *mac; + + /* Calculate stream-ciphered struct length */ + *plaintext_len = ( len + mac_len ); + + /* Allocate stream-ciphered struct */ + plaintext = malloc ( *plaintext_len ); + if ( ! plaintext ) + return NULL; + content = plaintext; + mac = ( content + len ); + + /* Fill in stream-ciphered struct */ + memcpy ( content, data, len ); + memcpy ( mac, digest, mac_len ); + + return plaintext; +} + +/** + * Allocate and assemble block-ciphered record from data and MAC portions + * + * @v tls TLS session + * @ret data Data + * @ret len Length of data + * @ret digest MAC digest + * @ret plaintext_len Length of plaintext record + * @ret plaintext Allocated plaintext record + */ +static void * tls_assemble_block ( struct tls_session *tls, + const void *data, size_t len, + void *digest, size_t *plaintext_len ) { + size_t blocksize = tls->tx_cipherspec.cipher->blocksize; + size_t iv_len = blocksize; + size_t mac_len = tls->tx_cipherspec.digest->digestsize; + size_t padding_len; + void *plaintext; + void *iv; + void *content; + void *mac; + void *padding; + + /* FIXME: TLSv1.1 has an explicit IV */ + iv_len = 0; + + /* Calculate block-ciphered struct length */ + padding_len = ( ( blocksize - 1 ) & -( iv_len + len + mac_len + 1 ) ); + *plaintext_len = ( iv_len + len + mac_len + padding_len + 1 ); + + /* Allocate block-ciphered struct */ + plaintext = malloc ( *plaintext_len ); + if ( ! plaintext ) + return NULL; + iv = plaintext; + content = ( iv + iv_len ); + mac = ( content + len ); + padding = ( mac + mac_len ); + + /* Fill in block-ciphered struct */ + memset ( iv, 0, iv_len ); + memcpy ( content, data, len ); + memcpy ( mac, digest, mac_len ); + memset ( padding, padding_len, ( padding_len + 1 ) ); + + return plaintext; +} + +/** + * Send plaintext record + * + * @v tls TLS session + * @v type Record type + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_send_plaintext ( struct tls_session *tls, unsigned int type, + const void *data, size_t len ) { + struct tls_header plaintext_tlshdr; + struct tls_header *tlshdr; + struct tls_cipherspec *cipherspec = &tls->tx_cipherspec; + void *plaintext = NULL; + size_t plaintext_len; + struct io_buffer *ciphertext = NULL; + size_t ciphertext_len; + size_t mac_len = cipherspec->digest->digestsize; + uint8_t mac[mac_len]; + int rc; + + /* Construct header */ + plaintext_tlshdr.type = type; + plaintext_tlshdr.version = htons ( TLS_VERSION_TLS_1_0 ); + plaintext_tlshdr.length = htons ( len ); + + /* Calculate MAC */ + tls_hmac ( tls, cipherspec, tls->tx_seq, &plaintext_tlshdr, + data, len, mac ); + + /* Allocate and assemble plaintext struct */ + if ( is_stream_cipher ( cipherspec->cipher ) ) { + plaintext = tls_assemble_stream ( tls, data, len, mac, + &plaintext_len ); + } else { + plaintext = tls_assemble_block ( tls, data, len, mac, + &plaintext_len ); + } + if ( ! plaintext ) { + DBGC ( tls, "TLS %p could not allocate %zd bytes for " + "plaintext\n", tls, plaintext_len ); + rc = -ENOMEM; + goto done; + } + + DBGC2 ( tls, "Sending plaintext data:\n" ); + DBGC2_HD ( tls, plaintext, plaintext_len ); + + /* Allocate ciphertext */ + ciphertext_len = ( sizeof ( *tlshdr ) + plaintext_len ); + ciphertext = xfer_alloc_iob ( &tls->cipherstream.xfer, + ciphertext_len ); + if ( ! ciphertext ) { + DBGC ( tls, "TLS %p could not allocate %zd bytes for " + "ciphertext\n", tls, ciphertext_len ); + rc = -ENOMEM; + goto done; + } + + /* Assemble ciphertext */ + tlshdr = iob_put ( ciphertext, sizeof ( *tlshdr ) ); + tlshdr->type = type; + tlshdr->version = htons ( TLS_VERSION_TLS_1_0 ); + tlshdr->length = htons ( plaintext_len ); + memcpy ( cipherspec->cipher_next_ctx, cipherspec->cipher_ctx, + cipherspec->cipher->ctxsize ); + cipher_encrypt ( cipherspec->cipher, cipherspec->cipher_next_ctx, + plaintext, iob_put ( ciphertext, plaintext_len ), + plaintext_len ); + + /* Free plaintext as soon as possible to conserve memory */ + free ( plaintext ); + plaintext = NULL; + + /* Send ciphertext */ + rc = xfer_deliver_iob ( &tls->cipherstream.xfer, ciphertext ); + ciphertext = NULL; + if ( rc != 0 ) { + DBGC ( tls, "TLS %p could not deliver ciphertext: %s\n", + tls, strerror ( rc ) ); + goto done; + } + + /* Update TX state machine to next record */ + tls->tx_seq += 1; + memcpy ( tls->tx_cipherspec.cipher_ctx, + tls->tx_cipherspec.cipher_next_ctx, + tls->tx_cipherspec.cipher->ctxsize ); + + done: + free ( plaintext ); + free_iob ( ciphertext ); + return rc; +} + +/** + * Split stream-ciphered record into data and MAC portions + * + * @v tls TLS session + * @v plaintext Plaintext record + * @v plaintext_len Length of record + * @ret data Data + * @ret len Length of data + * @ret digest MAC digest + * @ret rc Return status code + */ +static int tls_split_stream ( struct tls_session *tls, + void *plaintext, size_t plaintext_len, + void **data, size_t *len, void **digest ) { + void *content; + size_t content_len; + void *mac; + size_t mac_len; + + /* Decompose stream-ciphered data */ + mac_len = tls->rx_cipherspec.digest->digestsize; + if ( plaintext_len < mac_len ) { + DBGC ( tls, "TLS %p received underlength record\n", tls ); + DBGC_HD ( tls, plaintext, plaintext_len ); + return -EINVAL; + } + content_len = ( plaintext_len - mac_len ); + content = plaintext; + mac = ( content + content_len ); + + /* Fill in return values */ + *data = content; + *len = content_len; + *digest = mac; + + return 0; +} + +/** + * Split block-ciphered record into data and MAC portions + * + * @v tls TLS session + * @v plaintext Plaintext record + * @v plaintext_len Length of record + * @ret data Data + * @ret len Length of data + * @ret digest MAC digest + * @ret rc Return status code + */ +static int tls_split_block ( struct tls_session *tls, + void *plaintext, size_t plaintext_len, + void **data, size_t *len, + void **digest ) { + void *iv; + size_t iv_len; + void *content; + size_t content_len; + void *mac; + size_t mac_len; + void *padding; + size_t padding_len; + unsigned int i; + + /* Decompose block-ciphered data */ + if ( plaintext_len < 1 ) { + DBGC ( tls, "TLS %p received underlength record\n", tls ); + DBGC_HD ( tls, plaintext, plaintext_len ); + return -EINVAL; + } + iv_len = tls->rx_cipherspec.cipher->blocksize; + + /* FIXME: TLSv1.1 uses an explicit IV */ + iv_len = 0; + + mac_len = tls->rx_cipherspec.digest->digestsize; + padding_len = *( ( uint8_t * ) ( plaintext + plaintext_len - 1 ) ); + if ( plaintext_len < ( iv_len + mac_len + padding_len + 1 ) ) { + DBGC ( tls, "TLS %p received underlength record\n", tls ); + DBGC_HD ( tls, plaintext, plaintext_len ); + return -EINVAL; + } + content_len = ( plaintext_len - iv_len - mac_len - padding_len - 1 ); + iv = plaintext; + content = ( iv + iv_len ); + mac = ( content + content_len ); + padding = ( mac + mac_len ); + + /* Verify padding bytes */ + for ( i = 0 ; i < padding_len ; i++ ) { + if ( *( ( uint8_t * ) ( padding + i ) ) != padding_len ) { + DBGC ( tls, "TLS %p received bad padding\n", tls ); + DBGC_HD ( tls, plaintext, plaintext_len ); + return -EINVAL; + } + } + + /* Fill in return values */ + *data = content; + *len = content_len; + *digest = mac; + + return 0; +} + +/** + * Receive new ciphertext record + * + * @v tls TLS session + * @v tlshdr Record header + * @v ciphertext Ciphertext record + * @ret rc Return status code + */ +static int tls_new_ciphertext ( struct tls_session *tls, + struct tls_header *tlshdr, void *ciphertext ) { + struct tls_header plaintext_tlshdr; + struct tls_cipherspec *cipherspec = &tls->rx_cipherspec; + size_t record_len = ntohs ( tlshdr->length ); + void *plaintext = NULL; + void *data; + size_t len; + void *mac; + size_t mac_len = cipherspec->digest->digestsize; + uint8_t verify_mac[mac_len]; + int rc; + + /* Allocate buffer for plaintext */ + plaintext = malloc ( record_len ); + if ( ! plaintext ) { + DBGC ( tls, "TLS %p could not allocate %zd bytes for " + "decryption buffer\n", tls, record_len ); + rc = -ENOMEM; + goto done; + } + + /* Decrypt the record */ + cipher_decrypt ( cipherspec->cipher, cipherspec->cipher_ctx, + ciphertext, plaintext, record_len ); + + /* Split record into content and MAC */ + if ( is_stream_cipher ( cipherspec->cipher ) ) { + if ( ( rc = tls_split_stream ( tls, plaintext, record_len, + &data, &len, &mac ) ) != 0 ) + goto done; + } else { + if ( ( rc = tls_split_block ( tls, plaintext, record_len, + &data, &len, &mac ) ) != 0 ) + goto done; + } + + /* Verify MAC */ + plaintext_tlshdr.type = tlshdr->type; + plaintext_tlshdr.version = tlshdr->version; + plaintext_tlshdr.length = htons ( len ); + tls_hmac ( tls, cipherspec, tls->rx_seq, &plaintext_tlshdr, + data, len, verify_mac); + if ( memcmp ( mac, verify_mac, mac_len ) != 0 ) { + DBGC ( tls, "TLS %p failed MAC verification\n", tls ); + DBGC_HD ( tls, plaintext, record_len ); + goto done; + } + + DBGC2 ( tls, "Received plaintext data:\n" ); + DBGC2_HD ( tls, data, len ); + + /* Process plaintext record */ + if ( ( rc = tls_new_record ( tls, tlshdr->type, data, len ) ) != 0 ) + goto done; + + rc = 0; + done: + free ( plaintext ); + return rc; +} + +/****************************************************************************** + * + * Plaintext stream operations + * + ****************************************************************************** + */ + +/** + * Close interface + * + * @v xfer Plainstream data transfer interface + * @v rc Reason for close + */ +static void tls_plainstream_close ( struct xfer_interface *xfer, int rc ) { + struct tls_session *tls = + container_of ( xfer, struct tls_session, plainstream.xfer ); + + tls_close ( tls, rc ); +} + +/** + * Check flow control window + * + * @v xfer Plainstream data transfer interface + * @ret len Length of window + */ +static size_t tls_plainstream_window ( struct xfer_interface *xfer ) { + struct tls_session *tls = + container_of ( xfer, struct tls_session, plainstream.xfer ); + + /* Block window unless we are ready to accept data */ + if ( tls->tx_state != TLS_TX_DATA ) + return 0; + + return filter_window ( xfer ); +} + +/** + * Deliver datagram as raw data + * + * @v xfer Plainstream data transfer interface + * @v data Data buffer + * @v len Length of data buffer + * @ret rc Return status code + */ +static int tls_plainstream_deliver_raw ( struct xfer_interface *xfer, + const void *data, size_t len ) { + struct tls_session *tls = + container_of ( xfer, struct tls_session, plainstream.xfer ); + + /* Refuse unless we are ready to accept data */ + if ( tls->tx_state != TLS_TX_DATA ) + return -ENOTCONN; + + return tls_send_plaintext ( tls, TLS_TYPE_DATA, data, len ); +} + +/** TLS plaintext stream operations */ +static struct xfer_interface_operations tls_plainstream_operations = { + .close = tls_plainstream_close, + .vredirect = ignore_xfer_vredirect, + .window = tls_plainstream_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = xfer_deliver_as_raw, + .deliver_raw = tls_plainstream_deliver_raw, +}; + +/****************************************************************************** + * + * Ciphertext stream operations + * + ****************************************************************************** + */ + +/** + * Close interface + * + * @v xfer Plainstream data transfer interface + * @v rc Reason for close + */ +static void tls_cipherstream_close ( struct xfer_interface *xfer, int rc ) { + struct tls_session *tls = + container_of ( xfer, struct tls_session, cipherstream.xfer ); + + tls_close ( tls, rc ); +} + +/** + * Handle received TLS header + * + * @v tls TLS session + * @ret rc Returned status code + */ +static int tls_newdata_process_header ( struct tls_session *tls ) { + size_t data_len = ntohs ( tls->rx_header.length ); + + /* Allocate data buffer now that we know the length */ + assert ( tls->rx_data == NULL ); + tls->rx_data = malloc ( data_len ); + if ( ! tls->rx_data ) { + DBGC ( tls, "TLS %p could not allocate %zd bytes " + "for receive buffer\n", tls, data_len ); + return -ENOMEM; + } + + /* Move to data state */ + tls->rx_state = TLS_RX_DATA; + + return 0; +} + +/** + * Handle received TLS data payload + * + * @v tls TLS session + * @ret rc Returned status code + */ +static int tls_newdata_process_data ( struct tls_session *tls ) { + int rc; + + /* Process record */ + if ( ( rc = tls_new_ciphertext ( tls, &tls->rx_header, + tls->rx_data ) ) != 0 ) + return rc; + + /* Increment RX sequence number */ + tls->rx_seq += 1; + + /* Free data buffer */ + free ( tls->rx_data ); + tls->rx_data = NULL; + + /* Return to header state */ + tls->rx_state = TLS_RX_HEADER; + + return 0; +} + +/** + * Receive new ciphertext + * + * @v app Stream application + * @v data Data received + * @v len Length of received data + * @ret rc Return status code + */ +static int tls_cipherstream_deliver_raw ( struct xfer_interface *xfer, + const void *data, size_t len ) { + struct tls_session *tls = + container_of ( xfer, struct tls_session, cipherstream.xfer ); + size_t frag_len; + void *buf; + size_t buf_len; + int ( * process ) ( struct tls_session *tls ); + int rc; + + while ( len ) { + /* Select buffer according to current state */ + switch ( tls->rx_state ) { + case TLS_RX_HEADER: + buf = &tls->rx_header; + buf_len = sizeof ( tls->rx_header ); + process = tls_newdata_process_header; + break; + case TLS_RX_DATA: + buf = tls->rx_data; + buf_len = ntohs ( tls->rx_header.length ); + process = tls_newdata_process_data; + break; + default: + assert ( 0 ); + return -EINVAL; + } + + /* Copy data portion to buffer */ + frag_len = ( buf_len - tls->rx_rcvd ); + if ( frag_len > len ) + frag_len = len; + memcpy ( ( buf + tls->rx_rcvd ), data, frag_len ); + tls->rx_rcvd += frag_len; + data += frag_len; + len -= frag_len; + + /* Process data if buffer is now full */ + if ( tls->rx_rcvd == buf_len ) { + if ( ( rc = process ( tls ) ) != 0 ) { + tls_close ( tls, rc ); + return rc; + } + tls->rx_rcvd = 0; + } + } + + return 0; +} + +/** TLS ciphertext stream operations */ +static struct xfer_interface_operations tls_cipherstream_operations = { + .close = tls_cipherstream_close, + .vredirect = xfer_vreopen, + .window = filter_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = xfer_deliver_as_raw, + .deliver_raw = tls_cipherstream_deliver_raw, +}; + +/****************************************************************************** + * + * Controlling process + * + ****************************************************************************** + */ + +/** + * TLS TX state machine + * + * @v process TLS process + */ +static void tls_step ( struct process *process ) { + struct tls_session *tls = + container_of ( process, struct tls_session, process ); + int rc; + + /* Wait for cipherstream to become ready */ + if ( ! xfer_window ( &tls->cipherstream.xfer ) ) + return; + + switch ( tls->tx_state ) { + case TLS_TX_NONE: + /* Nothing to do */ + break; + case TLS_TX_CLIENT_HELLO: + /* Send Client Hello */ + if ( ( rc = tls_send_client_hello ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p could not send Client Hello: %s\n", + tls, strerror ( rc ) ); + goto err; + } + tls->tx_state = TLS_TX_NONE; + break; + case TLS_TX_CLIENT_KEY_EXCHANGE: + /* Send Client Key Exchange */ + if ( ( rc = tls_send_client_key_exchange ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p could send Client Key Exchange: " + "%s\n", tls, strerror ( rc ) ); + goto err; + } + tls->tx_state = TLS_TX_CHANGE_CIPHER; + break; + case TLS_TX_CHANGE_CIPHER: + /* Send Change Cipher, and then change the cipher in use */ + if ( ( rc = tls_send_change_cipher ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p could not send Change Cipher: " + "%s\n", tls, strerror ( rc ) ); + goto err; + } + if ( ( rc = tls_change_cipher ( tls, + &tls->tx_cipherspec_pending, + &tls->tx_cipherspec )) != 0 ){ + DBGC ( tls, "TLS %p could not activate TX cipher: " + "%s\n", tls, strerror ( rc ) ); + goto err; + } + tls->tx_seq = 0; + tls->tx_state = TLS_TX_FINISHED; + break; + case TLS_TX_FINISHED: + /* Send Finished */ + if ( ( rc = tls_send_finished ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p could not send Finished: %s\n", + tls, strerror ( rc ) ); + goto err; + } + tls->tx_state = TLS_TX_NONE; + break; + case TLS_TX_DATA: + /* Nothing to do */ + break; + default: + assert ( 0 ); + } + + return; + + err: + tls_close ( tls, rc ); +} + +/****************************************************************************** + * + * Instantiator + * + ****************************************************************************** + */ + +int add_tls ( struct xfer_interface *xfer, struct xfer_interface **next ) { + struct tls_session *tls; + + /* Allocate and initialise TLS structure */ + tls = malloc ( sizeof ( *tls ) ); + if ( ! tls ) + return -ENOMEM; + memset ( tls, 0, sizeof ( *tls ) ); + tls->refcnt.free = free_tls; + filter_init ( &tls->plainstream, &tls_plainstream_operations, + &tls->cipherstream, &tls_cipherstream_operations, + &tls->refcnt ); + tls_clear_cipher ( tls, &tls->tx_cipherspec ); + tls_clear_cipher ( tls, &tls->tx_cipherspec_pending ); + tls_clear_cipher ( tls, &tls->rx_cipherspec ); + tls_clear_cipher ( tls, &tls->rx_cipherspec_pending ); + tls->client_random.gmt_unix_time = 0; + tls_generate_random ( &tls->client_random.random, + ( sizeof ( tls->client_random.random ) ) ); + tls->pre_master_secret.version = htons ( TLS_VERSION_TLS_1_0 ); + tls_generate_random ( &tls->pre_master_secret.random, + ( sizeof ( tls->pre_master_secret.random ) ) ); + digest_init ( &md5_algorithm, tls->handshake_md5_ctx ); + digest_init ( &sha1_algorithm, tls->handshake_sha1_ctx ); + tls->tx_state = TLS_TX_CLIENT_HELLO; + process_init ( &tls->process, tls_step, &tls->refcnt ); + + /* Attach to parent interface, mortalise self, and return */ + xfer_plug_plug ( &tls->plainstream.xfer, xfer ); + *next = &tls->cipherstream.xfer; + ref_put ( &tls->refcnt ); + return 0; +} + diff --git a/debian/grub-extras/disabled/gpxe/src/net/udp.c b/debian/grub-extras/disabled/gpxe/src/net/udp.c new file mode 100644 index 0000000..771655e --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/udp.c @@ -0,0 +1,463 @@ +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <byteswap.h> +#include <errno.h> +#include <gpxe/tcpip.h> +#include <gpxe/iobuf.h> +#include <gpxe/xfer.h> +#include <gpxe/open.h> +#include <gpxe/uri.h> +#include <gpxe/udp.h> + +/** @file + * + * UDP protocol + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * A UDP connection + * + */ +struct udp_connection { + /** Reference counter */ + struct refcnt refcnt; + /** List of UDP connections */ + struct list_head list; + + /** Data transfer interface */ + struct xfer_interface xfer; + + /** Local socket address */ + struct sockaddr_tcpip local; + /** Remote socket address */ + struct sockaddr_tcpip peer; +}; + +/** + * List of registered UDP connections + */ +static LIST_HEAD ( udp_conns ); + +/* Forward declatations */ +static struct xfer_interface_operations udp_xfer_operations; +struct tcpip_protocol udp_protocol; + +/** + * Bind UDP connection to local port + * + * @v udp UDP connection + * @ret rc Return status code + * + * Opens the UDP connection and binds to the specified local port. If + * no local port is specified, the first available port will be used. + */ +static int udp_bind ( struct udp_connection *udp ) { + struct udp_connection *existing; + static uint16_t try_port = 1023; + + /* If no port specified, find the first available port */ + if ( ! udp->local.st_port ) { + while ( try_port ) { + try_port++; + if ( try_port < 1024 ) + continue; + udp->local.st_port = htons ( try_port ); + if ( udp_bind ( udp ) == 0 ) + return 0; + } + return -EADDRINUSE; + } + + /* Attempt bind to local port */ + list_for_each_entry ( existing, &udp_conns, list ) { + if ( existing->local.st_port == udp->local.st_port ) { + DBGC ( udp, "UDP %p could not bind: port %d in use\n", + udp, ntohs ( udp->local.st_port ) ); + return -EADDRINUSE; + } + } + + /* Add to UDP connection list */ + DBGC ( udp, "UDP %p bound to port %d\n", + udp, ntohs ( udp->local.st_port ) ); + + return 0; +} + +/** + * Open a UDP connection + * + * @v xfer Data transfer interface + * @v peer Peer socket address, or NULL + * @v local Local socket address, or NULL + * @v promisc Socket is promiscuous + * @ret rc Return status code + */ +static int udp_open_common ( struct xfer_interface *xfer, + struct sockaddr *peer, struct sockaddr *local, + int promisc ) { + struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer; + struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local; + struct udp_connection *udp; + int rc; + + /* Allocate and initialise structure */ + udp = zalloc ( sizeof ( *udp ) ); + if ( ! udp ) + return -ENOMEM; + DBGC ( udp, "UDP %p allocated\n", udp ); + xfer_init ( &udp->xfer, &udp_xfer_operations, &udp->refcnt ); + if ( st_peer ) + memcpy ( &udp->peer, st_peer, sizeof ( udp->peer ) ); + if ( st_local ) + memcpy ( &udp->local, st_local, sizeof ( udp->local ) ); + + /* Bind to local port */ + if ( ! promisc ) { + if ( ( rc = udp_bind ( udp ) ) != 0 ) + goto err; + } + + /* Attach parent interface, transfer reference to connection + * list and return + */ + xfer_plug_plug ( &udp->xfer, xfer ); + list_add ( &udp->list, &udp_conns ); + return 0; + + err: + ref_put ( &udp->refcnt ); + return rc; +} + +/** + * Open a UDP connection + * + * @v xfer Data transfer interface + * @v peer Peer socket address + * @v local Local socket address, or NULL + * @ret rc Return status code + */ +int udp_open ( struct xfer_interface *xfer, struct sockaddr *peer, + struct sockaddr *local ) { + return udp_open_common ( xfer, peer, local, 0 ); +} + +/** + * Open a promiscuous UDP connection + * + * @v xfer Data transfer interface + * @ret rc Return status code + * + * Promiscuous UDP connections are required in order to support the + * PXE API. + */ +int udp_open_promisc ( struct xfer_interface *xfer ) { + return udp_open_common ( xfer, NULL, NULL, 1 ); +} + +/** + * Close a UDP connection + * + * @v udp UDP connection + * @v rc Reason for close + */ +static void udp_close ( struct udp_connection *udp, int rc ) { + + /* Close data transfer interface */ + xfer_nullify ( &udp->xfer ); + xfer_close ( &udp->xfer, rc ); + + /* Remove from list of connections and drop list's reference */ + list_del ( &udp->list ); + ref_put ( &udp->refcnt ); + + DBGC ( udp, "UDP %p closed\n", udp ); +} + +/** + * Transmit data via a UDP connection to a specified address + * + * @v udp UDP connection + * @v iobuf I/O buffer + * @v src Source address, or NULL to use default + * @v dest Destination address, or NULL to use default + * @v netdev Network device, or NULL to use default + * @ret rc Return status code + */ +static int udp_tx ( struct udp_connection *udp, struct io_buffer *iobuf, + struct sockaddr_tcpip *src, struct sockaddr_tcpip *dest, + struct net_device *netdev ) { + struct udp_header *udphdr; + size_t len; + int rc; + + /* Check we can accommodate the header */ + if ( ( rc = iob_ensure_headroom ( iobuf, UDP_MAX_HLEN ) ) != 0 ) { + free_iob ( iobuf ); + return rc; + } + + /* Fill in default values if not explicitly provided */ + if ( ! src ) + src = &udp->local; + if ( ! dest ) + dest = &udp->peer; + + /* Add the UDP header */ + udphdr = iob_push ( iobuf, sizeof ( *udphdr ) ); + len = iob_len ( iobuf ); + udphdr->dest = dest->st_port; + udphdr->src = src->st_port; + udphdr->len = htons ( len ); + udphdr->chksum = 0; + udphdr->chksum = tcpip_chksum ( udphdr, len ); + + /* Dump debugging information */ + DBGC ( udp, "UDP %p TX %d->%d len %d\n", udp, + ntohs ( udphdr->src ), ntohs ( udphdr->dest ), + ntohs ( udphdr->len ) ); + + /* Send it to the next layer for processing */ + if ( ( rc = tcpip_tx ( iobuf, &udp_protocol, src, dest, netdev, + &udphdr->chksum ) ) != 0 ) { + DBGC ( udp, "UDP %p could not transmit packet: %s\n", + udp, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Identify UDP connection by local address + * + * @v local Local address + * @ret udp UDP connection, or NULL + */ +static struct udp_connection * udp_demux ( struct sockaddr_tcpip *local ) { + static const struct sockaddr_tcpip empty_sockaddr = { .pad = { 0, } }; + struct udp_connection *udp; + + list_for_each_entry ( udp, &udp_conns, list ) { + if ( ( ( udp->local.st_family == local->st_family ) || + ( udp->local.st_family == 0 ) ) && + ( ( udp->local.st_port == local->st_port ) || + ( udp->local.st_port == 0 ) ) && + ( ( memcmp ( udp->local.pad, local->pad, + sizeof ( udp->local.pad ) ) == 0 ) || + ( memcmp ( udp->local.pad, empty_sockaddr.pad, + sizeof ( udp->local.pad ) ) == 0 ) ) ) { + return udp; + } + } + return NULL; +} + +/** + * Process a received packet + * + * @v iobuf I/O buffer + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @ret rc Return status code + */ +static int udp_rx ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ) { + struct udp_header *udphdr = iobuf->data; + struct udp_connection *udp; + struct xfer_metadata meta; + size_t ulen; + unsigned int csum; + int rc = 0; + + /* Sanity check packet */ + if ( iob_len ( iobuf ) < sizeof ( *udphdr ) ) { + DBG ( "UDP packet too short at %zd bytes (min %zd bytes)\n", + iob_len ( iobuf ), sizeof ( *udphdr ) ); + + rc = -EINVAL; + goto done; + } + ulen = ntohs ( udphdr->len ); + if ( ulen < sizeof ( *udphdr ) ) { + DBG ( "UDP length too short at %zd bytes " + "(header is %zd bytes)\n", ulen, sizeof ( *udphdr ) ); + rc = -EINVAL; + goto done; + } + if ( ulen > iob_len ( iobuf ) ) { + DBG ( "UDP length too long at %zd bytes (packet is %zd " + "bytes)\n", ulen, iob_len ( iobuf ) ); + rc = -EINVAL; + goto done; + } + if ( udphdr->chksum ) { + csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, ulen ); + if ( csum != 0 ) { + DBG ( "UDP checksum incorrect (is %04x including " + "checksum field, should be 0000)\n", csum ); + rc = -EINVAL; + goto done; + } + } + + /* Parse parameters from header and strip header */ + st_src->st_port = udphdr->src; + st_dest->st_port = udphdr->dest; + udp = udp_demux ( st_dest ); + iob_unput ( iobuf, ( iob_len ( iobuf ) - ulen ) ); + iob_pull ( iobuf, sizeof ( *udphdr ) ); + + /* Dump debugging information */ + DBGC ( udp, "UDP %p RX %d<-%d len %zd\n", udp, + ntohs ( udphdr->dest ), ntohs ( udphdr->src ), ulen ); + + /* Ignore if no matching connection found */ + if ( ! udp ) { + DBG ( "No UDP connection listening on port %d\n", + ntohs ( udphdr->dest ) ); + rc = -ENOTCONN; + goto done; + } + + /* Pass data to application */ + memset ( &meta, 0, sizeof ( meta ) ); + meta.src = ( struct sockaddr * ) st_src; + meta.dest = ( struct sockaddr * ) st_dest; + rc = xfer_deliver_iob_meta ( &udp->xfer, iob_disown ( iobuf ), &meta ); + + done: + free_iob ( iobuf ); + return rc; +} + +struct tcpip_protocol udp_protocol __tcpip_protocol = { + .name = "UDP", + .rx = udp_rx, + .tcpip_proto = IP_UDP, +}; + +/*************************************************************************** + * + * Data transfer interface + * + *************************************************************************** + */ + +/** + * Close interface + * + * @v xfer Data transfer interface + * @v rc Reason for close + */ +static void udp_xfer_close ( struct xfer_interface *xfer, int rc ) { + struct udp_connection *udp = + container_of ( xfer, struct udp_connection, xfer ); + + /* Close connection */ + udp_close ( udp, rc ); +} + +/** + * Allocate I/O buffer for UDP + * + * @v xfer Data transfer interface + * @v len Payload size + * @ret iobuf I/O buffer, or NULL + */ +static struct io_buffer * udp_alloc_iob ( struct xfer_interface *xfer, + size_t len ) { + struct udp_connection *udp = + container_of ( xfer, struct udp_connection, xfer ); + struct io_buffer *iobuf; + + iobuf = alloc_iob ( UDP_MAX_HLEN + len ); + if ( ! iobuf ) { + DBGC ( udp, "UDP %p cannot allocate buffer of length %zd\n", + udp, len ); + return NULL; + } + iob_reserve ( iobuf, UDP_MAX_HLEN ); + return iobuf; +} + +/** + * Deliver datagram as I/O buffer + * + * @v xfer Data transfer interface + * @v iobuf Datagram I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int udp_xfer_deliver_iob ( struct xfer_interface *xfer, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct udp_connection *udp = + container_of ( xfer, struct udp_connection, xfer ); + + /* Transmit data, if possible */ + udp_tx ( udp, iobuf, ( ( struct sockaddr_tcpip * ) meta->src ), + ( ( struct sockaddr_tcpip * ) meta->dest ), meta->netdev ); + + return 0; +} + +/** UDP data transfer interface operations */ +static struct xfer_interface_operations udp_xfer_operations = { + .close = udp_xfer_close, + .vredirect = ignore_xfer_vredirect, + .window = unlimited_xfer_window, + .alloc_iob = udp_alloc_iob, + .deliver_iob = udp_xfer_deliver_iob, + .deliver_raw = xfer_deliver_as_iob, +}; + +/*************************************************************************** + * + * Openers + * + *************************************************************************** + */ + +/** UDP socket opener */ +struct socket_opener udp_socket_opener __socket_opener = { + .semantics = UDP_SOCK_DGRAM, + .family = AF_INET, + .open = udp_open, +}; + +/** Linkage hack */ +int udp_sock_dgram = UDP_SOCK_DGRAM; + +/** + * Open UDP URI + * + * @v xfer Data transfer interface + * @v uri URI + * @ret rc Return status code + */ +static int udp_open_uri ( struct xfer_interface *xfer, struct uri *uri ) { + struct sockaddr_tcpip peer; + + /* Sanity check */ + if ( ! uri->host ) + return -EINVAL; + + memset ( &peer, 0, sizeof ( peer ) ); + peer.st_port = htons ( uri_port ( uri, 0 ) ); + return xfer_open_named_socket ( xfer, SOCK_DGRAM, + ( struct sockaddr * ) &peer, + uri->host, NULL ); +} + +/** UDP URI opener */ +struct uri_opener udp_uri_opener __uri_opener = { + .scheme = "udp", + .open = udp_open_uri, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/udp/dhcp.c b/debian/grub-extras/disabled/gpxe/src/net/udp/dhcp.c new file mode 100644 index 0000000..aaf691f --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/udp/dhcp.c @@ -0,0 +1,1430 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <gpxe/if_ether.h> +#include <gpxe/netdevice.h> +#include <gpxe/device.h> +#include <gpxe/xfer.h> +#include <gpxe/open.h> +#include <gpxe/job.h> +#include <gpxe/retry.h> +#include <gpxe/tcpip.h> +#include <gpxe/ip.h> +#include <gpxe/uuid.h> +#include <gpxe/timer.h> +#include <gpxe/settings.h> +#include <gpxe/dhcp.h> +#include <gpxe/dhcpopts.h> +#include <gpxe/dhcppkt.h> +#include <gpxe/features.h> + +/** @file + * + * Dynamic Host Configuration Protocol + * + */ + +struct dhcp_session; +static int dhcp_tx ( struct dhcp_session *dhcp ); + +/** + * DHCP operation types + * + * This table maps from DHCP message types (i.e. values of the @c + * DHCP_MESSAGE_TYPE option) to values of the "op" field within a DHCP + * packet. + */ +static const uint8_t dhcp_op[] = { + [DHCPDISCOVER] = BOOTP_REQUEST, + [DHCPOFFER] = BOOTP_REPLY, + [DHCPREQUEST] = BOOTP_REQUEST, + [DHCPDECLINE] = BOOTP_REQUEST, + [DHCPACK] = BOOTP_REPLY, + [DHCPNAK] = BOOTP_REPLY, + [DHCPRELEASE] = BOOTP_REQUEST, + [DHCPINFORM] = BOOTP_REQUEST, +}; + +/** Raw option data for options common to all DHCP requests */ +static uint8_t dhcp_request_options_data[] = { + DHCP_MAX_MESSAGE_SIZE, + DHCP_WORD ( ETH_MAX_MTU - 20 /* IP header */ - 8 /* UDP header */ ), + DHCP_CLIENT_ARCHITECTURE, DHCP_WORD ( 0 ), + DHCP_CLIENT_NDI, DHCP_OPTION ( 1 /* UNDI */ , 2, 1 /* v2.1 */ ), + DHCP_VENDOR_CLASS_ID, + DHCP_STRING ( 'P', 'X', 'E', 'C', 'l', 'i', 'e', 'n', 't', ':', + 'A', 'r', 'c', 'h', ':', '0', '0', '0', '0', '0', ':', + 'U', 'N', 'D', 'I', ':', '0', '0', '2', '0', '0', '1' ), + DHCP_USER_CLASS_ID, + DHCP_STRING ( 'g', 'P', 'X', 'E' ), + DHCP_PARAMETER_REQUEST_LIST, + DHCP_OPTION ( DHCP_SUBNET_MASK, DHCP_ROUTERS, DHCP_DNS_SERVERS, + DHCP_LOG_SERVERS, DHCP_HOST_NAME, DHCP_DOMAIN_NAME, + DHCP_ROOT_PATH, DHCP_VENDOR_ENCAP, DHCP_VENDOR_CLASS_ID, + DHCP_TFTP_SERVER_NAME, DHCP_BOOTFILE_NAME, + DHCP_EB_ENCAP, DHCP_ISCSI_INITIATOR_IQN ), + DHCP_END +}; + +/** Version number feature */ +FEATURE_VERSION ( VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH ); + +/** DHCP server address setting */ +struct setting dhcp_server_setting __setting = { + .name = "dhcp-server", + .description = "DHCP server address", + .tag = DHCP_SERVER_IDENTIFIER, + .type = &setting_type_ipv4, +}; + +/** DHCP user class setting */ +struct setting user_class_setting __setting = { + .name = "user-class", + .description = "User class identifier", + .tag = DHCP_USER_CLASS_ID, + .type = &setting_type_string, +}; + +/** + * Name a DHCP packet type + * + * @v msgtype DHCP message type + * @ret string DHCP mesasge type name + */ +static inline const char * dhcp_msgtype_name ( unsigned int msgtype ) { + switch ( msgtype ) { + case DHCPNONE: return "BOOTP"; /* Non-DHCP packet */ + case DHCPDISCOVER: return "DHCPDISCOVER"; + case DHCPOFFER: return "DHCPOFFER"; + case DHCPREQUEST: return "DHCPREQUEST"; + case DHCPDECLINE: return "DHCPDECLINE"; + case DHCPACK: return "DHCPACK"; + case DHCPNAK: return "DHCPNAK"; + case DHCPRELEASE: return "DHCPRELEASE"; + case DHCPINFORM: return "DHCPINFORM"; + default: return "DHCP<invalid>"; + } +} + +/** + * Calculate DHCP transaction ID for a network device + * + * @v netdev Network device + * @ret xid DHCP XID + * + * Extract the least significant bits of the hardware address for use + * as the transaction ID. + */ +static uint32_t dhcp_xid ( struct net_device *netdev ) { + uint32_t xid; + + memcpy ( &xid, ( netdev->ll_addr + netdev->ll_protocol->ll_addr_len + - sizeof ( xid ) ), sizeof ( xid ) ); + return xid; +} + +/**************************************************************************** + * + * DHCP session + * + */ + +struct dhcp_session; + +/** DHCP session state operations */ +struct dhcp_session_state { + /** State name */ + const char *name; + /** + * Construct transmitted packet + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ + int ( * tx ) ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer ); + /** Handle received packet + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ + void ( * rx ) ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, + uint8_t msgtype, struct in_addr server_id ); + /** Handle timer expiry + * + * @v dhcp DHCP session + */ + void ( * expired ) ( struct dhcp_session *dhcp ); + /** Transmitted message type */ + uint8_t tx_msgtype; + /** Apply minimum timeout */ + uint8_t apply_min_timeout; +}; + +static struct dhcp_session_state dhcp_state_discover; +static struct dhcp_session_state dhcp_state_request; +static struct dhcp_session_state dhcp_state_proxy; +static struct dhcp_session_state dhcp_state_pxebs; + +/** A DHCP session */ +struct dhcp_session { + /** Reference counter */ + struct refcnt refcnt; + /** Job control interface */ + struct job_interface job; + /** Data transfer interface */ + struct xfer_interface xfer; + + /** Network device being configured */ + struct net_device *netdev; + /** Local socket address */ + struct sockaddr_in local; + /** State of the session */ + struct dhcp_session_state *state; + + /** Offered IP address */ + struct in_addr offer; + /** DHCP server */ + struct in_addr server; + /** DHCP offer priority */ + int priority; + + /** ProxyDHCP protocol extensions should be ignored */ + int no_pxedhcp; + /** ProxyDHCP server */ + struct in_addr proxy_server; + /** ProxyDHCP port */ + uint16_t proxy_port; + /** ProxyDHCP server priority */ + int proxy_priority; + + /** PXE Boot Server type */ + uint16_t pxe_type; + /** List of PXE Boot Servers to attempt */ + struct in_addr *pxe_attempt; + /** List of PXE Boot Servers to accept */ + struct in_addr *pxe_accept; + + /** Retransmission timer */ + struct retry_timer timer; + /** Start time of the current state (in ticks) */ + unsigned long start; +}; + +/** + * Free DHCP session + * + * @v refcnt Reference counter + */ +static void dhcp_free ( struct refcnt *refcnt ) { + struct dhcp_session *dhcp = + container_of ( refcnt, struct dhcp_session, refcnt ); + + netdev_put ( dhcp->netdev ); + free ( dhcp ); +} + +/** + * Mark DHCP session as complete + * + * @v dhcp DHCP session + * @v rc Return status code + */ +static void dhcp_finished ( struct dhcp_session *dhcp, int rc ) { + + /* Block futher incoming messages */ + job_nullify ( &dhcp->job ); + xfer_nullify ( &dhcp->xfer ); + + /* Stop retry timer */ + stop_timer ( &dhcp->timer ); + + /* Free resources and close interfaces */ + xfer_close ( &dhcp->xfer, rc ); + job_done ( &dhcp->job, rc ); +} + +/** + * Transition to new DHCP session state + * + * @v dhcp DHCP session + * @v state New session state + */ +static void dhcp_set_state ( struct dhcp_session *dhcp, + struct dhcp_session_state *state ) { + + DBGC ( dhcp, "DHCP %p entering %s state\n", dhcp, state->name ); + dhcp->state = state; + dhcp->start = currticks(); + stop_timer ( &dhcp->timer ); + dhcp->timer.min_timeout = + ( state->apply_min_timeout ? DHCP_MIN_TIMEOUT : 0 ); + dhcp->timer.max_timeout = DHCP_MAX_TIMEOUT; + start_timer_nodelay ( &dhcp->timer ); +} + +/**************************************************************************** + * + * DHCP state machine + * + */ + +/** + * Construct transmitted packet for DHCP discovery + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ +static int dhcp_discovery_tx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt __unused, + struct sockaddr_in *peer ) { + + DBGC ( dhcp, "DHCP %p DHCPDISCOVER\n", dhcp ); + + /* Set server address */ + peer->sin_addr.s_addr = INADDR_BROADCAST; + peer->sin_port = htons ( BOOTPS_PORT ); + + return 0; +} + +/** + * Handle received packet during DHCP discovery + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ +static void dhcp_discovery_rx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { + struct in_addr ip; + char vci[9]; /* "PXEClient" */ + int vci_len; + int has_pxeclient; + int pxeopts_len; + int has_pxeopts; + int8_t priority = 0; + uint8_t no_pxedhcp = 0; + unsigned long elapsed; + + DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, + dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + if ( server_id.s_addr != peer->sin_addr.s_addr ) + DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) ); + + /* Identify offered IP address */ + ip = dhcppkt->dhcphdr->yiaddr; + if ( ip.s_addr ) + DBGC ( dhcp, " for %s", inet_ntoa ( ip ) ); + + /* Identify "PXEClient" vendor class */ + vci_len = dhcppkt_fetch ( dhcppkt, DHCP_VENDOR_CLASS_ID, + vci, sizeof ( vci ) ); + has_pxeclient = ( ( vci_len >= ( int ) sizeof ( vci ) ) && + ( strncmp ( "PXEClient", vci, sizeof (vci) ) == 0 )); + + /* Identify presence of vendor-specific options */ + pxeopts_len = dhcppkt_fetch ( dhcppkt, DHCP_VENDOR_ENCAP, NULL, 0 ); + has_pxeopts = ( pxeopts_len >= 0 ); + if ( has_pxeclient ) + DBGC ( dhcp, "%s", ( has_pxeopts ? " pxe" : " proxy" ) ); + + /* Identify priority */ + dhcppkt_fetch ( dhcppkt, DHCP_EB_PRIORITY, &priority, + sizeof ( priority ) ); + if ( priority ) + DBGC ( dhcp, " pri %d", priority ); + + /* Identify ignore-PXE flag */ + dhcppkt_fetch ( dhcppkt, DHCP_EB_NO_PXEDHCP, &no_pxedhcp, + sizeof ( no_pxedhcp ) ); + if ( no_pxedhcp ) + DBGC ( dhcp, " nopxe" ); + DBGC ( dhcp, "\n" ); + + /* Select as DHCP offer, if applicable */ + if ( ip.s_addr && ( peer->sin_port == htons ( BOOTPS_PORT ) ) && + ( ( msgtype == DHCPOFFER ) || ( ! msgtype /* BOOTP */ ) ) && + ( priority >= dhcp->priority ) ) { + dhcp->offer = ip; + dhcp->server = server_id; + dhcp->priority = priority; + dhcp->no_pxedhcp = no_pxedhcp; + } + + /* Select as ProxyDHCP offer, if applicable */ + if ( has_pxeclient && ( msgtype == DHCPOFFER ) && + ( priority >= dhcp->proxy_priority ) ) { + /* If the offer already includes the PXE options, then + * assume that we can send the ProxyDHCPREQUEST to + * port 67 (since the DHCPDISCOVER that triggered this + * ProxyDHCPOFFER was sent to port 67). Otherwise, + * send the ProxyDHCPREQUEST to port 4011. + */ + dhcp->proxy_server = server_id; + dhcp->proxy_port = ( has_pxeopts ? htons ( BOOTPS_PORT ) + : htons ( PXE_PORT ) ); + dhcp->proxy_priority = priority; + } + + /* We can exit the discovery state when we have a valid + * DHCPOFFER, and either: + * + * o The DHCPOFFER instructs us to ignore ProxyDHCPOFFERs, or + * o We have a valid ProxyDHCPOFFER, or + * o We have allowed sufficient time for ProxyDHCPOFFERs. + */ + + /* If we don't yet have a DHCPOFFER, do nothing */ + if ( ! dhcp->offer.s_addr ) + return; + + /* If we can't yet transition to DHCPREQUEST, do nothing */ + elapsed = ( currticks() - dhcp->start ); + if ( ! ( dhcp->no_pxedhcp || dhcp->proxy_server.s_addr || + ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) ) + return; + + /* Transition to DHCPREQUEST */ + dhcp_set_state ( dhcp, &dhcp_state_request ); +} + +/** + * Handle timer expiry during DHCP discovery + * + * @v dhcp DHCP session + */ +static void dhcp_discovery_expired ( struct dhcp_session *dhcp ) { + unsigned long elapsed = ( currticks() - dhcp->start ); + + /* Give up waiting for ProxyDHCP before we reach the failure point */ + if ( dhcp->offer.s_addr && ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) { + dhcp_set_state ( dhcp, &dhcp_state_request ); + return; + } + + /* Otherwise, retransmit current packet */ + dhcp_tx ( dhcp ); +} + +/** DHCP discovery state operations */ +static struct dhcp_session_state dhcp_state_discover = { + .name = "discovery", + .tx = dhcp_discovery_tx, + .rx = dhcp_discovery_rx, + .expired = dhcp_discovery_expired, + .tx_msgtype = DHCPDISCOVER, + .apply_min_timeout = 1, +}; + +/** + * Construct transmitted packet for DHCP request + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ +static int dhcp_request_tx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer ) { + int rc; + + DBGC ( dhcp, "DHCP %p DHCPREQUEST to %s:%d", + dhcp, inet_ntoa ( dhcp->server ), BOOTPS_PORT ); + DBGC ( dhcp, " for %s\n", inet_ntoa ( dhcp->offer ) ); + + /* Set server ID */ + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_SERVER_IDENTIFIER, + &dhcp->server, + sizeof ( dhcp->server ) ) ) != 0 ) + return rc; + + /* Set requested IP address */ + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_REQUESTED_ADDRESS, + &dhcp->offer, + sizeof ( dhcp->offer ) ) ) != 0 ) + return rc; + + /* Set server address */ + peer->sin_addr.s_addr = INADDR_BROADCAST; + peer->sin_port = htons ( BOOTPS_PORT ); + + return 0; +} + +/** + * Handle received packet during DHCP request + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ +static void dhcp_request_rx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { + struct in_addr ip; + struct settings *parent; + int rc; + + DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, + dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + if ( server_id.s_addr != peer->sin_addr.s_addr ) + DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) ); + + /* Identify leased IP address */ + ip = dhcppkt->dhcphdr->yiaddr; + if ( ip.s_addr ) + DBGC ( dhcp, " for %s", inet_ntoa ( ip ) ); + DBGC ( dhcp, "\n" ); + + /* Filter out unacceptable responses */ + if ( peer->sin_port != htons ( BOOTPS_PORT ) ) + return; + if ( msgtype /* BOOTP */ && ( msgtype != DHCPACK ) ) + return; + if ( server_id.s_addr != dhcp->server.s_addr ) + return; + + /* Record assigned address */ + dhcp->local.sin_addr = ip; + + /* Register settings */ + parent = netdev_settings ( dhcp->netdev ); + if ( ( rc = register_settings ( &dhcppkt->settings, parent ) ) != 0 ){ + DBGC ( dhcp, "DHCP %p could not register settings: %s\n", + dhcp, strerror ( rc ) ); + dhcp_finished ( dhcp, rc ); + return; + } + + /* Start ProxyDHCPREQUEST if applicable */ + if ( dhcp->proxy_server.s_addr /* Have ProxyDHCP server */ && + ( ! dhcp->no_pxedhcp ) /* ProxyDHCP not disabled */ && + ( /* ProxyDHCP server is not just the DHCP server itself */ + ( dhcp->proxy_server.s_addr != dhcp->server.s_addr ) || + ( dhcp->proxy_port != htons ( BOOTPS_PORT ) ) ) ) { + dhcp_set_state ( dhcp, &dhcp_state_proxy ); + return; + } + + /* Terminate DHCP */ + dhcp_finished ( dhcp, 0 ); +} + +/** + * Handle timer expiry during DHCP discovery + * + * @v dhcp DHCP session + */ +static void dhcp_request_expired ( struct dhcp_session *dhcp ) { + + /* Retransmit current packet */ + dhcp_tx ( dhcp ); +} + +/** DHCP request state operations */ +static struct dhcp_session_state dhcp_state_request = { + .name = "request", + .tx = dhcp_request_tx, + .rx = dhcp_request_rx, + .expired = dhcp_request_expired, + .tx_msgtype = DHCPREQUEST, + .apply_min_timeout = 0, +}; + +/** + * Construct transmitted packet for ProxyDHCP request + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ +static int dhcp_proxy_tx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer ) { + int rc; + + DBGC ( dhcp, "DHCP %p ProxyDHCP REQUEST to %s:%d\n", dhcp, + inet_ntoa ( dhcp->proxy_server ), ntohs ( dhcp->proxy_port ) ); + + /* Set server ID */ + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_SERVER_IDENTIFIER, + &dhcp->proxy_server, + sizeof ( dhcp->proxy_server ) ) ) != 0 ) + return rc; + + /* Set server address */ + peer->sin_addr = dhcp->proxy_server; + peer->sin_port = dhcp->proxy_port; + + return 0; +} + +/** + * Handle received packet during ProxyDHCP request + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ +static void dhcp_proxy_rx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { + int rc; + + DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, + dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + if ( server_id.s_addr != peer->sin_addr.s_addr ) + DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) ); + DBGC ( dhcp, "\n" ); + + /* Filter out unacceptable responses */ + if ( peer->sin_port != dhcp->proxy_port ) + return; + if ( msgtype != DHCPACK ) + return; + if ( server_id.s_addr /* Linux PXE server omits server ID */ && + ( server_id.s_addr != dhcp->proxy_server.s_addr ) ) + return; + + /* Register settings */ + dhcppkt->settings.name = PROXYDHCP_SETTINGS_NAME; + if ( ( rc = register_settings ( &dhcppkt->settings, NULL ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not register settings: %s\n", + dhcp, strerror ( rc ) ); + dhcp_finished ( dhcp, rc ); + return; + } + + /* Terminate DHCP */ + dhcp_finished ( dhcp, 0 ); +} + +/** + * Handle timer expiry during ProxyDHCP request + * + * @v dhcp DHCP session + */ +static void dhcp_proxy_expired ( struct dhcp_session *dhcp ) { + unsigned long elapsed = ( currticks() - dhcp->start ); + + /* Give up waiting for ProxyDHCP before we reach the failure point */ + if ( elapsed > PROXYDHCP_MAX_TIMEOUT ) { + dhcp_finished ( dhcp, 0 ); + return; + } + + /* Retransmit current packet */ + dhcp_tx ( dhcp ); +} + +/** ProxyDHCP request state operations */ +static struct dhcp_session_state dhcp_state_proxy = { + .name = "ProxyDHCP", + .tx = dhcp_proxy_tx, + .rx = dhcp_proxy_rx, + .expired = dhcp_proxy_expired, + .tx_msgtype = DHCPREQUEST, + .apply_min_timeout = 0, +}; + +/** + * Construct transmitted packet for PXE Boot Server Discovery + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ +static int dhcp_pxebs_tx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer ) { + struct dhcp_pxe_boot_menu_item menu_item = { 0, 0 }; + int rc; + + /* Set server address */ + peer->sin_addr = *(dhcp->pxe_attempt); + peer->sin_port = ( ( peer->sin_addr.s_addr == INADDR_BROADCAST ) ? + htons ( BOOTPS_PORT ) : htons ( PXE_PORT ) ); + + DBGC ( dhcp, "DHCP %p PXEBS REQUEST to %s:%d for type %d\n", + dhcp, inet_ntoa ( peer->sin_addr ), ntohs ( peer->sin_port ), + ntohs ( dhcp->pxe_type ) ); + + /* Set boot menu item */ + menu_item.type = dhcp->pxe_type; + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_PXE_BOOT_MENU_ITEM, + &menu_item, sizeof ( menu_item ) ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Check to see if PXE Boot Server address is acceptable + * + * @v dhcp DHCP session + * @v bs Boot Server address + * @ret accept Boot Server is acceptable + */ +static int dhcp_pxebs_accept ( struct dhcp_session *dhcp, + struct in_addr bs ) { + struct in_addr *accept; + + /* Accept if we have no acceptance filter */ + if ( ! dhcp->pxe_accept ) + return 1; + + /* Scan through acceptance list */ + for ( accept = dhcp->pxe_accept ; accept->s_addr ; accept++ ) { + if ( accept->s_addr == bs.s_addr ) + return 1; + } + + DBGC ( dhcp, "DHCP %p rejecting server %s\n", + dhcp, inet_ntoa ( bs ) ); + return 0; +} + +/** + * Handle received packet during PXE Boot Server Discovery + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ +static void dhcp_pxebs_rx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { + struct dhcp_pxe_boot_menu_item menu_item = { 0, 0 }; + int rc; + + DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, + dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + if ( server_id.s_addr != peer->sin_addr.s_addr ) + DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) ); + + /* Identify boot menu item */ + dhcppkt_fetch ( dhcppkt, DHCP_PXE_BOOT_MENU_ITEM, + &menu_item, sizeof ( menu_item ) ); + if ( menu_item.type ) + DBGC ( dhcp, " for type %d", ntohs ( menu_item.type ) ); + DBGC ( dhcp, "\n" ); + + /* Filter out unacceptable responses */ + if ( ( peer->sin_port != htons ( BOOTPS_PORT ) ) && + ( peer->sin_port != htons ( PXE_PORT ) ) ) + return; + if ( msgtype != DHCPACK ) + return; + if ( menu_item.type != dhcp->pxe_type ) + return; + if ( ! dhcp_pxebs_accept ( dhcp, ( server_id.s_addr ? + server_id : peer->sin_addr ) ) ) + return; + + /* Register settings */ + dhcppkt->settings.name = PXEBS_SETTINGS_NAME; + if ( ( rc = register_settings ( &dhcppkt->settings, NULL ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not register settings: %s\n", + dhcp, strerror ( rc ) ); + dhcp_finished ( dhcp, rc ); + return; + } + + /* Terminate DHCP */ + dhcp_finished ( dhcp, 0 ); +} + +/** + * Handle timer expiry during PXE Boot Server Discovery + * + * @v dhcp DHCP session + */ +static void dhcp_pxebs_expired ( struct dhcp_session *dhcp ) { + unsigned long elapsed = ( currticks() - dhcp->start ); + + /* Give up waiting before we reach the failure point, and fail + * over to the next server in the attempt list + */ + if ( elapsed > PXEBS_MAX_TIMEOUT ) { + dhcp->pxe_attempt++; + if ( dhcp->pxe_attempt->s_addr ) { + dhcp_set_state ( dhcp, &dhcp_state_pxebs ); + return; + } else { + dhcp_finished ( dhcp, -ETIMEDOUT ); + return; + } + } + + /* Retransmit current packet */ + dhcp_tx ( dhcp ); +} + +/** PXE Boot Server Discovery state operations */ +static struct dhcp_session_state dhcp_state_pxebs = { + .name = "PXEBS", + .tx = dhcp_pxebs_tx, + .rx = dhcp_pxebs_rx, + .expired = dhcp_pxebs_expired, + .tx_msgtype = DHCPREQUEST, + .apply_min_timeout = 1, +}; + +/**************************************************************************** + * + * Packet construction + * + */ + +/** + * Construct DHCP client hardware address field and broadcast flag + * + * @v netdev Network device + * @v hlen DHCP hardware address length to fill in + * @v flags DHCP flags to fill in + * @ret chaddr DHCP client hardware address + */ +void * dhcp_chaddr ( struct net_device *netdev, uint8_t *hlen, + uint16_t *flags ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + typeof ( ( ( struct dhcphdr * ) NULL )->chaddr ) chaddr; + + /* If the link-layer address cannot fit into the chaddr field + * (as is the case for IPoIB) then try using the hardware + * address instead. If we do this, set the broadcast flag, + * since chaddr then does not represent a valid link-layer + * address for the return path. + * + * If even the hardware address is too large, use an empty + * chaddr field and set the broadcast flag. + * + * This goes against RFC4390, but RFC4390 mandates that we use + * a DHCP client identifier that conforms with RFC4361, which + * we cannot do without either persistent (NIC-independent) + * storage, or by eliminating the hardware address completely + * from the DHCP packet, which seems unfriendly to users. + */ + if ( ( *hlen = ll_protocol->ll_addr_len ) <= sizeof ( chaddr ) ) { + return netdev->ll_addr; + } + *flags = htons ( BOOTP_FL_BROADCAST ); + if ( ( *hlen = ll_protocol->hw_addr_len ) <= sizeof ( chaddr ) ) { + return netdev->hw_addr; + } else { + *hlen = 0; + return NULL; + } +} + +/** + * Create a DHCP packet + * + * @v dhcppkt DHCP packet structure to fill in + * @v netdev Network device + * @v msgtype DHCP message type + * @v options Initial options to include (or NULL) + * @v options_len Length of initial options + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Creates a DHCP packet in the specified buffer, and initialise a + * DHCP packet structure. + */ +int dhcp_create_packet ( struct dhcp_packet *dhcppkt, + struct net_device *netdev, uint8_t msgtype, + const void *options, size_t options_len, + void *data, size_t max_len ) { + struct dhcphdr *dhcphdr = data; + void *chaddr; + int rc; + + /* Sanity check */ + if ( max_len < ( sizeof ( *dhcphdr ) + options_len ) ) + return -ENOSPC; + + /* Initialise DHCP packet content */ + memset ( dhcphdr, 0, max_len ); + dhcphdr->xid = dhcp_xid ( netdev ); + dhcphdr->magic = htonl ( DHCP_MAGIC_COOKIE ); + dhcphdr->htype = ntohs ( netdev->ll_protocol->ll_proto ); + dhcphdr->op = dhcp_op[msgtype]; + chaddr = dhcp_chaddr ( netdev, &dhcphdr->hlen, &dhcphdr->flags ); + memcpy ( dhcphdr->chaddr, chaddr, dhcphdr->hlen ); + memcpy ( dhcphdr->options, options, options_len ); + + /* Initialise DHCP packet structure */ + memset ( dhcppkt, 0, sizeof ( *dhcppkt ) ); + dhcppkt_init ( dhcppkt, data, max_len ); + + /* Set DHCP_MESSAGE_TYPE option */ + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_MESSAGE_TYPE, + &msgtype, sizeof ( msgtype ) ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Create DHCP request packet + * + * @v dhcppkt DHCP packet structure to fill in + * @v netdev Network device + * @v msgtype DHCP message type + * @v ciaddr Client IP address + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Creates a DHCP request packet in the specified buffer, and + * initialise a DHCP packet structure. + */ +int dhcp_create_request ( struct dhcp_packet *dhcppkt, + struct net_device *netdev, unsigned int msgtype, + struct in_addr ciaddr, void *data, size_t max_len ) { + struct device_description *desc = &netdev->dev->desc; + struct dhcp_netdev_desc dhcp_desc; + struct dhcp_client_id client_id; + struct dhcp_client_uuid client_uuid; + uint8_t *dhcp_features; + size_t dhcp_features_len; + size_t ll_addr_len; + ssize_t len; + int rc; + + /* Create DHCP packet */ + if ( ( rc = dhcp_create_packet ( dhcppkt, netdev, msgtype, + dhcp_request_options_data, + sizeof ( dhcp_request_options_data ), + data, max_len ) ) != 0 ) { + DBG ( "DHCP could not create DHCP packet: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Set client IP address */ + dhcppkt->dhcphdr->ciaddr = ciaddr; + + /* Add options to identify the feature list */ +#if 0 + dhcp_features = table_start ( DHCP_FEATURES ); + dhcp_features_len = table_num_entries ( DHCP_FEATURES ); + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_ENCAP, dhcp_features, + dhcp_features_len ) ) != 0 ) { + DBG ( "DHCP could not set features list option: %s\n", + strerror ( rc ) ); + return rc; + } +#endif + + /* Add options to identify the network device */ + dhcp_desc.type = desc->bus_type; + dhcp_desc.vendor = htons ( desc->vendor ); + dhcp_desc.device = htons ( desc->device ); + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_BUS_ID, &dhcp_desc, + sizeof ( dhcp_desc ) ) ) != 0 ) { + DBG ( "DHCP could not set bus ID option: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Add DHCP client identifier. Required for Infiniband, and + * doesn't hurt other link layers. + */ + client_id.ll_proto = ntohs ( netdev->ll_protocol->ll_proto ); + ll_addr_len = netdev->ll_protocol->ll_addr_len; + assert ( ll_addr_len <= sizeof ( client_id.ll_addr ) ); + memcpy ( client_id.ll_addr, netdev->ll_addr, ll_addr_len ); + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_CLIENT_ID, &client_id, + ( ll_addr_len + 1 ) ) ) != 0 ) { + DBG ( "DHCP could not set client ID: %s\n", + strerror ( rc ) ); + return rc; + } + +#if 0 + /* Add client UUID, if we have one. Required for PXE. */ + client_uuid.type = DHCP_CLIENT_UUID_TYPE; + if ( ( len = fetch_uuid_setting ( NULL, &uuid_setting, + &client_uuid.uuid ) ) >= 0 ) { + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_CLIENT_UUID, + &client_uuid, + sizeof ( client_uuid ) ) ) != 0 ) { + DBG ( "DHCP could not set client UUID: %s\n", + strerror ( rc ) ); + return rc; + } + } +#endif + + /* Add user class, if we have one. */ + if ( ( len = fetch_setting_len ( NULL, &user_class_setting ) ) >= 0 ) { + char user_class[len]; + fetch_setting ( NULL, &user_class_setting, user_class, + sizeof ( user_class ) ); + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_USER_CLASS_ID, + &user_class, + sizeof ( user_class ) ) ) != 0 ) { + DBG ( "DHCP could not set user class: %s\n", + strerror ( rc ) ); + return rc; + } + } + + return 0; +} + +/**************************************************************************** + * + * Data transfer interface + * + */ + +/** + * Transmit DHCP request + * + * @v dhcp DHCP session + * @ret rc Return status code + */ +static int dhcp_tx ( struct dhcp_session *dhcp ) { + static struct sockaddr_in peer = { + .sin_family = AF_INET, + }; + struct xfer_metadata meta = { + .netdev = dhcp->netdev, + .src = ( struct sockaddr * ) &dhcp->local, + .dest = ( struct sockaddr * ) &peer, + }; + struct io_buffer *iobuf; + uint8_t msgtype = dhcp->state->tx_msgtype; + struct dhcp_packet dhcppkt; + int rc; + + /* Start retry timer. Do this first so that failures to + * transmit will be retried. + */ + start_timer ( &dhcp->timer ); + + /* Allocate buffer for packet */ + iobuf = xfer_alloc_iob ( &dhcp->xfer, DHCP_MIN_LEN ); + if ( ! iobuf ) + return -ENOMEM; + + /* Create basic DHCP packet in temporary buffer */ + if ( ( rc = dhcp_create_request ( &dhcppkt, dhcp->netdev, msgtype, + dhcp->local.sin_addr, iobuf->data, + iob_tailroom ( iobuf ) ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not construct DHCP request: %s\n", + dhcp, strerror ( rc ) ); + goto done; + } + + /* Fill in packet based on current state */ + if ( ( rc = dhcp->state->tx ( dhcp, &dhcppkt, &peer ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not fill DHCP request: %s\n", + dhcp, strerror ( rc ) ); + goto done; + } + + /* Transmit the packet */ + iob_put ( iobuf, dhcppkt.len ); + if ( ( rc = xfer_deliver_iob_meta ( &dhcp->xfer, iob_disown ( iobuf ), + &meta ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not transmit UDP packet: %s\n", + dhcp, strerror ( rc ) ); + goto done; + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Receive new data + * + * @v xfer Data transfer interface + * @v iobuf I/O buffer + * @v meta Transfer metadata + * @ret rc Return status code + */ +static int dhcp_deliver_iob ( struct xfer_interface *xfer, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct dhcp_session *dhcp = + container_of ( xfer, struct dhcp_session, xfer ); + struct sockaddr_in *peer; + size_t data_len; + struct dhcp_packet *dhcppkt; + struct dhcphdr *dhcphdr; + uint8_t msgtype = 0; + struct in_addr server_id = { 0 }; + int rc = 0; + + /* Sanity checks */ + if ( ! meta->src ) { + DBGC ( dhcp, "DHCP %p received packet without source port\n", + dhcp ); + rc = -EINVAL; + goto err_no_src; + } + peer = ( struct sockaddr_in * ) meta->src; + + /* Create a DHCP packet containing the I/O buffer contents. + * Whilst we could just use the original buffer in situ, that + * would waste the unused space in the packet buffer, and also + * waste a relatively scarce fully-aligned I/O buffer. + */ + data_len = iob_len ( iobuf ); + dhcppkt = zalloc ( sizeof ( *dhcppkt ) + data_len ); + if ( ! dhcppkt ) { + rc = -ENOMEM; + goto err_alloc_dhcppkt; + } + dhcphdr = ( ( ( void * ) dhcppkt ) + sizeof ( *dhcppkt ) ); + memcpy ( dhcphdr, iobuf->data, data_len ); + dhcppkt_init ( dhcppkt, dhcphdr, data_len ); + + /* Identify message type */ + dhcppkt_fetch ( dhcppkt, DHCP_MESSAGE_TYPE, &msgtype, + sizeof ( msgtype ) ); + + /* Identify server ID */ + dhcppkt_fetch ( dhcppkt, DHCP_SERVER_IDENTIFIER, + &server_id, sizeof ( server_id ) ); + + /* Check for matching transaction ID */ + if ( dhcphdr->xid != dhcp_xid ( dhcp->netdev ) ) { + DBGC ( dhcp, "DHCP %p %s from %s:%d has bad transaction " + "ID\n", dhcp, dhcp_msgtype_name ( msgtype ), + inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + rc = -EINVAL; + goto err_xid; + }; + + /* Handle packet based on current state */ + dhcp->state->rx ( dhcp, dhcppkt, peer, msgtype, server_id ); + + err_xid: + dhcppkt_put ( dhcppkt ); + err_alloc_dhcppkt: + err_no_src: + free_iob ( iobuf ); + return rc; +} + +/** DHCP data transfer interface operations */ +static struct xfer_interface_operations dhcp_xfer_operations = { + .close = ignore_xfer_close, + .vredirect = xfer_vreopen, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = dhcp_deliver_iob, + .deliver_raw = xfer_deliver_as_iob, +}; + +/** + * Handle DHCP retry timer expiry + * + * @v timer DHCP retry timer + * @v fail Failure indicator + */ +static void dhcp_timer_expired ( struct retry_timer *timer, int fail ) { + struct dhcp_session *dhcp = + container_of ( timer, struct dhcp_session, timer ); + + /* If we have failed, terminate DHCP */ + if ( fail ) { + dhcp_finished ( dhcp, -ETIMEDOUT ); + return; + } + + /* Handle timer expiry based on current state */ + dhcp->state->expired ( dhcp ); +} + +/**************************************************************************** + * + * Job control interface + * + */ + +/** + * Handle kill() event received via job control interface + * + * @v job DHCP job control interface + */ +static void dhcp_job_kill ( struct job_interface *job ) { + struct dhcp_session *dhcp = + container_of ( job, struct dhcp_session, job ); + + /* Terminate DHCP session */ + dhcp_finished ( dhcp, -ECANCELED ); +} + +/** DHCP job control interface operations */ +static struct job_interface_operations dhcp_job_operations = { + .done = ignore_job_done, + .kill = dhcp_job_kill, + .progress = ignore_job_progress, +}; + +/**************************************************************************** + * + * Instantiators + * + */ + +/** + * DHCP peer address for socket opening + * + * This is a dummy address; the only useful portion is the socket + * family (so that we get a UDP connection). The DHCP client will set + * the IP address and source port explicitly on each transmission. + */ +static struct sockaddr dhcp_peer = { + .sa_family = AF_INET, +}; + +/** + * Start DHCP state machine on a network device + * + * @v job Job control interface + * @v netdev Network device + * @ret rc Return status code + * + * Starts DHCP on the specified network device. If successful, the + * DHCPACK (and ProxyDHCPACK, if applicable) will be registered as + * option sources. + */ +int start_dhcp ( struct job_interface *job, struct net_device *netdev ) { + struct dhcp_session *dhcp; + int rc; + + /* Allocate and initialise structure */ + dhcp = zalloc ( sizeof ( *dhcp ) ); + if ( ! dhcp ) + return -ENOMEM; + dhcp->refcnt.free = dhcp_free; + job_init ( &dhcp->job, &dhcp_job_operations, &dhcp->refcnt ); + xfer_init ( &dhcp->xfer, &dhcp_xfer_operations, &dhcp->refcnt ); + dhcp->netdev = netdev_get ( netdev ); + dhcp->local.sin_family = AF_INET; + dhcp->local.sin_port = htons ( BOOTPC_PORT ); + dhcp->timer.expired = dhcp_timer_expired; + + /* Instantiate child objects and attach to our interfaces */ + if ( ( rc = xfer_open_socket ( &dhcp->xfer, SOCK_DGRAM, &dhcp_peer, + ( struct sockaddr * ) &dhcp->local ) ) != 0 ) + goto err; + + /* Enter DHCPDISCOVER state */ + dhcp_set_state ( dhcp, &dhcp_state_discover ); + + /* Attach parent interface, mortalise self, and return */ + job_plug_plug ( &dhcp->job, job ); + ref_put ( &dhcp->refcnt ); + return 0; + + err: + dhcp_finished ( dhcp, rc ); + ref_put ( &dhcp->refcnt ); + return rc; +} + +/** + * Retrieve list of PXE boot servers for a given server type + * + * @v dhcp DHCP session + * @v raw DHCP PXE boot server list + * @v raw_len Length of DHCP PXE boot server list + * @v ip IP address list to fill in + * + * The caller must ensure that the IP address list has sufficient + * space. + */ +static void pxebs_list ( struct dhcp_session *dhcp, void *raw, + size_t raw_len, struct in_addr *ip ) { + struct dhcp_pxe_boot_server *server = raw; + size_t server_len; + unsigned int i; + + while ( raw_len ) { + if ( raw_len < sizeof ( *server ) ) { + DBGC ( dhcp, "DHCP %p malformed PXE server list\n", + dhcp ); + break; + } + server_len = offsetof ( typeof ( *server ), + ip[ server->num_ip ] ); + if ( raw_len < server_len ) { + DBGC ( dhcp, "DHCP %p malformed PXE server list\n", + dhcp ); + break; + } + if ( server->type == dhcp->pxe_type ) { + for ( i = 0 ; i < server->num_ip ; i++ ) + *(ip++) = server->ip[i]; + } + server = ( ( ( void * ) server ) + server_len ); + raw_len -= server_len; + } +} + +/** + * Start PXE Boot Server Discovery on a network device + * + * @v job Job control interface + * @v netdev Network device + * @v pxe_type PXE server type + * @ret rc Return status code + * + * Starts PXE Boot Server Discovery on the specified network device. + * If successful, the Boot Server ACK will be registered as an option + * source. + */ +int start_pxebs ( struct job_interface *job, struct net_device *netdev, + unsigned int pxe_type ) { + struct setting pxe_discovery_control_setting = + { .tag = DHCP_PXE_DISCOVERY_CONTROL }; + struct setting pxe_boot_servers_setting = + { .tag = DHCP_PXE_BOOT_SERVERS }; + struct setting pxe_boot_server_mcast_setting = + { .tag = DHCP_PXE_BOOT_SERVER_MCAST }; + ssize_t pxebs_list_len; + struct dhcp_session *dhcp; + struct in_addr *ip; + unsigned int pxe_discovery_control; + int rc; + + /* Get upper bound for PXE boot server IP address list */ + pxebs_list_len = fetch_setting_len ( NULL, &pxe_boot_servers_setting ); + if ( pxebs_list_len < 0 ) + pxebs_list_len = 0; + + /* Allocate and initialise structure */ + dhcp = zalloc ( sizeof ( *dhcp ) + sizeof ( *ip ) /* mcast */ + + sizeof ( *ip ) /* bcast */ + pxebs_list_len + + sizeof ( *ip ) /* terminator */ ); + if ( ! dhcp ) + return -ENOMEM; + dhcp->refcnt.free = dhcp_free; + job_init ( &dhcp->job, &dhcp_job_operations, &dhcp->refcnt ); + xfer_init ( &dhcp->xfer, &dhcp_xfer_operations, &dhcp->refcnt ); + dhcp->netdev = netdev_get ( netdev ); + dhcp->local.sin_family = AF_INET; + fetch_ipv4_setting ( netdev_settings ( netdev ), &ip_setting, + &dhcp->local.sin_addr ); + dhcp->local.sin_port = htons ( BOOTPC_PORT ); + dhcp->pxe_type = htons ( pxe_type ); + dhcp->timer.expired = dhcp_timer_expired; + + /* Construct PXE boot server IP address lists */ + pxe_discovery_control = + fetch_uintz_setting ( NULL, &pxe_discovery_control_setting ); + ip = ( ( ( void * ) dhcp ) + sizeof ( *dhcp ) ); + dhcp->pxe_attempt = ip; + if ( ! ( pxe_discovery_control & PXEBS_NO_MULTICAST ) ) { + fetch_ipv4_setting ( NULL, &pxe_boot_server_mcast_setting, ip); + if ( ip->s_addr ) + ip++; + } + if ( ! ( pxe_discovery_control & PXEBS_NO_BROADCAST ) ) + (ip++)->s_addr = INADDR_BROADCAST; + if ( pxe_discovery_control & PXEBS_NO_UNKNOWN_SERVERS ) + dhcp->pxe_accept = ip; + if ( pxebs_list_len ) { + uint8_t buf[pxebs_list_len]; + + fetch_setting ( NULL, &pxe_boot_servers_setting, + buf, sizeof ( buf ) ); + pxebs_list ( dhcp, buf, sizeof ( buf ), ip ); + } + if ( ! dhcp->pxe_attempt->s_addr ) { + DBGC ( dhcp, "DHCP %p has no PXE boot servers for type %04x\n", + dhcp, pxe_type ); + rc = -EINVAL; + goto err; + } + + /* Dump out PXE server lists */ + DBGC ( dhcp, "DHCP %p attempting", dhcp ); + for ( ip = dhcp->pxe_attempt ; ip->s_addr ; ip++ ) + DBGC ( dhcp, " %s", inet_ntoa ( *ip ) ); + DBGC ( dhcp, "\n" ); + if ( dhcp->pxe_accept ) { + DBGC ( dhcp, "DHCP %p accepting", dhcp ); + for ( ip = dhcp->pxe_accept ; ip->s_addr ; ip++ ) + DBGC ( dhcp, " %s", inet_ntoa ( *ip ) ); + DBGC ( dhcp, "\n" ); + } + + /* Instantiate child objects and attach to our interfaces */ + if ( ( rc = xfer_open_socket ( &dhcp->xfer, SOCK_DGRAM, &dhcp_peer, + ( struct sockaddr * ) &dhcp->local ) ) != 0 ) + goto err; + + /* Enter PXEBS state */ + dhcp_set_state ( dhcp, &dhcp_state_pxebs ); + + /* Attach parent interface, mortalise self, and return */ + job_plug_plug ( &dhcp->job, job ); + ref_put ( &dhcp->refcnt ); + return 0; + + err: + dhcp_finished ( dhcp, rc ); + ref_put ( &dhcp->refcnt ); + return rc; +} diff --git a/debian/grub-extras/disabled/gpxe/src/net/udp/dns.c b/debian/grub-extras/disabled/gpxe/src/net/udp/dns.c new file mode 100644 index 0000000..3bb6829 --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/udp/dns.c @@ -0,0 +1,602 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * Portions copyright (C) 2004 Anselm M. Hoffmeister + * <stockholm@users.sourceforge.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <byteswap.h> +#include <gpxe/refcnt.h> +#include <gpxe/xfer.h> +#include <gpxe/open.h> +#include <gpxe/resolv.h> +#include <gpxe/retry.h> +#include <gpxe/tcpip.h> +#include <gpxe/settings.h> +#include <gpxe/features.h> +#include <gpxe/dns.h> + +/** @file + * + * DNS protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "DNS", DHCP_EB_FEATURE_DNS, 1 ); + +/** The DNS server */ +static struct sockaddr_tcpip nameserver = { + .st_port = htons ( DNS_PORT ), +}; + +/** The local domain */ +static char *localdomain; + +/** A DNS request */ +struct dns_request { + /** Reference counter */ + struct refcnt refcnt; + /** Name resolution interface */ + struct resolv_interface resolv; + /** Data transfer interface */ + struct xfer_interface socket; + /** Retry timer */ + struct retry_timer timer; + + /** Socket address to fill in with resolved address */ + struct sockaddr sa; + /** Current query packet */ + struct dns_query query; + /** Location of query info structure within current packet + * + * The query info structure is located immediately after the + * compressed name. + */ + struct dns_query_info *qinfo; + /** Recursion counter */ + unsigned int recursion; +}; + +/** + * Mark DNS request as complete + * + * @v dns DNS request + * @v rc Return status code + */ +static void dns_done ( struct dns_request *dns, int rc ) { + + /* Stop the retry timer */ + stop_timer ( &dns->timer ); + + /* Close data transfer interface */ + xfer_nullify ( &dns->socket ); + xfer_close ( &dns->socket, rc ); + + /* Mark name resolution as complete */ + resolv_done ( &dns->resolv, &dns->sa, rc ); +} + +/** + * Compare DNS reply name against the query name from the original request + * + * @v dns DNS request + * @v reply DNS reply + * @v rname Reply name + * @ret zero Names match + * @ret non-zero Names do not match + */ +static int dns_name_cmp ( struct dns_request *dns, + const struct dns_header *reply, + const char *rname ) { + const char *qname = dns->query.payload; + int i; + + while ( 1 ) { + /* Obtain next section of rname */ + while ( ( *rname ) & 0xc0 ) { + rname = ( ( ( char * ) reply ) + + ( ntohs( *((uint16_t *)rname) ) & ~0xc000 )); + } + /* Check that lengths match */ + if ( *rname != *qname ) + return -1; + /* If length is zero, we have reached the end */ + if ( ! *qname ) + return 0; + /* Check that data matches */ + for ( i = *qname + 1; i > 0 ; i-- ) { + if ( *(rname++) != *(qname++) ) + return -1; + } + } +} + +/** + * Skip over a (possibly compressed) DNS name + * + * @v name DNS name + * @ret name Next DNS name + */ +static const char * dns_skip_name ( const char *name ) { + while ( 1 ) { + if ( ! *name ) { + /* End of name */ + return ( name + 1); + } + if ( *name & 0xc0 ) { + /* Start of a compressed name */ + return ( name + 2 ); + } + /* Uncompressed name portion */ + name += *name + 1; + } +} + +/** + * Find an RR in a reply packet corresponding to our query + * + * @v dns DNS request + * @v reply DNS reply + * @ret rr DNS RR, or NULL if not found + */ +static union dns_rr_info * dns_find_rr ( struct dns_request *dns, + const struct dns_header *reply ) { + int i, cmp; + const char *p = ( ( char * ) reply ) + sizeof ( struct dns_header ); + union dns_rr_info *rr_info; + + /* Skip over the questions section */ + for ( i = ntohs ( reply->qdcount ) ; i > 0 ; i-- ) { + p = dns_skip_name ( p ) + sizeof ( struct dns_query_info ); + } + + /* Process the answers section */ + for ( i = ntohs ( reply->ancount ) ; i > 0 ; i-- ) { + cmp = dns_name_cmp ( dns, reply, p ); + p = dns_skip_name ( p ); + rr_info = ( ( union dns_rr_info * ) p ); + if ( cmp == 0 ) + return rr_info; + p += ( sizeof ( rr_info->common ) + + ntohs ( rr_info->common.rdlength ) ); + } + + return NULL; +} + +/** + * Append DHCP domain name if available and name is not fully qualified + * + * @v string Name as a NUL-terminated string + * @ret fqdn Fully-qualified domain name, malloc'd copy + * + * The caller must free fqdn which is allocated even if the name is already + * fully qualified. + */ +static char * dns_qualify_name ( const char *string ) { + char *fqdn; + + /* Leave unchanged if already fully-qualified or no local domain */ + if ( ( ! localdomain ) || ( strchr ( string, '.' ) != 0 ) ) + return strdup ( string ); + + /* Append local domain to name */ + return grub_xasprintf ( "%s.%s", string, localdomain ); +} + +/** + * Convert a standard NUL-terminated string to a DNS name + * + * @v string Name as a NUL-terminated string + * @v buf Buffer in which to place DNS name + * @ret next Byte following constructed DNS name + * + * DNS names consist of "<length>element" pairs. + */ +static char * dns_make_name ( const char *string, char *buf ) { + char *length_byte = buf++; + char c; + + while ( ( c = *(string++) ) ) { + if ( c == '.' ) { + *length_byte = buf - length_byte - 1; + length_byte = buf; + } + *(buf++) = c; + } + *length_byte = buf - length_byte - 1; + *(buf++) = '\0'; + return buf; +} + +/** + * Convert an uncompressed DNS name to a NUL-terminated string + * + * @v name DNS name + * @ret string NUL-terminated string + * + * Produce a printable version of a DNS name. Used only for debugging. + */ +static inline char * dns_unmake_name ( char *name ) { + char *p; + unsigned int len; + + p = name; + while ( ( len = *p ) ) { + *(p++) = '.'; + p += len; + } + + return name + 1; +} + +/** + * Decompress a DNS name + * + * @v reply DNS replay + * @v name DNS name + * @v buf Buffer into which to decompress DNS name + * @ret next Byte following decompressed DNS name + */ +static char * dns_decompress_name ( const struct dns_header *reply, + const char *name, char *buf ) { + int i, len; + + do { + /* Obtain next section of name */ + while ( ( *name ) & 0xc0 ) { + name = ( ( char * ) reply + + ( ntohs ( *((uint16_t *)name) ) & ~0xc000 ) ); + } + /* Copy data */ + len = *name; + for ( i = len + 1 ; i > 0 ; i-- ) { + *(buf++) = *(name++); + } + } while ( len ); + return buf; +} + +/** + * Send next packet in DNS request + * + * @v dns DNS request + */ +static int dns_send_packet ( struct dns_request *dns ) { + static unsigned int qid = 0; + size_t qlen; + + /* Increment query ID */ + dns->query.dns.id = htons ( ++qid ); + + DBGC ( dns, "DNS %p sending query ID %d\n", dns, qid ); + + /* Start retransmission timer */ + start_timer ( &dns->timer ); + + /* Send the data */ + qlen = ( ( ( void * ) dns->qinfo ) - ( ( void * ) &dns->query ) + + sizeof ( dns->qinfo ) ); + return xfer_deliver_raw ( &dns->socket, &dns->query, qlen ); +} + +/** + * Handle DNS retransmission timer expiry + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void dns_timer_expired ( struct retry_timer *timer, int fail ) { + struct dns_request *dns = + container_of ( timer, struct dns_request, timer ); + + if ( fail ) { + dns_done ( dns, -ETIMEDOUT ); + } else { + dns_send_packet ( dns ); + } +} + +/** + * Receive new data + * + * @v socket UDP socket + * @v data DNS reply + * @v len Length of DNS reply + * @ret rc Return status code + */ +static int dns_xfer_deliver_raw ( struct xfer_interface *socket, + const void *data, size_t len ) { + struct dns_request *dns = + container_of ( socket, struct dns_request, socket ); + const struct dns_header *reply = data; + union dns_rr_info *rr_info; + struct sockaddr_in *sin; + unsigned int qtype = dns->qinfo->qtype; + + /* Sanity check */ + if ( len < sizeof ( *reply ) ) { + DBGC ( dns, "DNS %p received underlength packet length %zd\n", + dns, len ); + return -EINVAL; + } + + /* Check reply ID matches query ID */ + if ( reply->id != dns->query.dns.id ) { + DBGC ( dns, "DNS %p received unexpected reply ID %d " + "(wanted %d)\n", dns, ntohs ( reply->id ), + ntohs ( dns->query.dns.id ) ); + return -EINVAL; + } + + DBGC ( dns, "DNS %p received reply ID %d\n", dns, ntohs ( reply->id )); + + /* Stop the retry timer. After this point, each code path + * must either restart the timer by calling dns_send_packet(), + * or mark the DNS operation as complete by calling + * dns_done() + */ + stop_timer ( &dns->timer ); + + /* Search through response for useful answers. Do this + * multiple times, to take advantage of useful nameservers + * which send us e.g. the CNAME *and* the A record for the + * pointed-to name. + */ + while ( ( rr_info = dns_find_rr ( dns, reply ) ) ) { + switch ( rr_info->common.type ) { + + case htons ( DNS_TYPE_A ): + + /* Found the target A record */ + DBGC ( dns, "DNS %p found address %s\n", + dns, inet_ntoa ( rr_info->a.in_addr ) ); + sin = ( struct sockaddr_in * ) &dns->sa; + sin->sin_family = AF_INET; + sin->sin_addr = rr_info->a.in_addr; + + /* Mark operation as complete */ + dns_done ( dns, 0 ); + return 0; + + case htons ( DNS_TYPE_CNAME ): + + /* Found a CNAME record; update query and recurse */ + DBGC ( dns, "DNS %p found CNAME\n", dns ); + dns->qinfo = ( void * ) dns_decompress_name ( reply, + rr_info->cname.cname, + dns->query.payload ); + dns->qinfo->qtype = htons ( DNS_TYPE_A ); + dns->qinfo->qclass = htons ( DNS_CLASS_IN ); + + /* Terminate the operation if we recurse too far */ + if ( ++dns->recursion > DNS_MAX_CNAME_RECURSION ) { + DBGC ( dns, "DNS %p recursion exceeded\n", + dns ); + dns_done ( dns, -ELOOP ); + return 0; + } + break; + + default: + DBGC ( dns, "DNS %p got unknown record type %d\n", + dns, ntohs ( rr_info->common.type ) ); + break; + } + } + + /* Determine what to do next based on the type of query we + * issued and the reponse we received + */ + switch ( qtype ) { + + case htons ( DNS_TYPE_A ): + /* We asked for an A record and got nothing; + * try the CNAME. + */ + DBGC ( dns, "DNS %p found no A record; trying CNAME\n", dns ); + dns->qinfo->qtype = htons ( DNS_TYPE_CNAME ); + dns_send_packet ( dns ); + return 0; + + case htons ( DNS_TYPE_CNAME ): + /* We asked for a CNAME record. If we got a response + * (i.e. if the next A query is already set up), then + * issue it, otherwise abort. + */ + if ( dns->qinfo->qtype == htons ( DNS_TYPE_A ) ) { + dns_send_packet ( dns ); + return 0; + } else { + DBGC ( dns, "DNS %p found no CNAME record\n", dns ); + dns_done ( dns, -ENXIO ); + return 0; + } + + default: + assert ( 0 ); + dns_done ( dns, -EINVAL ); + return 0; + } +} + +/** + * Receive new data + * + * @v socket UDP socket + * @v rc Reason for close + */ +static void dns_xfer_close ( struct xfer_interface *socket, int rc ) { + struct dns_request *dns = + container_of ( socket, struct dns_request, socket ); + + if ( ! rc ) + rc = -ECONNABORTED; + + dns_done ( dns, rc ); +} + +/** DNS socket operations */ +static struct xfer_interface_operations dns_socket_operations = { + .close = dns_xfer_close, + .vredirect = xfer_vreopen, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = xfer_deliver_as_raw, + .deliver_raw = dns_xfer_deliver_raw, +}; + +/** + * Resolve name using DNS + * + * @v resolv Name resolution interface + * @v name Name to resolve + * @v sa Socket address to fill in + * @ret rc Return status code + */ +static int dns_resolv ( struct resolv_interface *resolv, + const char *name, struct sockaddr *sa ) { + struct dns_request *dns; + char *fqdn; + int rc; + + /* Fail immediately if no DNS servers */ + if ( ! nameserver.st_family ) { + DBG ( "DNS not attempting to resolve \"%s\": " + "no DNS servers\n", name ); + rc = -ENXIO; + goto err_no_nameserver; + } + + /* Ensure fully-qualified domain name if DHCP option was given */ + fqdn = dns_qualify_name ( name ); + if ( ! fqdn ) { + rc = -ENOMEM; + goto err_qualify_name; + } + + /* Allocate DNS structure */ + dns = zalloc ( sizeof ( *dns ) ); + if ( ! dns ) { + rc = -ENOMEM; + goto err_alloc_dns; + } + resolv_init ( &dns->resolv, &null_resolv_ops, &dns->refcnt ); + xfer_init ( &dns->socket, &dns_socket_operations, &dns->refcnt ); + dns->timer.expired = dns_timer_expired; + memcpy ( &dns->sa, sa, sizeof ( dns->sa ) ); + + /* Create query */ + dns->query.dns.flags = htons ( DNS_FLAG_QUERY | DNS_FLAG_OPCODE_QUERY | + DNS_FLAG_RD ); + dns->query.dns.qdcount = htons ( 1 ); + dns->qinfo = ( void * ) dns_make_name ( fqdn, dns->query.payload ); + dns->qinfo->qtype = htons ( DNS_TYPE_A ); + dns->qinfo->qclass = htons ( DNS_CLASS_IN ); + + /* Open UDP connection */ + if ( ( rc = xfer_open_socket ( &dns->socket, SOCK_DGRAM, + ( struct sockaddr * ) &nameserver, + NULL ) ) != 0 ) { + DBGC ( dns, "DNS %p could not open socket: %s\n", + dns, strerror ( rc ) ); + goto err_open_socket; + } + + /* Send first DNS packet */ + dns_send_packet ( dns ); + + /* Attach parent interface, mortalise self, and return */ + resolv_plug_plug ( &dns->resolv, resolv ); + ref_put ( &dns->refcnt ); + free ( fqdn ); + return 0; + + err_open_socket: + err_alloc_dns: + ref_put ( &dns->refcnt ); + err_qualify_name: + free ( fqdn ); + err_no_nameserver: + return rc; +} + +/** DNS name resolver */ +struct resolver dns_resolver __resolver ( RESOLV_NORMAL ) = { + .name = "DNS", + .resolv = dns_resolv, +}; + +/****************************************************************************** + * + * Settings + * + ****************************************************************************** + */ + +/** DNS server setting */ +struct setting dns_setting __setting = { + .name = "dns", + .description = "DNS server", + .tag = DHCP_DNS_SERVERS, + .type = &setting_type_ipv4, +}; + +/** Domain name setting */ +struct setting domain_setting __setting = { + .name = "domain", + .description = "Local domain", + .tag = DHCP_DOMAIN_NAME, + .type = &setting_type_string, +}; + +/** + * Apply DNS settings + * + * @ret rc Return status code + */ +static int apply_dns_settings ( void ) { + struct sockaddr_in *sin_nameserver = + ( struct sockaddr_in * ) &nameserver; + int len; + + if ( ( len = fetch_ipv4_setting ( NULL, &dns_setting, + &sin_nameserver->sin_addr ) ) >= 0 ){ + sin_nameserver->sin_family = AF_INET; + DBG ( "DNS using nameserver %s\n", + inet_ntoa ( sin_nameserver->sin_addr ) ); + } + + /* Get local domain DHCP option */ + if ( ( len = fetch_string_setting_copy ( NULL, &domain_setting, + &localdomain ) ) >= 0 ) + DBG ( "DNS local domain %s\n", localdomain ); + + return 0; +} + +/** DNS settings applicator */ +struct settings_applicator dns_applicator __settings_applicator = { + .apply = apply_dns_settings, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/udp/slam.c b/debian/grub-extras/disabled/gpxe/src/net/udp/slam.c new file mode 100644 index 0000000..396f69b --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/udp/slam.c @@ -0,0 +1,812 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <gpxe/features.h> +#include <gpxe/iobuf.h> +#include <gpxe/bitmap.h> +#include <gpxe/xfer.h> +#include <gpxe/open.h> +#include <gpxe/uri.h> +#include <gpxe/tcpip.h> +#include <gpxe/timer.h> +#include <gpxe/retry.h> + +/** @file + * + * Scalable Local Area Multicast protocol + * + * The SLAM protocol is supported only by Etherboot; it was designed + * and implemented by Eric Biederman. A server implementation is + * available in contrib/mini-slamd. There does not appear to be any + * documentation beyond a few sparse comments in Etherboot's + * proto_slam.c. + * + * SLAM packets use three types of data field: + * + * Nul : A single NUL (0) byte, used as a list terminator + * + * Raw : A block of raw data + * + * Int : A variable-length integer, in big-endian order. The length + * of the integer is encoded in the most significant three bits. + * + * Packets received by the client have the following layout: + * + * Int : Transaction identifier. This is an opaque value. + * + * Int : Total number of bytes in the transfer. + * + * Int : Block size, in bytes. + * + * Int : Packet sequence number within the transfer (if this packet + * contains data). + * + * Raw : Packet data (if this packet contains data). + * + * Packets transmitted by the client consist of a run-length-encoded + * representation of the received-blocks bitmap, looking something + * like: + * + * Int : Number of consecutive successfully-received packets + * Int : Number of consecutive missing packets + * Int : Number of consecutive successfully-received packets + * Int : Number of consecutive missing packets + * .... + * Nul + * + */ + +FEATURE ( FEATURE_PROTOCOL, "SLAM", DHCP_EB_FEATURE_SLAM, 1 ); + +/** Default SLAM server port */ +#define SLAM_DEFAULT_PORT 10000 + +/** Default SLAM multicast IP address */ +#define SLAM_DEFAULT_MULTICAST_IP \ + ( ( 239 << 24 ) | ( 255 << 16 ) | ( 1 << 8 ) | ( 1 << 0 ) ) + +/** Default SLAM multicast port */ +#define SLAM_DEFAULT_MULTICAST_PORT 10000 + +/** Maximum SLAM header length */ +#define SLAM_MAX_HEADER_LEN ( 7 /* transaction id */ + 7 /* total_bytes */ + \ + 7 /* block_size */ ) + +/** Maximum number of blocks to request per NACK + * + * This is a policy decision equivalent to selecting a TCP window + * size. + */ +#define SLAM_MAX_BLOCKS_PER_NACK 4 + +/** Maximum SLAM NACK length + * + * We only ever send a NACK for a single range of up to @c + * SLAM_MAX_BLOCKS_PER_NACK blocks. + */ +#define SLAM_MAX_NACK_LEN ( 7 /* block */ + 7 /* #blocks */ + 1 /* NUL */ ) + +/** SLAM slave timeout */ +#define SLAM_SLAVE_TIMEOUT ( 1 * TICKS_PER_SEC ) + +/** A SLAM request */ +struct slam_request { + /** Reference counter */ + struct refcnt refcnt; + + /** Data transfer interface */ + struct xfer_interface xfer; + /** Unicast socket */ + struct xfer_interface socket; + /** Multicast socket */ + struct xfer_interface mc_socket; + + /** Master client retry timer */ + struct retry_timer master_timer; + /** Slave client retry timer */ + struct retry_timer slave_timer; + + /** Cached header */ + uint8_t header[SLAM_MAX_HEADER_LEN]; + /** Size of cached header */ + size_t header_len; + /** Total number of bytes in transfer */ + unsigned long total_bytes; + /** Transfer block size */ + unsigned long block_size; + /** Number of blocks in transfer */ + unsigned long num_blocks; + /** Block bitmap */ + struct bitmap bitmap; + /** NACK sent flag */ + int nack_sent; +}; + +/** + * Free a SLAM request + * + * @v refcnt Reference counter + */ +static void slam_free ( struct refcnt *refcnt ) { + struct slam_request *slam = + container_of ( refcnt, struct slam_request, refcnt ); + + bitmap_free ( &slam->bitmap ); + free ( slam ); +} + +/** + * Mark SLAM request as complete + * + * @v slam SLAM request + * @v rc Return status code + */ +static void slam_finished ( struct slam_request *slam, int rc ) { + static const uint8_t slam_disconnect[] = { 0 }; + + DBGC ( slam, "SLAM %p finished with status code %d (%s)\n", + slam, rc, strerror ( rc ) ); + + /* Send a disconnect message if we ever sent anything to the + * server. + */ + if ( slam->nack_sent ) { + xfer_deliver_raw ( &slam->socket, slam_disconnect, + sizeof ( slam_disconnect ) ); + } + + /* Stop the retry timers */ + stop_timer ( &slam->master_timer ); + stop_timer ( &slam->slave_timer ); + + /* Close all data transfer interfaces */ + xfer_nullify ( &slam->socket ); + xfer_close ( &slam->socket, rc ); + xfer_nullify ( &slam->mc_socket ); + xfer_close ( &slam->mc_socket, rc ); + xfer_nullify ( &slam->xfer ); + xfer_close ( &slam->xfer, rc ); +} + +/**************************************************************************** + * + * TX datapath + * + */ + +/** + * Add a variable-length value to a SLAM packet + * + * @v slam SLAM request + * @v iobuf I/O buffer + * @v value Value to add + * @ret rc Return status code + * + * Adds a variable-length value to the end of an I/O buffer. Will + * always leave at least one byte of tailroom in the I/O buffer (to + * allow space for the terminating NUL). + */ +static int slam_put_value ( struct slam_request *slam, + struct io_buffer *iobuf, unsigned long value ) { + uint8_t *data; + size_t len; + unsigned int i; + + /* Calculate variable length required to store value. Always + * leave at least one byte in the I/O buffer. + */ + len = ( ( flsl ( value ) + 10 ) / 8 ); + if ( len >= iob_tailroom ( iobuf ) ) { + DBGC2 ( slam, "SLAM %p cannot add %zd-byte value\n", + slam, len ); + return -ENOBUFS; + } + /* There is no valid way within the protocol that we can end + * up trying to push a full-sized long (i.e. without space for + * the length encoding). + */ + assert ( len <= sizeof ( value ) ); + + /* Add value */ + data = iob_put ( iobuf, len ); + for ( i = len ; i-- ; ) { + data[i] = value; + value >>= 8; + } + *data |= ( len << 5 ); + assert ( value == 0 ); + + return 0; +} + +/** + * Send SLAM NACK packet + * + * @v slam SLAM request + * @ret rc Return status code + */ +static int slam_tx_nack ( struct slam_request *slam ) { + struct io_buffer *iobuf; + unsigned long first_block; + unsigned long num_blocks; + uint8_t *nul; + int rc; + + /* Mark NACK as sent, so that we know we have to disconnect later */ + slam->nack_sent = 1; + + /* Allocate I/O buffer */ + iobuf = xfer_alloc_iob ( &slam->socket, SLAM_MAX_NACK_LEN ); + if ( ! iobuf ) { + DBGC ( slam, "SLAM %p could not allocate I/O buffer\n", + slam ); + return -ENOMEM; + } + + /* Construct NACK. We always request only a single packet; + * this allows us to force multicast-TFTP-style flow control + * on the SLAM server, which will otherwise just blast the + * data out as fast as it can. On a gigabit network, without + * RX checksumming, this would inevitably cause packet drops. + */ + first_block = bitmap_first_gap ( &slam->bitmap ); + for ( num_blocks = 1 ; ; num_blocks++ ) { + if ( num_blocks >= SLAM_MAX_BLOCKS_PER_NACK ) + break; + if ( ( first_block + num_blocks ) >= slam->num_blocks ) + break; + if ( bitmap_test ( &slam->bitmap, + ( first_block + num_blocks ) ) ) + break; + } + if ( first_block ) { + DBGCP ( slam, "SLAM %p transmitting NACK for blocks " + "%ld-%ld\n", slam, first_block, + ( first_block + num_blocks - 1 ) ); + } else { + DBGC ( slam, "SLAM %p transmitting initial NACK for blocks " + "0-%ld\n", slam, ( num_blocks - 1 ) ); + } + if ( ( rc = slam_put_value ( slam, iobuf, first_block ) ) != 0 ) + return rc; + if ( ( rc = slam_put_value ( slam, iobuf, num_blocks ) ) != 0 ) + return rc; + nul = iob_put ( iobuf, 1 ); + *nul = 0; + + /* Transmit packet */ + return xfer_deliver_iob ( &slam->socket, iobuf ); +} + +/** + * Handle SLAM master client retry timer expiry + * + * @v timer Master retry timer + * @v fail Failure indicator + */ +static void slam_master_timer_expired ( struct retry_timer *timer, + int fail ) { + struct slam_request *slam = + container_of ( timer, struct slam_request, master_timer ); + + if ( fail ) { + /* Allow timer to stop running. We will terminate the + * connection only if the slave timer times out. + */ + DBGC ( slam, "SLAM %p giving up acting as master client\n", + slam ); + } else { + /* Retransmit NACK */ + start_timer ( timer ); + slam_tx_nack ( slam ); + } +} + +/** + * Handle SLAM slave client retry timer expiry + * + * @v timer Master retry timer + * @v fail Failure indicator + */ +static void slam_slave_timer_expired ( struct retry_timer *timer, + int fail ) { + struct slam_request *slam = + container_of ( timer, struct slam_request, slave_timer ); + + if ( fail ) { + /* Terminate connection */ + slam_finished ( slam, -ETIMEDOUT ); + } else { + /* Try sending a NACK */ + DBGC ( slam, "SLAM %p trying to become master client\n", + slam ); + start_timer ( timer ); + slam_tx_nack ( slam ); + } +} + +/**************************************************************************** + * + * RX datapath + * + */ + +/** + * Read and strip a variable-length value from a SLAM packet + * + * @v slam SLAM request + * @v iobuf I/O buffer + * @v value Value to fill in, or NULL to ignore value + * @ret rc Return status code + * + * Reads a variable-length value from the start of the I/O buffer. + */ +static int slam_pull_value ( struct slam_request *slam, + struct io_buffer *iobuf, + unsigned long *value ) { + uint8_t *data; + size_t len; + + /* Sanity check */ + if ( iob_len ( iobuf ) == 0 ) { + DBGC ( slam, "SLAM %p empty value\n", slam ); + return -EINVAL; + } + + /* Read and verify length of value */ + data = iobuf->data; + len = ( *data >> 5 ); + if ( ( len == 0 ) || + ( value && ( len > sizeof ( *value ) ) ) ) { + DBGC ( slam, "SLAM %p invalid value length %zd bytes\n", + slam, len ); + return -EINVAL; + } + if ( len > iob_len ( iobuf ) ) { + DBGC ( slam, "SLAM %p value extends beyond I/O buffer\n", + slam ); + return -EINVAL; + } + + /* Read value */ + iob_pull ( iobuf, len ); + *value = ( *data & 0x1f ); + while ( --len ) { + *value <<= 8; + *value |= *(++data); + } + + return 0; +} + +/** + * Read and strip SLAM header + * + * @v slam SLAM request + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int slam_pull_header ( struct slam_request *slam, + struct io_buffer *iobuf ) { + void *header = iobuf->data; + int rc; + + /* If header matches cached header, just pull it and return */ + if ( ( slam->header_len <= iob_len ( iobuf ) ) && + ( memcmp ( slam->header, iobuf->data, slam->header_len ) == 0 )){ + iob_pull ( iobuf, slam->header_len ); + return 0; + } + + DBGC ( slam, "SLAM %p detected changed header; resetting\n", slam ); + + /* Read and strip transaction ID, total number of bytes, and + * block size. + */ + if ( ( rc = slam_pull_value ( slam, iobuf, NULL ) ) != 0 ) + return rc; + if ( ( rc = slam_pull_value ( slam, iobuf, + &slam->total_bytes ) ) != 0 ) + return rc; + if ( ( rc = slam_pull_value ( slam, iobuf, + &slam->block_size ) ) != 0 ) + return rc; + + /* Update the cached header */ + slam->header_len = ( iobuf->data - header ); + assert ( slam->header_len <= sizeof ( slam->header ) ); + memcpy ( slam->header, header, slam->header_len ); + + /* Calculate number of blocks */ + slam->num_blocks = ( ( slam->total_bytes + slam->block_size - 1 ) / + slam->block_size ); + + DBGC ( slam, "SLAM %p has total bytes %ld, block size %ld, num " + "blocks %ld\n", slam, slam->total_bytes, slam->block_size, + slam->num_blocks ); + + /* Discard and reset the bitmap */ + bitmap_free ( &slam->bitmap ); + memset ( &slam->bitmap, 0, sizeof ( slam->bitmap ) ); + + /* Allocate a new bitmap */ + if ( ( rc = bitmap_resize ( &slam->bitmap, + slam->num_blocks ) ) != 0 ) { + /* Failure to allocate a bitmap is fatal */ + DBGC ( slam, "SLAM %p could not allocate bitmap for %ld " + "blocks: %s\n", slam, slam->num_blocks, + strerror ( rc ) ); + slam_finished ( slam, rc ); + return rc; + } + + /* Notify recipient of file size */ + xfer_seek ( &slam->xfer, slam->total_bytes, SEEK_SET ); + + return 0; +} + +/** + * Receive SLAM data packet + * + * @v mc_socket SLAM multicast socket + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int slam_mc_socket_deliver ( struct xfer_interface *mc_socket, + struct io_buffer *iobuf, + struct xfer_metadata *rx_meta __unused ) { + struct slam_request *slam = + container_of ( mc_socket, struct slam_request, mc_socket ); + struct xfer_metadata meta; + unsigned long packet; + size_t len; + int rc; + + /* Stop the master client timer. Restart the slave client timer. */ + stop_timer ( &slam->master_timer ); + stop_timer ( &slam->slave_timer ); + start_timer_fixed ( &slam->slave_timer, SLAM_SLAVE_TIMEOUT ); + + /* Read and strip packet header */ + if ( ( rc = slam_pull_header ( slam, iobuf ) ) != 0 ) + goto err_discard; + + /* Read and strip packet number */ + if ( ( rc = slam_pull_value ( slam, iobuf, &packet ) ) != 0 ) + goto err_discard; + + /* Sanity check packet number */ + if ( packet >= slam->num_blocks ) { + DBGC ( slam, "SLAM %p received out-of-range packet %ld " + "(num_blocks=%ld)\n", slam, packet, slam->num_blocks ); + rc = -EINVAL; + goto err_discard; + } + + /* Sanity check length */ + len = iob_len ( iobuf ); + if ( len > slam->block_size ) { + DBGC ( slam, "SLAM %p received oversize packet of %zd bytes " + "(block_size=%ld)\n", slam, len, slam->block_size ); + rc = -EINVAL; + goto err_discard; + } + if ( ( packet != ( slam->num_blocks - 1 ) ) && + ( len < slam->block_size ) ) { + DBGC ( slam, "SLAM %p received short packet of %zd bytes " + "(block_size=%ld)\n", slam, len, slam->block_size ); + rc = -EINVAL; + goto err_discard; + } + + /* If we have already seen this packet, discard it */ + if ( bitmap_test ( &slam->bitmap, packet ) ) { + goto discard; + } + + /* Pass to recipient */ + memset ( &meta, 0, sizeof ( meta ) ); + meta.whence = SEEK_SET; + meta.offset = ( packet * slam->block_size ); + if ( ( rc = xfer_deliver_iob_meta ( &slam->xfer, iobuf, + &meta ) ) != 0 ) + goto err; + + /* Mark block as received */ + bitmap_set ( &slam->bitmap, packet ); + + /* If we have received all blocks, terminate */ + if ( bitmap_full ( &slam->bitmap ) ) + slam_finished ( slam, 0 ); + + return 0; + + err_discard: + discard: + free_iob ( iobuf ); + err: + return rc; +} + +/** + * Receive SLAM non-data packet + * + * @v socket SLAM unicast socket + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int slam_socket_deliver ( struct xfer_interface *socket, + struct io_buffer *iobuf, + struct xfer_metadata *rx_meta __unused ) { + struct slam_request *slam = + container_of ( socket, struct slam_request, socket ); + int rc; + + /* Restart the master client timer */ + stop_timer ( &slam->master_timer ); + start_timer ( &slam->master_timer ); + + /* Read and strip packet header */ + if ( ( rc = slam_pull_header ( slam, iobuf ) ) != 0 ) + goto discard; + + /* Sanity check */ + if ( iob_len ( iobuf ) != 0 ) { + DBGC ( slam, "SLAM %p received trailing garbage:\n", slam ); + DBGC_HD ( slam, iobuf->data, iob_len ( iobuf ) ); + rc = -EINVAL; + goto discard; + } + + /* Discard packet */ + free_iob ( iobuf ); + + /* Send NACK in reply */ + slam_tx_nack ( slam ); + + return 0; + + discard: + free_iob ( iobuf ); + return rc; + +} + +/** + * Close SLAM unicast socket + * + * @v socket SLAM unicast socket + * @v rc Reason for close + */ +static void slam_socket_close ( struct xfer_interface *socket, int rc ) { + struct slam_request *slam = + container_of ( socket, struct slam_request, socket ); + + DBGC ( slam, "SLAM %p unicast socket closed: %s\n", + slam, strerror ( rc ) ); + + slam_finished ( slam, rc ); +} + +/** SLAM unicast socket data transfer operations */ +static struct xfer_interface_operations slam_socket_operations = { + .close = slam_socket_close, + .vredirect = xfer_vreopen, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = slam_socket_deliver, + .deliver_raw = xfer_deliver_as_iob, +}; + +/** + * Close SLAM multicast socket + * + * @v mc_socket SLAM multicast socket + * @v rc Reason for close + */ +static void slam_mc_socket_close ( struct xfer_interface *mc_socket, int rc ){ + struct slam_request *slam = + container_of ( mc_socket, struct slam_request, mc_socket ); + + DBGC ( slam, "SLAM %p multicast socket closed: %s\n", + slam, strerror ( rc ) ); + + slam_finished ( slam, rc ); +} + +/** SLAM multicast socket data transfer operations */ +static struct xfer_interface_operations slam_mc_socket_operations = { + .close = slam_mc_socket_close, + .vredirect = xfer_vreopen, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = slam_mc_socket_deliver, + .deliver_raw = xfer_deliver_as_iob, +}; + +/**************************************************************************** + * + * Data transfer interface + * + */ + +/** + * Close SLAM data transfer interface + * + * @v xfer SLAM data transfer interface + * @v rc Reason for close + */ +static void slam_xfer_close ( struct xfer_interface *xfer, int rc ) { + struct slam_request *slam = + container_of ( xfer, struct slam_request, xfer ); + + DBGC ( slam, "SLAM %p data transfer interface closed: %s\n", + slam, strerror ( rc ) ); + + slam_finished ( slam, rc ); +} + +/** SLAM data transfer operations */ +static struct xfer_interface_operations slam_xfer_operations = { + .close = slam_xfer_close, + .vredirect = ignore_xfer_vredirect, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = xfer_deliver_as_raw, + .deliver_raw = ignore_xfer_deliver_raw, +}; + +/** + * Parse SLAM URI multicast address + * + * @v slam SLAM request + * @v path Path portion of x-slam:// URI + * @v address Socket address to fill in + * @ret rc Return status code + */ +static int slam_parse_multicast_address ( struct slam_request *slam, + const char *path, + struct sockaddr_in *address ) { + char path_dup[ strlen ( path ) /* no +1 */ ]; + char *sep; + char *end; + + /* Create temporary copy of path, minus the leading '/' */ + assert ( *path == '/' ); + memcpy ( path_dup, ( path + 1 ) , sizeof ( path_dup ) ); + + /* Parse port, if present */ + sep = strchr ( path_dup, ':' ); + if ( sep ) { + *(sep++) = '\0'; + address->sin_port = htons ( strtoul ( sep, &end, 0 ) ); + if ( *end != '\0' ) { + DBGC ( slam, "SLAM %p invalid multicast port " + "\"%s\"\n", slam, sep ); + return -EINVAL; + } + } + + /* Parse address */ + if ( inet_aton ( path_dup, &address->sin_addr ) == 0 ) { + DBGC ( slam, "SLAM %p invalid multicast address \"%s\"\n", + slam, path_dup ); + return -EINVAL; + } + + return 0; +} + +/** + * Initiate a SLAM request + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int slam_open ( struct xfer_interface *xfer, struct uri *uri ) { + static const struct sockaddr_in default_multicast = { + .sin_family = AF_INET, + .sin_port = htons ( SLAM_DEFAULT_MULTICAST_PORT ), + .sin_addr = { htonl ( SLAM_DEFAULT_MULTICAST_IP ) }, + }; + struct slam_request *slam; + struct sockaddr_tcpip server; + struct sockaddr_in multicast; + int rc; + + /* Sanity checks */ + if ( ! uri->host ) + return -EINVAL; + + /* Allocate and populate structure */ + slam = zalloc ( sizeof ( *slam ) ); + if ( ! slam ) + return -ENOMEM; + slam->refcnt.free = slam_free; + xfer_init ( &slam->xfer, &slam_xfer_operations, &slam->refcnt ); + xfer_init ( &slam->socket, &slam_socket_operations, &slam->refcnt ); + xfer_init ( &slam->mc_socket, &slam_mc_socket_operations, + &slam->refcnt ); + slam->master_timer.expired = slam_master_timer_expired; + slam->slave_timer.expired = slam_slave_timer_expired; + /* Fake an invalid cached header of { 0x00, ... } */ + slam->header_len = 1; + /* Fake parameters for initial NACK */ + slam->num_blocks = 1; + if ( ( rc = bitmap_resize ( &slam->bitmap, 1 ) ) != 0 ) { + DBGC ( slam, "SLAM %p could not allocate initial bitmap: " + "%s\n", slam, strerror ( rc ) ); + goto err; + } + + /* Open unicast socket */ + memset ( &server, 0, sizeof ( server ) ); + server.st_port = htons ( uri_port ( uri, SLAM_DEFAULT_PORT ) ); + if ( ( rc = xfer_open_named_socket ( &slam->socket, SOCK_DGRAM, + ( struct sockaddr * ) &server, + uri->host, NULL ) ) != 0 ) { + DBGC ( slam, "SLAM %p could not open unicast socket: %s\n", + slam, strerror ( rc ) ); + goto err; + } + + /* Open multicast socket */ + memcpy ( &multicast, &default_multicast, sizeof ( multicast ) ); + if ( uri->path && + ( ( rc = slam_parse_multicast_address ( slam, uri->path, + &multicast ) ) != 0 ) ) { + goto err; + } + if ( ( rc = xfer_open_socket ( &slam->mc_socket, SOCK_DGRAM, + ( struct sockaddr * ) &multicast, + ( struct sockaddr * ) &multicast ) ) != 0 ) { + DBGC ( slam, "SLAM %p could not open multicast socket: %s\n", + slam, strerror ( rc ) ); + goto err; + } + + /* Start slave retry timer */ + start_timer_fixed ( &slam->slave_timer, SLAM_SLAVE_TIMEOUT ); + + /* Attach to parent interface, mortalise self, and return */ + xfer_plug_plug ( &slam->xfer, xfer ); + ref_put ( &slam->refcnt ); + return 0; + + err: + slam_finished ( slam, rc ); + ref_put ( &slam->refcnt ); + return rc; +} + +/** SLAM URI opener */ +struct uri_opener slam_uri_opener __uri_opener = { + .scheme = "x-slam", + .open = slam_open, +}; diff --git a/debian/grub-extras/disabled/gpxe/src/net/udp/tftp.c b/debian/grub-extras/disabled/gpxe/src/net/udp/tftp.c new file mode 100644 index 0000000..bb031ec --- /dev/null +++ b/debian/grub-extras/disabled/gpxe/src/net/udp/tftp.c @@ -0,0 +1,1205 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <strings.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <gpxe/refcnt.h> +#include <gpxe/xfer.h> +#include <gpxe/open.h> +#include <gpxe/uri.h> +#include <gpxe/tcpip.h> +#include <gpxe/retry.h> +#include <gpxe/features.h> +#include <gpxe/bitmap.h> +#include <gpxe/settings.h> +#include <gpxe/dhcp.h> +#include <gpxe/uri.h> +#include <gpxe/tftp.h> + +/** @file + * + * TFTP protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "TFTP", DHCP_EB_FEATURE_TFTP, 1 ); + +/* TFTP-specific error codes */ +#define ETFTP_INVALID_BLKSIZE EUNIQ_01 +#define ETFTP_INVALID_TSIZE EUNIQ_02 +#define ETFTP_MC_NO_PORT EUNIQ_03 +#define ETFTP_MC_NO_MC EUNIQ_04 +#define ETFTP_MC_INVALID_MC EUNIQ_05 +#define ETFTP_MC_INVALID_IP EUNIQ_06 +#define ETFTP_MC_INVALID_PORT EUNIQ_07 + +/** + * A TFTP request + * + * This data structure holds the state for an ongoing TFTP transfer. + */ +struct tftp_request { + /** Reference count */ + struct refcnt refcnt; + /** Data transfer interface */ + struct xfer_interface xfer; + + /** URI being fetched */ + struct uri *uri; + /** Transport layer interface */ + struct xfer_interface socket; + /** Multicast transport layer interface */ + struct xfer_interface mc_socket; + + /** Data block size + * + * This is the "blksize" option negotiated with the TFTP + * server. (If the TFTP server does not support TFTP options, + * this will default to 512). + */ + unsigned int blksize; + /** File size + * + * This is the value returned in the "tsize" option from the + * TFTP server. If the TFTP server does not support the + * "tsize" option, this value will be zero. + */ + unsigned long tsize; + + /** Server port + * + * This is the port to which RRQ packets are sent. + */ + unsigned int port; + /** Peer address + * + * The peer address is determined by the first response + * received to the TFTP RRQ. + */ + struct sockaddr_tcpip peer; + /** Request flags */ + unsigned int flags; + /** MTFTP timeout count */ + unsigned int mtftp_timeouts; + + /** Block bitmap */ + struct bitmap bitmap; + /** Maximum known length + * + * We don't always know the file length in advance. In + * particular, if the TFTP server doesn't support the tsize + * option, or we are using MTFTP, then we don't know the file + * length until we see the end-of-file block (which, in the + * case of MTFTP, may not be the last block we see). + * + * This value is updated whenever we obtain information about + * the file length. + */ + size_t filesize; + /** Retransmission timer */ + struct retry_timer timer; +}; + +/** TFTP request flags */ +enum { + /** Send ACK packets */ + TFTP_FL_SEND_ACK = 0x0001, + /** Request blksize and tsize options */ + TFTP_FL_RRQ_SIZES = 0x0002, + /** Request multicast option */ + TFTP_FL_RRQ_MULTICAST = 0x0004, + /** Perform MTFTP recovery on timeout */ + TFTP_FL_MTFTP_RECOVERY = 0x0008, +}; + +/** Maximum number of MTFTP open requests before falling back to TFTP */ +#define MTFTP_MAX_TIMEOUTS 3 + +/** + * Free TFTP request + * + * @v refcnt Reference counter + */ +static void tftp_free ( struct refcnt *refcnt ) { + struct tftp_request *tftp = + container_of ( refcnt, struct tftp_request, refcnt ); + + uri_put ( tftp->uri ); + bitmap_free ( &tftp->bitmap ); + free ( tftp ); +} + +/** + * Mark TFTP request as complete + * + * @v tftp TFTP connection + * @v rc Return status code + */ +static void tftp_done ( struct tftp_request *tftp, int rc ) { + + DBGC ( tftp, "TFTP %p finished with status %d (%s)\n", + tftp, rc, strerror ( rc ) ); + + /* Stop the retry timer */ + stop_timer ( &tftp->timer ); + + /* Close all data transfer interfaces */ + xfer_nullify ( &tftp->socket ); + xfer_close ( &tftp->socket, rc ); + xfer_nullify ( &tftp->mc_socket ); + xfer_close ( &tftp->mc_socket, rc ); + xfer_nullify ( &tftp->xfer ); + xfer_close ( &tftp->xfer, rc ); +} + +/** + * Reopen TFTP socket + * + * @v tftp TFTP connection + * @ret rc Return status code + */ +static int tftp_reopen ( struct tftp_request *tftp ) { + struct sockaddr_tcpip server; + int rc; + + /* Close socket */ + xfer_close ( &tftp->socket, 0 ); + + /* Disable ACK sending. */ + tftp->flags &= ~TFTP_FL_SEND_ACK; + + /* Reset peer address */ + memset ( &tftp->peer, 0, sizeof ( tftp->peer ) ); + + /* Open socket */ + memset ( &server, 0, sizeof ( server ) ); + server.st_port = htons ( tftp->port ); + if ( ( rc = xfer_open_named_socket ( &tftp->socket, SOCK_DGRAM, + ( struct sockaddr * ) &server, + tftp->uri->host, NULL ) ) != 0 ) { + DBGC ( tftp, "TFTP %p could not open socket: %s\n", + tftp, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Reopen TFTP multicast socket + * + * @v tftp TFTP connection + * @v local Local socket address + * @ret rc Return status code + */ +static int tftp_reopen_mc ( struct tftp_request *tftp, + struct sockaddr *local ) { + int rc; + + /* Close multicast socket */ + xfer_close ( &tftp->mc_socket, 0 ); + + /* Open multicast socket. We never send via this socket, so + * use the local address as the peer address (since the peer + * address cannot be NULL). + */ + if ( ( rc = xfer_open_socket ( &tftp->mc_socket, SOCK_DGRAM, + local, local ) ) != 0 ) { + DBGC ( tftp, "TFTP %p could not open multicast " + "socket: %s\n", tftp, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Presize TFTP receive buffers and block bitmap + * + * @v tftp TFTP connection + * @v filesize Known minimum file size + * @ret rc Return status code + */ +static int tftp_presize ( struct tftp_request *tftp, size_t filesize ) { + unsigned int num_blocks; + int rc; + + /* Do nothing if we are already large enough */ + if ( filesize <= tftp->filesize ) + return 0; + + /* Record filesize */ + tftp->filesize = filesize; + + /* Notify recipient of file size */ + xfer_seek ( &tftp->xfer, filesize, SEEK_SET ); + xfer_seek ( &tftp->xfer, 0, SEEK_SET ); + + /* Calculate expected number of blocks. Note that files whose + * length is an exact multiple of the blocksize will have a + * trailing zero-length block, which must be included. + */ + num_blocks = ( ( filesize / tftp->blksize ) + 1 ); + if ( ( rc = bitmap_resize ( &tftp->bitmap, num_blocks ) ) != 0 ) { + DBGC ( tftp, "TFTP %p could not resize bitmap to %d blocks: " + "%s\n", tftp, num_blocks, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * TFTP requested blocksize + * + * This is treated as a global configuration parameter. + */ +static unsigned int tftp_request_blksize = TFTP_MAX_BLKSIZE; + +/** + * Set TFTP request blocksize + * + * @v blksize Requested block size + */ +void tftp_set_request_blksize ( unsigned int blksize ) { + if ( blksize < TFTP_DEFAULT_BLKSIZE ) + blksize = TFTP_DEFAULT_BLKSIZE; + tftp_request_blksize = blksize; +} + +/** + * MTFTP multicast receive address + * + * This is treated as a global configuration parameter. + */ +static struct sockaddr_in tftp_mtftp_socket = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl ( 0xefff0101 ), + .sin_port = htons ( 3001 ), +}; + +/** + * Set MTFTP multicast address + * + * @v address Multicast IPv4 address + */ +void tftp_set_mtftp_address ( struct in_addr address ) { + tftp_mtftp_socket.sin_addr = address; +} + +/** + * Set MTFTP multicast port + * + * @v port Multicast port + */ +void tftp_set_mtftp_port ( unsigned int port ) { + tftp_mtftp_socket.sin_port = htons ( port ); +} + +/** + * Transmit RRQ + * + * @v tftp TFTP connection + * @ret rc Return status code + */ +static int tftp_send_rrq ( struct tftp_request *tftp ) { + struct tftp_rrq *rrq; + const char *path; + size_t len; + struct io_buffer *iobuf; + + /* Strip initial '/' if present. If we were opened via the + * URI interface, then there will be an initial '/', since a + * full tftp:// URI provides no way to specify a non-absolute + * path. However, many TFTP servers (particularly Windows + * TFTP servers) complain about having an initial '/', and it + * violates user expectations to have a '/' silently added to + * the DHCP-specified filename. + */ + path = tftp->uri->path; + if ( *path == '/' ) + path++; + + DBGC ( tftp, "TFTP %p requesting \"%s\"\n", tftp, path ); + + /* Allocate buffer */ + len = ( sizeof ( *rrq ) + strlen ( path ) + 1 /* NUL */ + + 5 + 1 /* "octet" + NUL */ + + 7 + 1 + 5 + 1 /* "blksize" + NUL + ddddd + NUL */ + + 5 + 1 + 1 + 1 /* "tsize" + NUL + "0" + NUL */ + + 9 + 1 + 1 /* "multicast" + NUL + NUL */ ); + iobuf = xfer_alloc_iob ( &tftp->socket, len ); + if ( ! iobuf ) + return -ENOMEM; + + /* Build request */ + rrq = iob_put ( iobuf, sizeof ( *rrq ) ); + rrq->opcode = htons ( TFTP_RRQ ); + iob_put ( iobuf, snprintf ( iobuf->tail, iob_tailroom ( iobuf ), + "%s%coctet", path, 0 ) + 1 ); + if ( tftp->flags & TFTP_FL_RRQ_SIZES ) { + iob_put ( iobuf, snprintf ( iobuf->tail, + iob_tailroom ( iobuf ), + "blksize%c%d%ctsize%c0", 0, + tftp_request_blksize, 0, 0 ) + 1 ); + } + if ( tftp->flags & TFTP_FL_RRQ_MULTICAST ) { + iob_put ( iobuf, snprintf ( iobuf->tail, + iob_tailroom ( iobuf ), + "multicast%c", 0 ) + 1 ); + } + + /* RRQ always goes to the address specified in the initial + * xfer_open() call + */ + return xfer_deliver_iob ( &tftp->socket, iobuf ); +} + +/** + * Transmit ACK + * + * @v tftp TFTP connection + * @ret rc Return status code + */ +static int tftp_send_ack ( struct tftp_request *tftp ) { + struct tftp_ack *ack; + struct io_buffer *iobuf; + struct xfer_metadata meta = { + .dest = ( struct sockaddr * ) &tftp->peer, + }; + unsigned int block; + + /* Determine next required block number */ + block = bitmap_first_gap ( &tftp->bitmap ); + DBGC2 ( tftp, "TFTP %p sending ACK for block %d\n", tftp, block ); + + /* Allocate buffer */ + iobuf = xfer_alloc_iob ( &tftp->socket, sizeof ( *ack ) ); + if ( ! iobuf ) + return -ENOMEM; + + /* Build ACK */ + ack = iob_put ( iobuf, sizeof ( *ack ) ); + ack->opcode = htons ( TFTP_ACK ); + ack->block = htons ( block ); + + /* ACK always goes to the peer recorded from the RRQ response */ + return xfer_deliver_iob_meta ( &tftp->socket, iobuf, &meta ); +} + +/** + * Transmit next relevant packet + * + * @v tftp TFTP connection + * @ret rc Return status code + */ +static int tftp_send_packet ( struct tftp_request *tftp ) { + + /* Update retransmission timer */ + stop_timer ( &tftp->timer ); + start_timer ( &tftp->timer ); + + /* Send RRQ or ACK as appropriate */ + if ( ! tftp->peer.st_family ) { + return tftp_send_rrq ( tftp ); + } else { + if ( tftp->flags & TFTP_FL_SEND_ACK ) { + return tftp_send_ack ( tftp ); + } else { + return 0; + } + } +} + +/** + * Handle TFTP retransmission timer expiry + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void tftp_timer_expired ( struct retry_timer *timer, int fail ) { + struct tftp_request *tftp = + container_of ( timer, struct tftp_request, timer ); + int rc; + + /* If we are doing MTFTP, attempt the various recovery strategies */ + if ( tftp->flags & TFTP_FL_MTFTP_RECOVERY ) { + if ( tftp->peer.st_family ) { + /* If we have received any response from the server, + * try resending the RRQ to restart the download. + */ + DBGC ( tftp, "TFTP %p attempting reopen\n", tftp ); + if ( ( rc = tftp_reopen ( tftp ) ) != 0 ) + goto err; + } else { + /* Fall back to plain TFTP after several attempts */ + tftp->mtftp_timeouts++; + DBGC ( tftp, "TFTP %p timeout %d waiting for MTFTP " + "open\n", tftp, tftp->mtftp_timeouts ); + + if ( tftp->mtftp_timeouts > MTFTP_MAX_TIMEOUTS ) { + DBGC ( tftp, "TFTP %p falling back to plain " + "TFTP\n", tftp ); + tftp->flags = TFTP_FL_RRQ_SIZES; + + /* Close multicast socket */ + xfer_close ( &tftp->mc_socket, 0 ); + + /* Reset retry timer */ + start_timer_nodelay ( &tftp->timer ); + + /* The blocksize may change: discard + * the block bitmap + */ + bitmap_free ( &tftp->bitmap ); + memset ( &tftp->bitmap, 0, + sizeof ( tftp->bitmap ) ); + + /* Reopen on standard TFTP port */ + tftp->port = TFTP_PORT; + if ( ( rc = tftp_reopen ( tftp ) ) != 0 ) + goto err; + } + } + } else { + /* Not doing MTFTP (or have fallen back to plain + * TFTP); fail as per normal. + */ + if ( fail ) { + rc = -ETIMEDOUT; + goto err; + } + } + tftp_send_packet ( tftp ); + return; + + err: + tftp_done ( tftp, rc ); +} + +/** + * Process TFTP "blksize" option + * + * @v tftp TFTP connection + * @v value Option value + * @ret rc Return status code + */ +static int tftp_process_blksize ( struct tftp_request *tftp, + const char *value ) { + char *end; + + tftp->blksize = strtoul ( value, &end, 10 ); + if ( *end ) { + DBGC ( tftp, "TFTP %p got invalid blksize \"%s\"\n", + tftp, value ); + return -( EINVAL | ETFTP_INVALID_BLKSIZE ); + } + DBGC ( tftp, "TFTP %p blksize=%d\n", tftp, tftp->blksize ); + + return 0; +} + +/** + * Process TFTP "tsize" option + * + * @v tftp TFTP connection + * @v value Option value + * @ret rc Return status code + */ +static int tftp_process_tsize ( struct tftp_request *tftp, + const char *value ) { + char *end; + + tftp->tsize = strtoul ( value, &end, 10 ); + if ( *end ) { + DBGC ( tftp, "TFTP %p got invalid tsize \"%s\"\n", + tftp, value ); + return -( EINVAL | ETFTP_INVALID_TSIZE ); + } + DBGC ( tftp, "TFTP %p tsize=%ld\n", tftp, tftp->tsize ); + + return 0; +} + +/** + * Process TFTP "multicast" option + * + * @v tftp TFTP connection + * @v value Option value + * @ret rc Return status code + */ +static int tftp_process_multicast ( struct tftp_request *tftp, + const char *value ) { + union { + struct sockaddr sa; + struct sockaddr_in sin; + } socket; + char buf[ strlen ( value ) + 1 ]; + char *addr; + char *port; + char *port_end; + char *mc; + char *mc_end; + int rc; + + /* Split value into "addr,port,mc" fields */ + memcpy ( buf, value, sizeof ( buf ) ); + addr = buf; + port = strchr ( addr, ',' ); + if ( ! port ) { + DBGC ( tftp, "TFTP %p multicast missing port,mc\n", tftp ); + return -( EINVAL | ETFTP_MC_NO_PORT ); + } + *(port++) = '\0'; + mc = strchr ( port, ',' ); + if ( ! mc ) { + DBGC ( tftp, "TFTP %p multicast missing mc\n", tftp ); + return -( EINVAL | ETFTP_MC_NO_MC ); + } + *(mc++) = '\0'; + + /* Parse parameters */ + if ( strtoul ( mc, &mc_end, 0 ) == 0 ) + tftp->flags &= ~TFTP_FL_SEND_ACK; + if ( *mc_end ) { + DBGC ( tftp, "TFTP %p multicast invalid mc %s\n", tftp, mc ); + return -( EINVAL | ETFTP_MC_INVALID_MC ); + } + DBGC ( tftp, "TFTP %p is%s the master client\n", + tftp, ( ( tftp->flags & TFTP_FL_SEND_ACK ) ? "" : " not" ) ); + if ( *addr && *port ) { + socket.sin.sin_family = AF_INET; + if ( inet_aton ( addr, &socket.sin.sin_addr ) == 0 ) { + DBGC ( tftp, "TFTP %p multicast invalid IP address " + "%s\n", tftp, addr ); + return -( EINVAL | ETFTP_MC_INVALID_IP ); + } + DBGC ( tftp, "TFTP %p multicast IP address %s\n", + tftp, inet_ntoa ( socket.sin.sin_addr ) ); + socket.sin.sin_port = htons ( strtoul ( port, &port_end, 0 ) ); + if ( *port_end ) { + DBGC ( tftp, "TFTP %p multicast invalid port %s\n", + tftp, port ); + return -( EINVAL | ETFTP_MC_INVALID_PORT ); + } + DBGC ( tftp, "TFTP %p multicast port %d\n", + tftp, ntohs ( socket.sin.sin_port ) ); + if ( ( rc = tftp_reopen_mc ( tftp, &socket.sa ) ) != 0 ) + return rc; + } + + return 0; +} + +/** A TFTP option */ +struct tftp_option { + /** Option name */ + const char *name; + /** Option processor + * + * @v tftp TFTP connection + * @v value Option value + * @ret rc Return status code + */ + int ( * process ) ( struct tftp_request *tftp, const char *value ); +}; + +/** Recognised TFTP options */ +static struct tftp_option tftp_options[] = { + { "blksize", tftp_process_blksize }, + { "tsize", tftp_process_tsize }, + { "multicast", tftp_process_multicast }, + { NULL, NULL } +}; + +/** + * Process TFTP option + * + * @v tftp TFTP connection + * @v name Option name + * @v value Option value + * @ret rc Return status code + */ +static int tftp_process_option ( struct tftp_request *tftp, + const char *name, const char *value ) { + struct tftp_option *option; + + for ( option = tftp_options ; option->name ; option++ ) { + if ( strcasecmp ( name, option->name ) == 0 ) + return option->process ( tftp, value ); + } + + DBGC ( tftp, "TFTP %p received unknown option \"%s\" = \"%s\"\n", + tftp, name, value ); + + /* Unknown options should be silently ignored */ + return 0; +} + +/** + * Receive OACK + * + * @v tftp TFTP connection + * @v buf Temporary data buffer + * @v len Length of temporary data buffer + * @ret rc Return status code + */ +static int tftp_rx_oack ( struct tftp_request *tftp, void *buf, size_t len ) { + struct tftp_oack *oack = buf; + char *end = buf + len; + char *name; + char *value; + char *next; + int rc = 0; + + /* Sanity check */ + if ( len < sizeof ( *oack ) ) { + DBGC ( tftp, "TFTP %p received underlength OACK packet " + "length %zd\n", tftp, len ); + rc = -EINVAL; + goto done; + } + + /* Process each option in turn */ + for ( name = oack->data ; name < end ; name = next ) { + + /* Parse option name and value + * + * We treat parsing errors as non-fatal, because there + * exists at least one TFTP server (IBM Tivoli PXE + * Server 5.1.0.3) that has been observed to send + * malformed OACKs containing trailing garbage bytes. + */ + value = ( name + strnlen ( name, ( end - name ) ) + 1 ); + if ( value > end ) { + DBGC ( tftp, "TFTP %p received OACK with malformed " + "option name:\n", tftp ); + DBGC_HD ( tftp, oack, len ); + break; + } + if ( value == end ) { + DBGC ( tftp, "TFTP %p received OACK missing value " + "for option \"%s\"\n", tftp, name ); + DBGC_HD ( tftp, oack, len ); + break; + } + next = ( value + strnlen ( value, ( end - value ) ) + 1 ); + if ( next > end ) { + DBGC ( tftp, "TFTP %p received OACK with malformed " + "value for option \"%s\":\n", tftp, name ); + DBGC_HD ( tftp, oack, len ); + break; + } + + /* Process option */ + if ( ( rc = tftp_process_option ( tftp, name, value ) ) != 0 ) + goto done; + } + + /* Process tsize information, if available */ + if ( tftp->tsize ) { + if ( ( rc = tftp_presize ( tftp, tftp->tsize ) ) != 0 ) + goto done; + } + + /* Request next data block */ + tftp_send_packet ( tftp ); + + done: + if ( rc ) + tftp_done ( tftp, rc ); + return rc; +} + +/** + * Receive DATA + * + * @v tftp TFTP connection + * @v iobuf I/O buffer + * @ret rc Return status code + * + * Takes ownership of I/O buffer. + */ +static int tftp_rx_data ( struct tftp_request *tftp, + struct io_buffer *iobuf ) { + struct tftp_data *data = iobuf->data; + struct xfer_metadata meta; + int block; + off_t offset; + size_t data_len; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *data ) ) { + DBGC ( tftp, "TFTP %p received underlength DATA packet " + "length %zd\n", tftp, iob_len ( iobuf ) ); + rc = -EINVAL; + goto done; + } + if ( data->block == 0 ) { + DBGC ( tftp, "TFTP %p received data block 0\n", tftp ); + rc = -EINVAL; + goto done; + } + + /* Extract data */ + block = ( ntohs ( data->block ) - 1 ); + offset = ( block * tftp->blksize ); + iob_pull ( iobuf, sizeof ( *data ) ); + data_len = iob_len ( iobuf ); + if ( data_len > tftp->blksize ) { + DBGC ( tftp, "TFTP %p received overlength DATA packet " + "length %zd\n", tftp, data_len ); + rc = -EINVAL; + goto done; + } + + /* Deliver data */ + memset ( &meta, 0, sizeof ( meta ) ); + meta.whence = SEEK_SET; + meta.offset = offset; + if ( ( rc = xfer_deliver_iob_meta ( &tftp->xfer, iob_disown ( iobuf ), + &meta ) ) != 0 ) { + DBGC ( tftp, "TFTP %p could not deliver data: %s\n", + tftp, strerror ( rc ) ); + goto done; + } + + /* Ensure block bitmap is ready */ + if ( ( rc = tftp_presize ( tftp, ( offset + data_len ) ) ) != 0 ) + goto done; + + /* Mark block as received */ + bitmap_set ( &tftp->bitmap, block ); + + /* Acknowledge block */ + tftp_send_packet ( tftp ); + + /* If all blocks have been received, finish. */ + if ( bitmap_full ( &tftp->bitmap ) ) + tftp_done ( tftp, 0 ); + + done: + free_iob ( iobuf ); + if ( rc ) + tftp_done ( tftp, rc ); + return rc; +} + +/** Translation between TFTP errors and internal error numbers */ +static const int tftp_errors[] = { + [TFTP_ERR_FILE_NOT_FOUND] = ENOENT, + [TFTP_ERR_ACCESS_DENIED] = EACCES, + [TFTP_ERR_ILLEGAL_OP] = ENOTSUP, +}; + +/** + * Receive ERROR + * + * @v tftp TFTP connection + * @v buf Temporary data buffer + * @v len Length of temporary data buffer + * @ret rc Return status code + */ +static int tftp_rx_error ( struct tftp_request *tftp, void *buf, size_t len ) { + struct tftp_error *error = buf; + unsigned int err; + int rc = 0; + + /* Sanity check */ + if ( len < sizeof ( *error ) ) { + DBGC ( tftp, "TFTP %p received underlength ERROR packet " + "length %zd\n", tftp, len ); + return -EINVAL; + } + + DBGC ( tftp, "TFTP %p received ERROR packet with code %d, message " + "\"%s\"\n", tftp, ntohs ( error->errcode ), error->errmsg ); + + /* Determine final operation result */ + err = ntohs ( error->errcode ); + if ( err < ( sizeof ( tftp_errors ) / sizeof ( tftp_errors[0] ) ) ) + rc = -tftp_errors[err]; + if ( ! rc ) + rc = -ENOTSUP; + + /* Close TFTP request */ + tftp_done ( tftp, rc ); + + return 0; +} + +/** + * Receive new data + * + * @v tftp TFTP connection + * @v iobuf I/O buffer + * @v meta Transfer metadata + * @ret rc Return status code + */ +static int tftp_rx ( struct tftp_request *tftp, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct sockaddr_tcpip *st_src; + struct tftp_common *common = iobuf->data; + size_t len = iob_len ( iobuf ); + int rc = -EINVAL; + + /* Sanity checks */ + if ( len < sizeof ( *common ) ) { + DBGC ( tftp, "TFTP %p received underlength packet length " + "%zd\n", tftp, len ); + goto done; + } + if ( ! meta->src ) { + DBGC ( tftp, "TFTP %p received packet without source port\n", + tftp ); + goto done; + } + + /* Filter by TID. Set TID on first response received */ + st_src = ( struct sockaddr_tcpip * ) meta->src; + if ( ! tftp->peer.st_family ) { + memcpy ( &tftp->peer, st_src, sizeof ( tftp->peer ) ); + DBGC ( tftp, "TFTP %p using remote port %d\n", tftp, + ntohs ( tftp->peer.st_port ) ); + } else if ( memcmp ( &tftp->peer, st_src, + sizeof ( tftp->peer ) ) != 0 ) { + DBGC ( tftp, "TFTP %p received packet from wrong source (got " + "%d, wanted %d)\n", tftp, ntohs ( st_src->st_port ), + ntohs ( tftp->peer.st_port ) ); + goto done; + } + + switch ( common->opcode ) { + case htons ( TFTP_OACK ): + rc = tftp_rx_oack ( tftp, iobuf->data, len ); + break; + case htons ( TFTP_DATA ): + rc = tftp_rx_data ( tftp, iob_disown ( iobuf ) ); + break; + case htons ( TFTP_ERROR ): + rc = tftp_rx_error ( tftp, iobuf->data, len ); + break; + default: + DBGC ( tftp, "TFTP %p received strange packet type %d\n", + tftp, ntohs ( common->opcode ) ); + break; + }; + + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Receive new data via socket + * + * @v socket Transport layer interface + * @v iobuf I/O buffer + * @v meta Transfer metadata + * @ret rc Return status code + */ +static int tftp_socket_deliver_iob ( struct xfer_interface *socket, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct tftp_request *tftp = + container_of ( socket, struct tftp_request, socket ); + + /* Enable sending ACKs when we receive a unicast packet. This + * covers three cases: + * + * 1. Standard TFTP; we should always send ACKs, and will + * always receive a unicast packet before we need to send the + * first ACK. + * + * 2. RFC2090 multicast TFTP; the only unicast packets we will + * receive are the OACKs; enable sending ACKs here (before + * processing the OACK) and disable it when processing the + * multicast option if we are not the master client. + * + * 3. MTFTP; receiving a unicast datagram indicates that we + * are the "master client" and should send ACKs. + */ + tftp->flags |= TFTP_FL_SEND_ACK; + + return tftp_rx ( tftp, iobuf, meta ); +} + +/** TFTP socket operations */ +static struct xfer_interface_operations tftp_socket_operations = { + .close = ignore_xfer_close, + .vredirect = xfer_vreopen, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = tftp_socket_deliver_iob, + .deliver_raw = xfer_deliver_as_iob, +}; + +/** + * Receive new data via multicast socket + * + * @v mc_socket Multicast transport layer interface + * @v iobuf I/O buffer + * @v meta Transfer metadata + * @ret rc Return status code + */ +static int tftp_mc_socket_deliver_iob ( struct xfer_interface *mc_socket, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct tftp_request *tftp = + container_of ( mc_socket, struct tftp_request, mc_socket ); + + return tftp_rx ( tftp, iobuf, meta ); +} + +/** TFTP multicast socket operations */ +static struct xfer_interface_operations tftp_mc_socket_operations = { + .close = ignore_xfer_close, + .vredirect = xfer_vreopen, + .window = unlimited_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = tftp_mc_socket_deliver_iob, + .deliver_raw = xfer_deliver_as_iob, +}; + +/** + * Close TFTP data transfer interface + * + * @v xfer Data transfer interface + * @v rc Reason for close + */ +static void tftp_xfer_close ( struct xfer_interface *xfer, int rc ) { + struct tftp_request *tftp = + container_of ( xfer, struct tftp_request, xfer ); + + DBGC ( tftp, "TFTP %p interface closed: %s\n", + tftp, strerror ( rc ) ); + + tftp_done ( tftp, rc ); +} + +/** + * Check flow control window + * + * @v xfer Data transfer interface + * @ret len Length of window + */ +static size_t tftp_xfer_window ( struct xfer_interface *xfer ) { + struct tftp_request *tftp = + container_of ( xfer, struct tftp_request, xfer ); + + /* We abuse this data-xfer method to convey the blocksize to + * the caller. This really should be done using some kind of + * stat() method, but we don't yet have the facility to do + * that. + */ + return tftp->blksize; +} + +/** TFTP data transfer interface operations */ +static struct xfer_interface_operations tftp_xfer_operations = { + .close = tftp_xfer_close, + .vredirect = ignore_xfer_vredirect, + .window = tftp_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = xfer_deliver_as_raw, + .deliver_raw = ignore_xfer_deliver_raw, +}; + +/** + * Initiate TFTP/TFTM/MTFTP download + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int tftp_core_open ( struct xfer_interface *xfer, struct uri *uri, + unsigned int default_port, + struct sockaddr *multicast, + unsigned int flags ) { + struct tftp_request *tftp; + int rc; + + /* Sanity checks */ + if ( ! uri->host ) + return -EINVAL; + if ( ! uri->path ) + return -EINVAL; + + /* Allocate and populate TFTP structure */ + tftp = zalloc ( sizeof ( *tftp ) ); + if ( ! tftp ) + return -ENOMEM; + tftp->refcnt.free = tftp_free; + xfer_init ( &tftp->xfer, &tftp_xfer_operations, &tftp->refcnt ); + tftp->uri = uri_get ( uri ); + xfer_init ( &tftp->socket, &tftp_socket_operations, &tftp->refcnt ); + xfer_init ( &tftp->mc_socket, &tftp_mc_socket_operations, + &tftp->refcnt ); + tftp->blksize = TFTP_DEFAULT_BLKSIZE; + tftp->flags = flags; + tftp->timer.expired = tftp_timer_expired; + + /* Open socket */ + tftp->port = uri_port ( tftp->uri, default_port ); + if ( ( rc = tftp_reopen ( tftp ) ) != 0 ) + goto err; + + /* Open multicast socket */ + if ( multicast ) { + if ( ( rc = tftp_reopen_mc ( tftp, multicast ) ) != 0 ) + goto err; + } + + /* Start timer to initiate RRQ */ + start_timer_nodelay ( &tftp->timer ); + + /* Attach to parent interface, mortalise self, and return */ + xfer_plug_plug ( &tftp->xfer, xfer ); + ref_put ( &tftp->refcnt ); + return 0; + + err: + DBGC ( tftp, "TFTP %p could not create request: %s\n", + tftp, strerror ( rc ) ); + tftp_done ( tftp, rc ); + ref_put ( &tftp->refcnt ); + return rc; +} + +/** + * Initiate TFTP download + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int tftp_open ( struct xfer_interface *xfer, struct uri *uri ) { + return tftp_core_open ( xfer, uri, TFTP_PORT, NULL, + TFTP_FL_RRQ_SIZES ); + +} + +/** TFTP URI opener */ +struct uri_opener tftp_uri_opener __uri_opener = { + .scheme = "tftp", + .open = tftp_open, +}; + +/** + * Initiate TFTM download + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int tftm_open ( struct xfer_interface *xfer, struct uri *uri ) { + return tftp_core_open ( xfer, uri, TFTP_PORT, NULL, + ( TFTP_FL_RRQ_SIZES | + TFTP_FL_RRQ_MULTICAST ) ); + +} + +/** TFTM URI opener */ +struct uri_opener tftm_uri_opener __uri_opener = { + .scheme = "tftm", + .open = tftm_open, +}; + +/** + * Initiate MTFTP download + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int mtftp_open ( struct xfer_interface *xfer, struct uri *uri ) { + return tftp_core_open ( xfer, uri, MTFTP_PORT, + ( struct sockaddr * ) &tftp_mtftp_socket, + TFTP_FL_MTFTP_RECOVERY ); +} + +/** MTFTP URI opener */ +struct uri_opener mtftp_uri_opener __uri_opener = { + .scheme = "mtftp", + .open = mtftp_open, +}; + +/****************************************************************************** + * + * Settings + * + ****************************************************************************** + */ + +/** TFTP server setting */ +struct setting next_server_setting __setting = { + .name = "next-server", + .description = "TFTP server", + .tag = DHCP_EB_SIADDR, + .type = &setting_type_ipv4, +}; + +/** + * Apply TFTP configuration settings + * + * @ret rc Return status code + */ +static int tftp_apply_settings ( void ) { + static struct in_addr tftp_server = { 0 }; + struct in_addr last_tftp_server; + char uri_string[32]; + struct uri *uri; + + /* Retrieve TFTP server setting */ + last_tftp_server = tftp_server; + fetch_ipv4_setting ( NULL, &next_server_setting, &tftp_server ); + + /* If TFTP server setting has changed, set the current working + * URI to match. Do it only when the TFTP server has changed + * to try to minimise surprises to the user, who probably + * won't expect the CWURI to change just because they updated + * an unrelated setting and triggered all the settings + * applicators. + */ + if ( tftp_server.s_addr != last_tftp_server.s_addr ) { + snprintf ( uri_string, sizeof ( uri_string ), + "tftp://%s/", inet_ntoa ( tftp_server ) ); + uri = parse_uri ( uri_string ); + if ( ! uri ) + return -ENOMEM; + churi ( uri ); + uri_put ( uri ); + } + + return 0; +} + +/** TFTP settings applicator */ +struct settings_applicator tftp_settings_applicator __settings_applicator = { + .apply = tftp_apply_settings, +}; |