diff options
author | H. Peter Anvin <hpa@zytor.com> | 2010-02-10 15:59:46 -0800 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2010-02-10 15:59:46 -0800 |
commit | e8d0e6fb18aae44474e864d8040d5ed44ff6acab (patch) | |
tree | 547115848f342060950112d85b83706ef12fd800 /gpxe/src/net | |
parent | 5396e250a679c368b3dd353c730358d6c54960c8 (diff) | |
parent | 2dafb8402ed666b37f96f6e1579a99b8a8c85452 (diff) | |
download | syslinux-e8d0e6fb18aae44474e864d8040d5ed44ff6acab.tar.gz |
Merge branch 'fsc' into pathbased
Resolved Conflicts:
core/fs/ext2/ext2.c
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'gpxe/src/net')
47 files changed, 10711 insertions, 373 deletions
diff --git a/gpxe/src/net/80211/net80211.c b/gpxe/src/net/80211/net80211.c new file mode 100644 index 00000000..1c54597f --- /dev/null +++ b/gpxe/src/net/80211/net80211.c @@ -0,0 +1,2829 @@ +/* + * The gPXE 802.11 MAC layer. + * + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <byteswap.h> +#include <stdlib.h> +#include <gpxe/settings.h> +#include <gpxe/if_arp.h> +#include <gpxe/ethernet.h> +#include <gpxe/ieee80211.h> +#include <gpxe/netdevice.h> +#include <gpxe/net80211.h> +#include <gpxe/sec80211.h> +#include <gpxe/timer.h> +#include <gpxe/nap.h> +#include <unistd.h> +#include <errno.h> + +/** @file + * + * 802.11 device management + */ + +/* Disambiguate the EINVAL's a bit */ +#define EINVAL_PKT_TOO_SHORT ( EINVAL | EUNIQ_01 ) +#define EINVAL_PKT_VERSION ( EINVAL | EUNIQ_02 ) +#define EINVAL_PKT_NOT_DATA ( EINVAL | EUNIQ_03 ) +#define EINVAL_PKT_NOT_FROMDS ( EINVAL | EUNIQ_04 ) +#define EINVAL_PKT_LLC_HEADER ( EINVAL | EUNIQ_05 ) +#define EINVAL_CRYPTO_REQUEST ( EINVAL | EUNIQ_06 ) +#define EINVAL_ACTIVE_SCAN ( EINVAL | EUNIQ_07 ) + +/* + * 802.11 error codes: The AP can give us a status code explaining why + * authentication failed, or a reason code explaining why we were + * 
deauthenticated/disassociated. These codes range from 0-63 (the + * field is 16 bits wide, but only up to 45 or so are defined yet; we + * allow up to 63 for extensibility). This is encoded into an error + * code as such: + * + * status & 0x1f goes here --vv-- + * Status code 0-31: ECONNREFUSED | EUNIQ_(status & 0x1f) (0e1a6038) + * Status code 32-63: EHOSTUNREACH | EUNIQ_(status & 0x1f) (171a6011) + * Reason code 0-31: ECONNRESET | EUNIQ_(reason & 0x1f) (0f1a6039) + * Reason code 32-63: ENETRESET | EUNIQ_(reason & 0x1f) (271a6001) + * + * The POSIX error codes more or less convey the appropriate message + * (status codes occur when we can't associate at all, reason codes + * when we lose association unexpectedly) and let us extract the + * complete 802.11 error code from the rc value. + */ + +/** Make return status code from 802.11 status code */ +#define E80211_STATUS( stat ) ( ((stat & 0x20)? EHOSTUNREACH : ECONNREFUSED) \ + | ((stat & 0x1f) << 8) ) + +/** Make return status code from 802.11 reason code */ +#define E80211_REASON( reas ) ( ((reas & 0x20)? ENETRESET : ECONNRESET) \ + | ((reas & 0x1f) << 8) ) + + +/** List of 802.11 devices */ +static struct list_head net80211_devices = LIST_HEAD_INIT ( net80211_devices ); + +/** Set of device operations that does nothing */ +static struct net80211_device_operations net80211_null_ops; + +/** Information associated with a received management packet + * + * This is used to keep beacon signal strengths in a parallel queue to + * the beacons themselves. 
+ */ +struct net80211_rx_info { + int signal; + struct list_head list; +}; + +/** Context for a probe operation */ +struct net80211_probe_ctx { + /** 802.11 device to probe on */ + struct net80211_device *dev; + + /** Value of keep_mgmt before probe was started */ + int old_keep_mgmt; + + /** If scanning actively, pointer to probe packet to send */ + struct io_buffer *probe; + + /** If non-"", the ESSID to limit ourselves to */ + const char *essid; + + /** Time probe was started */ + u32 ticks_start; + + /** Time last useful beacon was received */ + u32 ticks_beacon; + + /** Time channel was last changed */ + u32 ticks_channel; + + /** Time to stay on each channel */ + u32 hop_time; + + /** Channels to hop by when changing channel */ + int hop_step; + + /** List of best beacons for each network found so far */ + struct list_head *beacons; +}; + +/** Context for the association task */ +struct net80211_assoc_ctx { + /** Next authentication method to try using */ + int method; + + /** Time (in ticks) of the last sent association-related packet */ + int last_packet; + + /** Number of times we have tried sending it */ + int times_tried; +}; + +/** + * @defgroup net80211_netdev Network device interface functions + * @{ + */ +static int net80211_netdev_open ( struct net_device *netdev ); +static void net80211_netdev_close ( struct net_device *netdev ); +static int net80211_netdev_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ); +static void net80211_netdev_poll ( struct net_device *netdev ); +static void net80211_netdev_irq ( struct net_device *netdev, int enable ); +/** @} */ + +/** + * @defgroup net80211_linklayer 802.11 link-layer protocol functions + * @{ + */ +static int net80211_ll_push ( struct net_device *netdev, + struct io_buffer *iobuf, const void *ll_dest, + const void *ll_source, uint16_t net_proto ); +static int net80211_ll_pull ( struct net_device *netdev, + struct io_buffer *iobuf, const void **ll_dest, + const void **ll_source, uint16_t 
* net_proto ); +/** @} */ + +/** + * @defgroup net80211_help 802.11 helper functions + * @{ + */ +static void net80211_add_channels ( struct net80211_device *dev, int start, + int len, int txpower ); +static void net80211_filter_hw_channels ( struct net80211_device *dev ); +static void net80211_set_rtscts_rate ( struct net80211_device *dev ); +static int net80211_process_capab ( struct net80211_device *dev, + u16 capab ); +static int net80211_process_ie ( struct net80211_device *dev, + union ieee80211_ie *ie, void *ie_end ); +static union ieee80211_ie * +net80211_marshal_request_info ( struct net80211_device *dev, + union ieee80211_ie *ie ); +/** @} */ + +/** + * @defgroup net80211_assoc_ll 802.11 association handling functions + * @{ + */ +static void net80211_step_associate ( struct process *proc ); +static void net80211_handle_auth ( struct net80211_device *dev, + struct io_buffer *iob ); +static void net80211_handle_assoc_reply ( struct net80211_device *dev, + struct io_buffer *iob ); +static int net80211_send_disassoc ( struct net80211_device *dev, int reason, + int deauth ); +static void net80211_handle_mgmt ( struct net80211_device *dev, + struct io_buffer *iob, int signal ); +/** @} */ + +/** + * @defgroup net80211_frag 802.11 fragment handling functions + * @{ + */ +static void net80211_free_frags ( struct net80211_device *dev, int fcid ); +static struct io_buffer *net80211_accum_frags ( struct net80211_device *dev, + int fcid, int nfrags, int size ); +static void net80211_rx_frag ( struct net80211_device *dev, + struct io_buffer *iob, int signal ); +/** @} */ + +/** + * @defgroup net80211_settings 802.11 settings handlers + * @{ + */ +static int net80211_check_settings_update ( void ); + +/** 802.11 settings applicator + * + * When the SSID is changed, this will cause any open devices to + * re-associate; when the encryption key is changed, we similarly + * update their state. 
+ */ +struct settings_applicator net80211_applicator __settings_applicator = { + .apply = net80211_check_settings_update, +}; + +/** The network name to associate with + * + * If this is blank, we scan for all networks and use the one with the + * greatest signal strength. + */ +struct setting net80211_ssid_setting __setting = { + .name = "ssid", + .description = "802.11 SSID (network name)", + .type = &setting_type_string, +}; + +/** Whether to use active scanning + * + * In order to associate with a hidden SSID, it's necessary to use an + * active scan (send probe packets). If this setting is nonzero, an + * active scan on the 2.4GHz band will be used to associate. + */ +struct setting net80211_active_setting __setting = { + .name = "active-scan", + .description = "Use an active scan during 802.11 association", + .type = &setting_type_int8, +}; + +/** The cryptographic key to use + * + * For hex WEP keys, as is common, this must be entered using the + * normal gPXE method for entering hex settings; an ASCII string of + * hex characters will not behave as expected. + */ +struct setting net80211_key_setting __setting = { + .name = "key", + .description = "Encryption key for protected 802.11 networks", + .type = &setting_type_string, +}; + +/** @} */ + + +/* ---------- net_device wrapper ---------- */ + +/** + * Open 802.11 device and start association + * + * @v netdev Wrapping network device + * @ret rc Return status code + * + * This sets up a default conservative set of channels for probing, + * and starts the auto-association task unless the @c + * NET80211_NO_ASSOC flag is set in the wrapped 802.11 device's @c + * state field. + */ +static int net80211_netdev_open ( struct net_device *netdev ) +{ + struct net80211_device *dev = netdev->priv; + int rc = 0; + + if ( dev->op == &net80211_null_ops ) + return -EFAULT; + + if ( dev->op->open ) + rc = dev->op->open ( dev ); + + if ( rc < 0 ) + return rc; + + if ( ! 
( dev->state & NET80211_NO_ASSOC ) ) + net80211_autoassociate ( dev ); + + return 0; +} + +/** + * Close 802.11 device + * + * @v netdev Wrapping network device. + * + * If the association task is running, this will stop it. + */ +static void net80211_netdev_close ( struct net_device *netdev ) +{ + struct net80211_device *dev = netdev->priv; + + if ( dev->state & NET80211_WORKING ) + process_del ( &dev->proc_assoc ); + + /* Send disassociation frame to AP, to be polite */ + if ( dev->state & NET80211_ASSOCIATED ) + net80211_send_disassoc ( dev, IEEE80211_REASON_LEAVING, 0 ); + + if ( dev->handshaker && dev->handshaker->stop && + dev->handshaker->started ) + dev->handshaker->stop ( dev ); + + free ( dev->crypto ); + free ( dev->handshaker ); + dev->crypto = NULL; + dev->handshaker = NULL; + + netdev_link_down ( netdev ); + dev->state = 0; + + if ( dev->op->close ) + dev->op->close ( dev ); +} + +/** + * Transmit packet on 802.11 device + * + * @v netdev Wrapping network device + * @v iobuf I/O buffer + * @ret rc Return status code + * + * If encryption is enabled for the currently associated network, the + * packet will be encrypted prior to transmission. + */ +static int net80211_netdev_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) +{ + struct net80211_device *dev = netdev->priv; + struct ieee80211_frame *hdr = iobuf->data; + int rc = -ENOSYS; + + if ( dev->crypto && ! ( hdr->fc & IEEE80211_FC_PROTECTED ) && + ( ( hdr->fc & IEEE80211_FC_TYPE ) == IEEE80211_TYPE_DATA ) ) { + struct io_buffer *niob = dev->crypto->encrypt ( dev->crypto, + iobuf ); + if ( ! 
niob ) + return -ENOMEM; /* only reason encryption could fail */ + + /* Free the non-encrypted iob */ + netdev_tx_complete ( netdev, iobuf ); + + /* Transmit the encrypted iob; the Protected flag is + set, so we won't recurse into here again */ + netdev_tx ( netdev, niob ); + + /* Don't transmit the freed packet */ + return 0; + } + + if ( dev->op->transmit ) + rc = dev->op->transmit ( dev, iobuf ); + + return rc; +} + +/** + * Poll 802.11 device for received packets and completed transmissions + * + * @v netdev Wrapping network device + */ +static void net80211_netdev_poll ( struct net_device *netdev ) +{ + struct net80211_device *dev = netdev->priv; + + if ( dev->op->poll ) + dev->op->poll ( dev ); +} + +/** + * Enable or disable interrupts for 802.11 device + * + * @v netdev Wrapping network device + * @v enable Whether to enable interrupts + */ +static void net80211_netdev_irq ( struct net_device *netdev, int enable ) +{ + struct net80211_device *dev = netdev->priv; + + if ( dev->op->irq ) + dev->op->irq ( dev, enable ); +} + +/** Network device operations for a wrapped 802.11 device */ +static struct net_device_operations net80211_netdev_ops = { + .open = net80211_netdev_open, + .close = net80211_netdev_close, + .transmit = net80211_netdev_transmit, + .poll = net80211_netdev_poll, + .irq = net80211_netdev_irq, +}; + + +/* ---------- 802.11 link-layer protocol ---------- */ + +/** 802.11 broadcast MAC address */ +static u8 net80211_ll_broadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + +/** + * Determine whether a transmission rate uses ERP/OFDM + * + * @v rate Rate in 100 kbps units + * @ret is_erp TRUE if the rate is an ERP/OFDM rate + * + * 802.11b supports rates of 1.0, 2.0, 5.5, and 11.0 Mbps; any other + * rate than these on the 2.4GHz spectrum is an ERP (802.11g) rate. 
+ */ +static inline int net80211_rate_is_erp ( u16 rate ) +{ + if ( rate == 10 || rate == 20 || rate == 55 || rate == 110 ) + return 0; + return 1; +} + + +/** + * Calculate one frame's contribution to 802.11 duration field + * + * @v dev 802.11 device + * @v bytes Amount of data to calculate duration for + * @ret dur Duration field in microseconds + * + * To avoid multiple stations attempting to transmit at once, 802.11 + * provides that every packet shall include a duration field + * specifying a length of time for which the wireless medium will be + * reserved after it is transmitted. The duration is measured in + * microseconds and is calculated with respect to the current + * physical-layer parameters of the 802.11 device. + * + * For an unfragmented data or management frame, or the last fragment + * of a fragmented frame, the duration captures only the 10 data bytes + * of one ACK; call once with bytes = 10. + * + * For a fragment of a data or management frame that will be followed + * by more fragments, the duration captures an ACK, the following + * fragment, and its ACK; add the results of three calls, two with + * bytes = 10 and one with bytes set to the next fragment's size. + * + * For an RTS control frame, the duration captures the responding CTS, + * the frame being sent, and its ACK; add the results of three calls, + * two with bytes = 10 and one with bytes set to the next frame's size + * (assuming unfragmented). + * + * For a CTS-to-self control frame, the duration captures the frame + * being protected and its ACK; add the results of two calls, one with + * bytes = 10 and one with bytes set to the next frame's size. + * + * No other frame types are currently supported by gPXE.
+ */ +u16 net80211_duration ( struct net80211_device *dev, int bytes, u16 rate ) +{ + struct net80211_channel *chan = &dev->channels[dev->channel]; + u32 kbps = rate * 100; + + if ( chan->band == NET80211_BAND_5GHZ || net80211_rate_is_erp ( rate ) ) { + /* OFDM encoding (802.11a/g) */ + int bits_per_symbol = ( kbps * 4 ) / 1000; /* 4us/symbol */ + int bits = 22 + ( bytes << 3 ); /* 22-bit PLCP */ + int symbols = ( bits + bits_per_symbol - 1 ) / bits_per_symbol; + + return 16 + 20 + ( symbols * 4 ); /* 16us SIFS, 20us preamble */ + } else { + /* CCK encoding (802.11b) */ + int phy_time = 144 + 48; /* preamble + PLCP */ + int bits = bytes << 3; + int data_time = ( bits * 1000 + kbps - 1 ) / kbps; + + if ( dev->phy_flags & NET80211_PHY_USE_SHORT_PREAMBLE ) + phy_time >>= 1; + + return 10 + phy_time + data_time; /* 10us SIFS */ + } +} + +/** + * Add 802.11 link-layer header + * + * @v netdev Wrapping network device + * @v iobuf I/O buffer + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v net_proto Network-layer protocol, in network byte order + * @ret rc Return status code + * + * This adds both the 802.11 frame header and the 802.2 LLC/SNAP + * header used on data packets. + * + * We also check here for state of the link that would make it invalid + * to send a data packet; every data packet must pass through here, + * and no non-data packet (e.g. management frame) should. + */ +static int net80211_ll_push ( struct net_device *netdev, + struct io_buffer *iobuf, const void *ll_dest, + const void *ll_source, uint16_t net_proto ) +{ + struct net80211_device *dev = netdev->priv; + struct ieee80211_frame *hdr = iob_push ( iobuf, + IEEE80211_LLC_HEADER_LEN + + IEEE80211_TYP_FRAME_HEADER_LEN ); + struct ieee80211_llc_snap_header *lhdr = + ( void * ) hdr + IEEE80211_TYP_FRAME_HEADER_LEN; + + /* We can't send data packets if we're not associated. */ + if ( ! 
( dev->state & NET80211_ASSOCIATED ) ) { + if ( dev->assoc_rc ) + return dev->assoc_rc; + return -ENETUNREACH; + } + + hdr->fc = IEEE80211_THIS_VERSION | IEEE80211_TYPE_DATA | + IEEE80211_STYPE_DATA | IEEE80211_FC_TODS; + + /* We don't send fragmented frames, so duration is the time + for an SIFS + 10-byte ACK. */ + hdr->duration = net80211_duration ( dev, 10, dev->rates[dev->rate] ); + + memcpy ( hdr->addr1, dev->bssid, ETH_ALEN ); + memcpy ( hdr->addr2, ll_source, ETH_ALEN ); + memcpy ( hdr->addr3, ll_dest, ETH_ALEN ); + + hdr->seq = IEEE80211_MAKESEQ ( ++dev->last_tx_seqnr, 0 ); + + lhdr->dsap = IEEE80211_LLC_DSAP; + lhdr->ssap = IEEE80211_LLC_SSAP; + lhdr->ctrl = IEEE80211_LLC_CTRL; + memset ( lhdr->oui, 0x00, 3 ); + lhdr->ethertype = net_proto; + + return 0; +} + +/** + * Remove 802.11 link-layer header + * + * @v netdev Wrapping network device + * @v iobuf I/O buffer + * @ret ll_dest Link-layer destination address + * @ret ll_source Link-layer source + * @ret net_proto Network-layer protocol, in network byte order + * @ret rc Return status code + * + * This expects and removes both the 802.11 frame header and the 802.2 + * LLC/SNAP header that are used on data packets. 
+ */ +static int net80211_ll_pull ( struct net_device *netdev __unused, + struct io_buffer *iobuf, + const void **ll_dest, const void **ll_source, + uint16_t * net_proto ) +{ + struct ieee80211_frame *hdr = iobuf->data; + struct ieee80211_llc_snap_header *lhdr = + ( void * ) hdr + IEEE80211_TYP_FRAME_HEADER_LEN; + + /* Bunch of sanity checks */ + if ( iob_len ( iobuf ) < IEEE80211_TYP_FRAME_HEADER_LEN + + IEEE80211_LLC_HEADER_LEN ) { + DBGC ( netdev->priv, "802.11 %p packet too short (%zd bytes)\n", + netdev->priv, iob_len ( iobuf ) ); + return -EINVAL_PKT_TOO_SHORT; + } + + if ( ( hdr->fc & IEEE80211_FC_VERSION ) != IEEE80211_THIS_VERSION ) { + DBGC ( netdev->priv, "802.11 %p packet invalid version %04x\n", + netdev->priv, hdr->fc & IEEE80211_FC_VERSION ); + return -EINVAL_PKT_VERSION; + } + + if ( ( hdr->fc & IEEE80211_FC_TYPE ) != IEEE80211_TYPE_DATA || + ( hdr->fc & IEEE80211_FC_SUBTYPE ) != IEEE80211_STYPE_DATA ) { + DBGC ( netdev->priv, "802.11 %p packet not data/data (fc=%04x)\n", + netdev->priv, hdr->fc ); + return -EINVAL_PKT_NOT_DATA; + } + + if ( ( hdr->fc & ( IEEE80211_FC_TODS | IEEE80211_FC_FROMDS ) ) != + IEEE80211_FC_FROMDS ) { + DBGC ( netdev->priv, "802.11 %p packet not from DS (fc=%04x)\n", + netdev->priv, hdr->fc ); + return -EINVAL_PKT_NOT_FROMDS; + } + + if ( lhdr->dsap != IEEE80211_LLC_DSAP || lhdr->ssap != IEEE80211_LLC_SSAP || + lhdr->ctrl != IEEE80211_LLC_CTRL || lhdr->oui[0] || lhdr->oui[1] || + lhdr->oui[2] ) { + DBGC ( netdev->priv, "802.11 %p LLC header is not plain EtherType " + "encapsulator: %02x->%02x [%02x] %02x:%02x:%02x %04x\n", + netdev->priv, lhdr->dsap, lhdr->ssap, lhdr->ctrl, + lhdr->oui[0], lhdr->oui[1], lhdr->oui[2], lhdr->ethertype ); + return -EINVAL_PKT_LLC_HEADER; + } + + iob_pull ( iobuf, sizeof ( *hdr ) + sizeof ( *lhdr ) ); + + *ll_dest = hdr->addr1; + *ll_source = hdr->addr3; + *net_proto = lhdr->ethertype; + return 0; +} + +/** 802.11 link-layer protocol */ +static struct ll_protocol net80211_ll_protocol 
__ll_protocol = { + .name = "802.11", + .push = net80211_ll_push, + .pull = net80211_ll_pull, + .init_addr = eth_init_addr, + .ntoa = eth_ntoa, + .mc_hash = eth_mc_hash, + .eth_addr = eth_eth_addr, + .ll_proto = htons ( ARPHRD_ETHER ), /* "encapsulated Ethernet" */ + .hw_addr_len = ETH_ALEN, + .ll_addr_len = ETH_ALEN, + .ll_header_len = IEEE80211_TYP_FRAME_HEADER_LEN + + IEEE80211_LLC_HEADER_LEN, +}; + + +/* ---------- 802.11 network management API ---------- */ + +/** + * Get 802.11 device from wrapping network device + * + * @v netdev Wrapping network device + * @ret dev 802.11 device wrapped by network device, or NULL + * + * Returns NULL if the network device does not wrap an 802.11 device. + */ +struct net80211_device * net80211_get ( struct net_device *netdev ) +{ + struct net80211_device *dev; + + list_for_each_entry ( dev, &net80211_devices, list ) { + if ( netdev->priv == dev ) + return netdev->priv; + } + + return NULL; +} + +/** + * Set state of 802.11 device keeping management frames + * + * @v dev 802.11 device + * @v enable Whether to keep management frames + * @ret oldenab Whether management frames were enabled before this call + * + * If enable is TRUE, beacon, probe, and action frames will be kept + * and may be retrieved by calling net80211_mgmt_dequeue(). + */ +int net80211_keep_mgmt ( struct net80211_device *dev, int enable ) +{ + int oldenab = dev->keep_mgmt; + + dev->keep_mgmt = enable; + return oldenab; +} + +/** + * Get 802.11 management frame + * + * @v dev 802.11 device + * @ret signal Signal strength of returned management frame + * @ret iob I/O buffer, or NULL if no management frame is queued + * + * Frames will only be returned by this function if + * net80211_keep_mgmt() has been previously called with enable set to + * TRUE. + * + * The calling function takes ownership of the returned I/O buffer. 
+ */ +struct io_buffer * net80211_mgmt_dequeue ( struct net80211_device *dev, + int *signal ) +{ + struct io_buffer *iobuf; + struct net80211_rx_info *rxi; + + list_for_each_entry ( rxi, &dev->mgmt_info_queue, list ) { + list_del ( &rxi->list ); + if ( signal ) + *signal = rxi->signal; + free ( rxi ); + + list_for_each_entry ( iobuf, &dev->mgmt_queue, list ) { + list_del ( &iobuf->list ); + return iobuf; + } + assert ( 0 ); + } + + return NULL; +} + +/** + * Transmit 802.11 management frame + * + * @v dev 802.11 device + * @v fc Frame Control flags for management frame + * @v dest Destination access point + * @v iob I/O buffer + * @ret rc Return status code + * + * The @a fc argument must contain at least an IEEE 802.11 management + * subtype number (e.g. IEEE80211_STYPE_PROBE_REQ). If it contains + * IEEE80211_FC_PROTECTED, the frame will be encrypted prior to + * transmission. + * + * It is required that @a iob have at least 24 bytes of headroom + * reserved before its data start. + */ +int net80211_tx_mgmt ( struct net80211_device *dev, u16 fc, u8 dest[6], + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob_push ( iob, + IEEE80211_TYP_FRAME_HEADER_LEN ); + + hdr->fc = IEEE80211_THIS_VERSION | IEEE80211_TYPE_MGMT | + ( fc & ~IEEE80211_FC_PROTECTED ); + hdr->duration = net80211_duration ( dev, 10, dev->rates[dev->rate] ); + hdr->seq = IEEE80211_MAKESEQ ( ++dev->last_tx_seqnr, 0 ); + + memcpy ( hdr->addr1, dest, ETH_ALEN ); /* DA = RA */ + memcpy ( hdr->addr2, dev->netdev->ll_addr, ETH_ALEN ); /* SA = TA */ + memcpy ( hdr->addr3, dest, ETH_ALEN ); /* BSSID */ + + if ( fc & IEEE80211_FC_PROTECTED ) { + if ( ! 
dev->crypto ) + return -EINVAL_CRYPTO_REQUEST; + + struct io_buffer *eiob = dev->crypto->encrypt ( dev->crypto, + iob ); + free_iob ( iob ); + iob = eiob; + } + + return netdev_tx ( dev->netdev, iob ); +} + + +/* ---------- Driver API ---------- */ + +/** + * Allocate 802.11 device + * + * @v priv_size Size of driver-private allocation area + * @ret dev Newly allocated 802.11 device + * + * This function allocates a net_device with space in its private area + * for both the net80211_device it will wrap and the driver-private + * data space requested. It initializes the link-layer-specific parts + * of the net_device, and links the net80211_device to the net_device + * appropriately. + */ +struct net80211_device * net80211_alloc ( size_t priv_size ) +{ + struct net80211_device *dev; + struct net_device *netdev = + alloc_netdev ( sizeof ( *dev ) + priv_size ); + + if ( ! netdev ) + return NULL; + + netdev->ll_protocol = &net80211_ll_protocol; + netdev->ll_broadcast = net80211_ll_broadcast; + netdev->max_pkt_len = IEEE80211_MAX_DATA_LEN; + netdev_init ( netdev, &net80211_netdev_ops ); + + dev = netdev->priv; + dev->netdev = netdev; + dev->priv = ( u8 * ) dev + sizeof ( *dev ); + dev->op = &net80211_null_ops; + + process_init_stopped ( &dev->proc_assoc, net80211_step_associate, + &netdev->refcnt ); + INIT_LIST_HEAD ( &dev->mgmt_queue ); + INIT_LIST_HEAD ( &dev->mgmt_info_queue ); + + return dev; +} + +/** + * Register 802.11 device with network stack + * + * @v dev 802.11 device + * @v ops 802.11 device operations + * @v hw 802.11 hardware information + * + * This also registers the wrapping net_device with the higher network + * layers. + */ +int net80211_register ( struct net80211_device *dev, + struct net80211_device_operations *ops, + struct net80211_hw_info *hw ) +{ + dev->op = ops; + dev->hw = malloc ( sizeof ( *hw ) ); + if ( ! 
dev->hw ) + return -ENOMEM; + + memcpy ( dev->hw, hw, sizeof ( *hw ) ); + memcpy ( dev->netdev->hw_addr, hw->hwaddr, ETH_ALEN ); + + /* Set some sensible channel defaults for driver's open() function */ + memcpy ( dev->channels, dev->hw->channels, + NET80211_MAX_CHANNELS * sizeof ( dev->channels[0] ) ); + dev->channel = 0; + + list_add_tail ( &dev->list, &net80211_devices ); + return register_netdev ( dev->netdev ); +} + +/** + * Unregister 802.11 device from network stack + * + * @v dev 802.11 device + * + * After this call, the device operations are cleared so that they + * will not be called. + */ +void net80211_unregister ( struct net80211_device *dev ) +{ + unregister_netdev ( dev->netdev ); + list_del ( &dev->list ); + dev->op = &net80211_null_ops; +} + +/** + * Free 802.11 device + * + * @v dev 802.11 device + * + * The device should be unregistered before this function is called. + */ +void net80211_free ( struct net80211_device *dev ) +{ + free ( dev->hw ); + rc80211_free ( dev->rctl ); + netdev_nullify ( dev->netdev ); + netdev_put ( dev->netdev ); +} + + +/* ---------- 802.11 network management workhorse code ---------- */ + +/** + * Set state of 802.11 device + * + * @v dev 802.11 device + * @v clear Bitmask of flags to clear + * @v set Bitmask of flags to set + * @v status Status or reason code for most recent operation + * + * If @a status represents a reason code, it should be OR'ed with + * NET80211_IS_REASON. + * + * Clearing authentication also clears association; clearing + * association also clears security handshaking state. Clearing + * association removes the link-up flag from the wrapping net_device, + * but setting it does not automatically set the flag; that is left to + * the judgment of higher-level code. + */ +static inline void net80211_set_state ( struct net80211_device *dev, + short clear, short set, + u16 status ) +{ + /* The conditions in this function are deliberately formulated + to be decidable at compile-time in most cases. 
Since clear + and set are generally passed as constants, the body of this + function can be reduced down to a few statements by the + compiler. */ + + const int statmsk = NET80211_STATUS_MASK | NET80211_IS_REASON; + + if ( clear & NET80211_PROBED ) + clear |= NET80211_AUTHENTICATED; + + if ( clear & NET80211_AUTHENTICATED ) + clear |= NET80211_ASSOCIATED; + + if ( clear & NET80211_ASSOCIATED ) + clear |= NET80211_CRYPTO_SYNCED; + + dev->state = ( dev->state & ~clear ) | set; + dev->state = ( dev->state & ~statmsk ) | ( status & statmsk ); + + if ( clear & NET80211_ASSOCIATED ) + netdev_link_down ( dev->netdev ); + + if ( ( clear | set ) & NET80211_ASSOCIATED ) + dev->op->config ( dev, NET80211_CFG_ASSOC ); + + if ( status != 0 ) { + if ( status & NET80211_IS_REASON ) + dev->assoc_rc = -E80211_REASON ( status ); + else + dev->assoc_rc = -E80211_STATUS ( status ); + netdev_link_err ( dev->netdev, dev->assoc_rc ); + } +} + +/** + * Add channels to 802.11 device + * + * @v dev 802.11 device + * @v start First channel number to add + * @v len Number of channels to add + * @v txpower TX power (dBm) to allow on added channels + * + * To replace the current list of channels instead of adding to it, + * set the nr_channels field of the 802.11 device to 0 before calling + * this function. 
+ */ +static void net80211_add_channels ( struct net80211_device *dev, int start, + int len, int txpower ) +{ + int i, chan = start; + + for ( i = dev->nr_channels; len-- && i < NET80211_MAX_CHANNELS; i++ ) { + dev->channels[i].channel_nr = chan; + dev->channels[i].maxpower = txpower; + dev->channels[i].hw_value = 0; + + if ( chan >= 1 && chan <= 14 ) { + dev->channels[i].band = NET80211_BAND_2GHZ; + if ( chan == 14 ) + dev->channels[i].center_freq = 2484; + else + dev->channels[i].center_freq = 2407 + 5 * chan; + chan++; + } else { + dev->channels[i].band = NET80211_BAND_5GHZ; + dev->channels[i].center_freq = 5000 + 5 * chan; + chan += 4; + } + } + + dev->nr_channels = i; +} + +/** + * Filter 802.11 device channels for hardware capabilities + * + * @v dev 802.11 device + * + * Hardware may support fewer channels than regulatory restrictions + * allow; this function filters out channels in dev->channels that are + * not supported by the hardware list in dev->hw. It also copies + * over the net80211_channel::hw_value and limits maximum TX power + * appropriately. + * + * Channels are matched based on center frequency, ignoring band and + * channel number. + * + * If the driver specifies no supported channels, the effect will be + * as though all were supported. + */ +static void net80211_filter_hw_channels ( struct net80211_device *dev ) +{ + int delta = 0, i = 0; + int old_freq = dev->channels[dev->channel].center_freq; + struct net80211_channel *chan, *hwchan; + + if ( ! dev->hw->nr_channels ) + return; + + dev->channel = 0; + for ( chan = dev->channels; chan < dev->channels + dev->nr_channels; + chan++, i++ ) { + int ok = 0; + for ( hwchan = dev->hw->channels; + hwchan < dev->hw->channels + dev->hw->nr_channels; + hwchan++ ) { + if ( hwchan->center_freq == chan->center_freq ) { + ok = 1; + break; + } + } + + if ( !
ok ) + delta++; + else { + chan->hw_value = hwchan->hw_value; + if ( hwchan->maxpower != 0 && + chan->maxpower > hwchan->maxpower ) + chan->maxpower = hwchan->maxpower; + if ( old_freq == chan->center_freq ) + dev->channel = i - delta; + if ( delta ) + chan[-delta] = *chan; + } + } + + dev->nr_channels -= delta; + + if ( dev->channels[dev->channel].center_freq != old_freq ) + dev->op->config ( dev, NET80211_CFG_CHANNEL ); +} + +/** + * Update 802.11 device state to reflect received capabilities field + * + * @v dev 802.11 device + * @v capab Capabilities field in beacon, probe, or association frame + * @ret rc Return status code + */ +static int net80211_process_capab ( struct net80211_device *dev, + u16 capab ) +{ + u16 old_phy = dev->phy_flags; + + if ( ( capab & ( IEEE80211_CAPAB_MANAGED | IEEE80211_CAPAB_ADHOC ) ) != + IEEE80211_CAPAB_MANAGED ) { + DBGC ( dev, "802.11 %p cannot handle IBSS network\n", dev ); + return -ENOSYS; + } + + dev->phy_flags &= ~( NET80211_PHY_USE_SHORT_PREAMBLE | + NET80211_PHY_USE_SHORT_SLOT ); + + if ( capab & IEEE80211_CAPAB_SHORT_PMBL ) + dev->phy_flags |= NET80211_PHY_USE_SHORT_PREAMBLE; + + if ( capab & IEEE80211_CAPAB_SHORT_SLOT ) + dev->phy_flags |= NET80211_PHY_USE_SHORT_SLOT; + + if ( old_phy != dev->phy_flags ) + dev->op->config ( dev, NET80211_CFG_PHY_PARAMS ); + + return 0; +} + +/** + * Update 802.11 device state to reflect received information elements + * + * @v dev 802.11 device + * @v ie Pointer to first information element + * @v ie_end Pointer to tail of packet I/O buffer + * @ret rc Return status code + */ +static int net80211_process_ie ( struct net80211_device *dev, + union ieee80211_ie *ie, void *ie_end ) +{ + u16 old_rate = dev->rates[dev->rate]; + u16 old_phy = dev->phy_flags; + int have_rates = 0, i; + int ds_channel = 0; + int changed = 0; + int band = dev->channels[dev->channel].band; + + if ( ! 
ieee80211_ie_bound ( ie, ie_end ) ) + return 0; + + for ( ; ie; ie = ieee80211_next_ie ( ie, ie_end ) ) { + switch ( ie->id ) { + case IEEE80211_IE_SSID: + if ( ie->len <= 32 ) { + memcpy ( dev->essid, ie->ssid, ie->len ); + dev->essid[ie->len] = 0; + } + break; + + case IEEE80211_IE_RATES: + case IEEE80211_IE_EXT_RATES: + if ( ! have_rates ) { + dev->nr_rates = 0; + dev->basic_rates = 0; + have_rates = 1; + } + for ( i = 0; i < ie->len && + dev->nr_rates < NET80211_MAX_RATES; i++ ) { + u8 rid = ie->rates[i]; + u16 rate = ( rid & 0x7f ) * 5; + + if ( rid & 0x80 ) + dev->basic_rates |= + ( 1 << dev->nr_rates ); + + dev->rates[dev->nr_rates++] = rate; + } + + break; + + case IEEE80211_IE_DS_PARAM: + if ( dev->channel < dev->nr_channels && ds_channel == + dev->channels[dev->channel].channel_nr ) + break; + ds_channel = ie->ds_param.current_channel; + net80211_change_channel ( dev, ds_channel ); + break; + + case IEEE80211_IE_COUNTRY: + dev->nr_channels = 0; + + DBGC ( dev, "802.11 %p setting country regulations " + "for %c%c\n", dev, ie->country.name[0], + ie->country.name[1] ); + for ( i = 0; i < ( ie->len - 3 ) / 3; i++ ) { + union ieee80211_ie_country_triplet *t = + &ie->country.triplet[i]; + if ( t->first > 200 ) { + DBGC ( dev, "802.11 %p ignoring regulatory " + "extension information\n", dev ); + } else { + net80211_add_channels ( dev, + t->band.first_channel, + t->band.nr_channels, + t->band.max_txpower ); + } + } + net80211_filter_hw_channels ( dev ); + break; + + case IEEE80211_IE_ERP_INFO: + dev->phy_flags &= ~( NET80211_PHY_USE_PROTECTION | + NET80211_PHY_USE_SHORT_PREAMBLE ); + if ( ie->erp_info & IEEE80211_ERP_USE_PROTECTION ) + dev->phy_flags |= NET80211_PHY_USE_PROTECTION; + if ( ! ( ie->erp_info & IEEE80211_ERP_BARKER_LONG ) ) + dev->phy_flags |= NET80211_PHY_USE_SHORT_PREAMBLE; + break; + } + } + + if ( have_rates ) { + /* Allow only those rates that are also supported by + the hardware. 
*/ + int delta = 0, j; + + dev->rate = 0; + for ( i = 0; i < dev->nr_rates; i++ ) { + int ok = 0; + for ( j = 0; j < dev->hw->nr_rates[band]; j++ ) { + if ( dev->hw->rates[band][j] == dev->rates[i] ){ + ok = 1; + break; + } + } + + if ( ! ok ) + delta++; + else { + dev->rates[i - delta] = dev->rates[i]; + if ( old_rate == dev->rates[i] ) + dev->rate = i - delta; + } + } + + dev->nr_rates -= delta; + + /* Sort available rates - sorted subclumps tend to already + exist, so insertion sort works well. */ + for ( i = 1; i < dev->nr_rates; i++ ) { + u16 rate = dev->rates[i]; + u32 tmp, br, mask; + + for ( j = i - 1; j >= 0 && dev->rates[j] >= rate; j-- ) + dev->rates[j + 1] = dev->rates[j]; + dev->rates[j + 1] = rate; + + /* Adjust basic_rates to match by rotating the + bits from bit j+1 to bit i left one position. */ + mask = ( ( 1 << i ) - 1 ) & ~( ( 1 << ( j + 1 ) ) - 1 ); + br = dev->basic_rates; + tmp = br & ( 1 << i ); + br = ( br & ~( mask | tmp ) ) | ( ( br & mask ) << 1 ); + br |= ( tmp >> ( i - j - 1 ) ); + dev->basic_rates = br; + } + + net80211_set_rtscts_rate ( dev ); + + if ( dev->rates[dev->rate] != old_rate ) + changed |= NET80211_CFG_RATE; + } + + if ( dev->hw->flags & NET80211_HW_NO_SHORT_PREAMBLE ) + dev->phy_flags &= ~NET80211_PHY_USE_SHORT_PREAMBLE; + if ( dev->hw->flags & NET80211_HW_NO_SHORT_SLOT ) + dev->phy_flags &= ~NET80211_PHY_USE_SHORT_SLOT; + + if ( old_phy != dev->phy_flags ) + changed |= NET80211_CFG_PHY_PARAMS; + + if ( changed ) + dev->op->config ( dev, changed ); + + return 0; +} + +/** + * Create information elements for outgoing probe or association packet + * + * @v dev 802.11 device + * @v ie Pointer to start of information element area + * @ret next_ie Pointer to first byte after added information elements + */ +static union ieee80211_ie * +net80211_marshal_request_info ( struct net80211_device *dev, + union ieee80211_ie *ie ) +{ + int i; + + ie->id = IEEE80211_IE_SSID; + ie->len = strlen ( dev->essid ); + memcpy ( ie->ssid, 
dev->essid, ie->len ); + + ie = ieee80211_next_ie ( ie, NULL ); + + ie->id = IEEE80211_IE_RATES; + ie->len = dev->nr_rates; + if ( ie->len > 8 ) + ie->len = 8; + + for ( i = 0; i < ie->len; i++ ) { + ie->rates[i] = dev->rates[i] / 5; + if ( dev->basic_rates & ( 1 << i ) ) + ie->rates[i] |= 0x80; + } + + ie = ieee80211_next_ie ( ie, NULL ); + + if ( dev->rsn_ie && dev->rsn_ie->id == IEEE80211_IE_RSN ) { + memcpy ( ie, dev->rsn_ie, dev->rsn_ie->len + 2 ); + ie = ieee80211_next_ie ( ie, NULL ); + } + + if ( dev->nr_rates > 8 ) { + /* 802.11 requires we use an Extended Basic Rates IE + for the rates beyond the eighth. */ + + ie->id = IEEE80211_IE_EXT_RATES; + ie->len = dev->nr_rates - 8; + + for ( ; i < dev->nr_rates; i++ ) { + ie->rates[i - 8] = dev->rates[i] / 5; + if ( dev->basic_rates & ( 1 << i ) ) + ie->rates[i - 8] |= 0x80; + } + + ie = ieee80211_next_ie ( ie, NULL ); + } + + if ( dev->rsn_ie && dev->rsn_ie->id == IEEE80211_IE_VENDOR ) { + memcpy ( ie, dev->rsn_ie, dev->rsn_ie->len + 2 ); + ie = ieee80211_next_ie ( ie, NULL ); + } + + return ie; +} + +/** Seconds to wait after finding a network, to possibly find better APs for it + * + * This is used when a specific SSID to scan for is specified. + */ +#define NET80211_PROBE_GATHER 1 + +/** Seconds to wait after finding a network, to possibly find other networks + * + * This is used when an empty SSID is specified, to scan for all + * networks. + */ +#define NET80211_PROBE_GATHER_ALL 2 + +/** Seconds to allow a probe to take if no network has been found */ +#define NET80211_PROBE_TIMEOUT 6 + +/** + * Begin probe of 802.11 networks + * + * @v dev 802.11 device + * @v essid SSID to probe for, or "" to accept any (may not be NULL) + * @v active Whether to use active scanning + * @ret ctx Probe context + * + * Active scanning may only be used on channels 1-11 in the 2.4GHz + * band, due to gPXE's lack of a complete regulatory database. 
If + * active scanning is used, probe packets will be sent on each + * channel; this can allow association with hidden-SSID networks if + * the SSID is properly specified. + * + * A @c NULL return indicates an out-of-memory condition. + * + * The returned context must be periodically passed to + * net80211_probe_step() until that function returns zero. + */ +struct net80211_probe_ctx * net80211_probe_start ( struct net80211_device *dev, + const char *essid, + int active ) +{ + struct net80211_probe_ctx *ctx = zalloc ( sizeof ( *ctx ) ); + + if ( ! ctx ) + return NULL; + + assert ( dev->netdev->state & NETDEV_OPEN ); + + ctx->dev = dev; + ctx->old_keep_mgmt = net80211_keep_mgmt ( dev, 1 ); + ctx->essid = essid; + if ( dev->essid != ctx->essid ) + strcpy ( dev->essid, ctx->essid ); + + if ( active ) { + struct ieee80211_probe_req *probe_req; + union ieee80211_ie *ie; + + ctx->probe = alloc_iob ( 128 ); + iob_reserve ( ctx->probe, IEEE80211_TYP_FRAME_HEADER_LEN ); + probe_req = ctx->probe->data; + + ie = net80211_marshal_request_info ( dev, + probe_req->info_element ); + + iob_put ( ctx->probe, ( void * ) ie - ctx->probe->data ); + } + + ctx->ticks_start = currticks(); + ctx->ticks_beacon = 0; + ctx->ticks_channel = currticks(); + ctx->hop_time = ticks_per_sec() / ( active ? 2 : 6 ); + + /* + * Channels on 2.4GHz overlap, and the most commonly used + * are 1, 6, and 11. We'll get a result faster if we check + * every 5 channels, but in order to hit all of them the + * number of channels must be relatively prime to 5. If it's + * not, tweak the hop. 
+ */ + ctx->hop_step = 5; + while ( dev->nr_channels % ctx->hop_step == 0 && ctx->hop_step > 1 ) + ctx->hop_step--; + + ctx->beacons = malloc ( sizeof ( *ctx->beacons ) ); + INIT_LIST_HEAD ( ctx->beacons ); + + dev->channel = 0; + dev->op->config ( dev, NET80211_CFG_CHANNEL ); + + return ctx; +} + +/** + * Continue probe of 802.11 networks + * + * @v ctx Probe context returned by net80211_probe_start() + * @ret rc Probe status + * + * The return code will be 0 if the probe is still going on (and this + * function should be called again), a positive number if the probe + * completed successfully, or a negative error code if the probe + * failed for that reason. + * + * Whether the probe succeeded or failed, you must call + * net80211_probe_finish_all() or net80211_probe_finish_best() + * (depending on whether you want information on all networks or just + * the best-signal one) in order to release the probe context. A + * failed probe may still have acquired some valid data. + */ +int net80211_probe_step ( struct net80211_probe_ctx *ctx ) +{ + struct net80211_device *dev = ctx->dev; + u32 start_timeout = NET80211_PROBE_TIMEOUT * ticks_per_sec(); + u32 gather_timeout = ticks_per_sec(); + u32 now = currticks(); + struct io_buffer *iob; + int signal; + int rc; + char ssid[IEEE80211_MAX_SSID_LEN + 1]; + + gather_timeout *= ( ctx->essid[0] ? NET80211_PROBE_GATHER : + NET80211_PROBE_GATHER_ALL ); + + /* Time out if necessary */ + if ( now >= ctx->ticks_start + start_timeout ) + return list_empty ( ctx->beacons ) ? 
-ETIMEDOUT : +1; + + if ( ctx->ticks_beacon > 0 && now >= ctx->ticks_start + gather_timeout ) + return +1; + + /* Change channels if necessary */ + if ( now >= ctx->ticks_channel + ctx->hop_time ) { + dev->channel = ( dev->channel + ctx->hop_step ) + % dev->nr_channels; + dev->op->config ( dev, NET80211_CFG_CHANNEL ); + udelay ( dev->hw->channel_change_time ); + + ctx->ticks_channel = now; + + if ( ctx->probe ) { + struct io_buffer *siob = ctx->probe; /* to send */ + + /* make a copy for future use */ + iob = alloc_iob ( siob->tail - siob->head ); + iob_reserve ( iob, iob_headroom ( siob ) ); + memcpy ( iob_put ( iob, iob_len ( siob ) ), + siob->data, iob_len ( siob ) ); + + ctx->probe = iob; + rc = net80211_tx_mgmt ( dev, IEEE80211_STYPE_PROBE_REQ, + net80211_ll_broadcast, + iob_disown ( siob ) ); + if ( rc ) { + DBGC ( dev, "802.11 %p send probe failed: " + "%s\n", dev, strerror ( rc ) ); + return rc; + } + } + } + + /* Check for new management packets */ + while ( ( iob = net80211_mgmt_dequeue ( dev, &signal ) ) != NULL ) { + struct ieee80211_frame *hdr; + struct ieee80211_beacon *beacon; + union ieee80211_ie *ie; + struct net80211_wlan *wlan; + u16 type; + + hdr = iob->data; + type = hdr->fc & IEEE80211_FC_SUBTYPE; + beacon = ( struct ieee80211_beacon * ) hdr->data; + + if ( type != IEEE80211_STYPE_BEACON && + type != IEEE80211_STYPE_PROBE_RESP ) { + DBGC2 ( dev, "802.11 %p probe: non-beacon\n", dev ); + goto drop; + } + + if ( ( void * ) beacon->info_element >= iob->tail ) { + DBGC ( dev, "802.11 %p probe: beacon with no IEs\n", + dev ); + goto drop; + } + + ie = beacon->info_element; + + if ( ! ieee80211_ie_bound ( ie, iob->tail ) ) + ie = NULL; + + while ( ie && ie->id != IEEE80211_IE_SSID ) + ie = ieee80211_next_ie ( ie, iob->tail ); + + if ( ! 
ie ) { + DBGC ( dev, "802.11 %p probe: beacon with no SSID\n", + dev ); + goto drop; + } + + memcpy ( ssid, ie->ssid, ie->len ); + ssid[ie->len] = 0; + + if ( ctx->essid[0] && strcmp ( ctx->essid, ssid ) != 0 ) { + DBGC2 ( dev, "802.11 %p probe: beacon with wrong SSID " + "(%s)\n", dev, ssid ); + goto drop; + } + + /* See if we've got an entry for this network */ + list_for_each_entry ( wlan, ctx->beacons, list ) { + if ( strcmp ( wlan->essid, ssid ) != 0 ) + continue; + + if ( signal < wlan->signal ) { + DBGC2 ( dev, "802.11 %p probe: beacon for %s " + "(%s) with weaker signal %d\n", dev, + ssid, eth_ntoa ( hdr->addr3 ), signal ); + goto drop; + } + + goto fill; + } + + /* No entry yet - make one */ + wlan = zalloc ( sizeof ( *wlan ) ); + strcpy ( wlan->essid, ssid ); + list_add_tail ( &wlan->list, ctx->beacons ); + + /* Whether we're using an old entry or a new one, fill + it with new data. */ + fill: + memcpy ( wlan->bssid, hdr->addr3, ETH_ALEN ); + wlan->signal = signal; + wlan->channel = dev->channels[dev->channel].channel_nr; + + /* Copy this I/O buffer into a new wlan->beacon; the + * iob we've got probably came from the device driver + * and may have the full 2.4k allocation, which we + * don't want to keep around wasting memory. 
+ */ + free_iob ( wlan->beacon ); + wlan->beacon = alloc_iob ( iob_len ( iob ) ); + memcpy ( iob_put ( wlan->beacon, iob_len ( iob ) ), + iob->data, iob_len ( iob ) ); + + if ( ( rc = sec80211_detect ( wlan->beacon, &wlan->handshaking, + &wlan->crypto ) ) == -ENOTSUP ) { + struct ieee80211_beacon *beacon = + ( struct ieee80211_beacon * ) hdr->data; + + if ( beacon->capability & IEEE80211_CAPAB_PRIVACY ) { + DBG ( "802.11 %p probe: secured network %s but " + "encryption support not compiled in\n", + dev, wlan->essid ); + wlan->handshaking = NET80211_SECPROT_UNKNOWN; + wlan->crypto = NET80211_CRYPT_UNKNOWN; + } else { + wlan->handshaking = NET80211_SECPROT_NONE; + wlan->crypto = NET80211_CRYPT_NONE; + } + } else if ( rc != 0 ) { + DBGC ( dev, "802.11 %p probe warning: network " + "%s with unidentifiable security " + "settings: %s\n", dev, wlan->essid, + strerror ( rc ) ); + } + + ctx->ticks_beacon = now; + + DBGC2 ( dev, "802.11 %p probe: good beacon for %s (%s)\n", + dev, wlan->essid, eth_ntoa ( wlan->bssid ) ); + + drop: + free_iob ( iob ); + } + + return 0; +} + + +/** + * Finish probe of 802.11 networks, returning best-signal network found + * + * @v ctx Probe context + * @ret wlan Best-signal network found, or @c NULL if none were found + * + * If net80211_probe_start() was called with a particular SSID + * parameter as filter, only a network with that SSID (matching + * case-sensitively) can be returned from this function. + */ +struct net80211_wlan * +net80211_probe_finish_best ( struct net80211_probe_ctx *ctx ) +{ + struct net80211_wlan *best = NULL, *wlan; + + if ( ! ctx ) + return NULL; + + list_for_each_entry ( wlan, ctx->beacons, list ) { + if ( ! 
best || best->signal < wlan->signal ) + best = wlan; + } + + if ( best ) + list_del ( &best->list ); + else + DBGC ( ctx->dev, "802.11 %p probe: found nothing for '%s'\n", + ctx->dev, ctx->essid ); + + net80211_free_wlanlist ( ctx->beacons ); + + net80211_keep_mgmt ( ctx->dev, ctx->old_keep_mgmt ); + + if ( ctx->probe ) + free_iob ( ctx->probe ); + + free ( ctx ); + + return best; +} + + +/** + * Finish probe of 802.11 networks, returning all networks found + * + * @v ctx Probe context + * @ret list List of net80211_wlan detailing networks found + * + * If net80211_probe_start() was called with a particular SSID + * parameter as filter, this will always return either an empty or a + * one-element list. + */ +struct list_head *net80211_probe_finish_all ( struct net80211_probe_ctx *ctx ) +{ + struct list_head *beacons = ctx->beacons; + + if ( ! ctx ) + return NULL; + + net80211_keep_mgmt ( ctx->dev, ctx->old_keep_mgmt ); + + if ( ctx->probe ) + free_iob ( ctx->probe ); + + free ( ctx ); + + return beacons; +} + + +/** + * Free WLAN structure + * + * @v wlan WLAN structure to free + */ +void net80211_free_wlan ( struct net80211_wlan *wlan ) +{ + if ( wlan ) { + free_iob ( wlan->beacon ); + free ( wlan ); + } +} + + +/** + * Free list of WLAN structures + * + * @v list List of WLAN structures to free + */ +void net80211_free_wlanlist ( struct list_head *list ) +{ + struct net80211_wlan *wlan, *tmp; + + if ( ! 
list ) + return; + + list_for_each_entry_safe ( wlan, tmp, list, list ) { + list_del ( &wlan->list ); + net80211_free_wlan ( wlan ); + } + + free ( list ); +} + + +/** Number of ticks to wait for replies to association management frames */ +#define ASSOC_TIMEOUT TICKS_PER_SEC + +/** Number of times to try sending a particular association management frame */ +#define ASSOC_RETRIES 2 + +/** + * Step 802.11 association process + * + * @v proc Association process + */ +static void net80211_step_associate ( struct process *proc ) +{ + struct net80211_device *dev = + container_of ( proc, struct net80211_device, proc_assoc ); + int rc = 0; + int status = dev->state & NET80211_STATUS_MASK; + + /* + * We use a sort of state machine implemented using bits in + * the dev->state variable. At each call, we take the + * logically first step that has not yet succeeded; either it + * has not been tried yet, it's being retried, or it failed. + * If it failed, we return an error indication; otherwise we + * perform the step. If it succeeds, RX handling code will set + * the appropriate status bit for us. + * + * Probe works a bit differently, since we have to step it + * on every call instead of waiting for a packet to arrive + * that will set the completion bit for us. + */ + + /* If we're waiting for a reply, check for timeout condition */ + if ( dev->state & NET80211_WAITING ) { + /* Sanity check */ + if ( ! dev->associating ) + return; + + if ( currticks() - dev->ctx.assoc->last_packet > ASSOC_TIMEOUT ) { + /* Timed out - fail if too many retries, or retry */ + dev->ctx.assoc->times_tried++; + if ( ++dev->ctx.assoc->times_tried > ASSOC_RETRIES ) { + rc = -ETIMEDOUT; + goto fail; + } + } else { + /* Didn't time out - let it keep going */ + return; + } + } else { + if ( dev->state & NET80211_PROBED ) + dev->ctx.assoc->times_tried = 0; + } + + if ( ! ( dev->state & NET80211_PROBED ) ) { + /* state: probe */ + + if ( ! 
dev->ctx.probe ) { + /* start probe */ + int active = fetch_intz_setting ( NULL, + &net80211_active_setting ); + int band = dev->hw->bands; + + if ( active ) + band &= ~NET80211_BAND_BIT_5GHZ; + + rc = net80211_prepare_probe ( dev, band, active ); + if ( rc ) + goto fail; + + dev->ctx.probe = net80211_probe_start ( dev, dev->essid, + active ); + if ( ! dev->ctx.probe ) { + dev->assoc_rc = -ENOMEM; + goto fail; + } + } + + rc = net80211_probe_step ( dev->ctx.probe ); + if ( ! rc ) { + return; /* still going */ + } + + dev->associating = net80211_probe_finish_best ( dev->ctx.probe ); + dev->ctx.probe = NULL; + if ( ! dev->associating ) { + if ( rc > 0 ) /* "successful" probe found nothing */ + rc = -ETIMEDOUT; + goto fail; + } + + /* If we probed using a broadcast SSID, record that + fact for the settings applicator before we clobber + it with the specific SSID we've chosen. */ + if ( ! dev->essid[0] ) + dev->state |= NET80211_AUTO_SSID; + + DBGC ( dev, "802.11 %p found network %s (%s)\n", dev, + dev->associating->essid, + eth_ntoa ( dev->associating->bssid ) ); + + dev->ctx.assoc = zalloc ( sizeof ( *dev->ctx.assoc ) ); + if ( ! dev->ctx.assoc ) { + rc = -ENOMEM; + goto fail; + } + + dev->state |= NET80211_PROBED; + dev->ctx.assoc->method = IEEE80211_AUTH_OPEN_SYSTEM; + + return; + } + + /* Record time of sending the packet we're about to send, for timeout */ + dev->ctx.assoc->last_packet = currticks(); + + if ( ! ( dev->state & NET80211_AUTHENTICATED ) ) { + /* state: prepare and authenticate */ + + if ( status != IEEE80211_STATUS_SUCCESS ) { + /* we tried authenticating already, but failed */ + int method = dev->ctx.assoc->method; + + if ( method == IEEE80211_AUTH_OPEN_SYSTEM && + ( status == IEEE80211_STATUS_AUTH_CHALL_INVALID || + status == IEEE80211_STATUS_AUTH_ALGO_UNSUPP ) ) { + /* Maybe this network uses Shared Key? 
*/ + dev->ctx.assoc->method = + IEEE80211_AUTH_SHARED_KEY; + } else { + goto fail; + } + } + + DBGC ( dev, "802.11 %p authenticating with method %d\n", dev, + dev->ctx.assoc->method ); + + rc = net80211_prepare_assoc ( dev, dev->associating ); + if ( rc ) + goto fail; + + rc = net80211_send_auth ( dev, dev->associating, + dev->ctx.assoc->method ); + if ( rc ) + goto fail; + + return; + } + + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) { + /* state: associate */ + + if ( status != IEEE80211_STATUS_SUCCESS ) + goto fail; + + DBGC ( dev, "802.11 %p associating\n", dev ); + + if ( dev->handshaker && dev->handshaker->start && + ! dev->handshaker->started ) { + rc = dev->handshaker->start ( dev ); + if ( rc < 0 ) + goto fail; + dev->handshaker->started = 1; + } + + rc = net80211_send_assoc ( dev, dev->associating ); + if ( rc ) + goto fail; + + return; + } + + if ( ! ( dev->state & NET80211_CRYPTO_SYNCED ) ) { + /* state: crypto sync */ + DBGC ( dev, "802.11 %p security handshaking\n", dev ); + + if ( ! dev->handshaker || ! dev->handshaker->step ) { + dev->state |= NET80211_CRYPTO_SYNCED; + return; + } + + rc = dev->handshaker->step ( dev ); + + if ( rc < 0 ) { + /* Only record the returned error if we're + still marked as associated, because an + asynchronous error will have already been + reported to net80211_deauthenticate() and + assoc_rc thereby set. */ + if ( dev->state & NET80211_ASSOCIATED ) + dev->assoc_rc = rc; + rc = 0; + goto fail; + } + + if ( rc > 0 ) { + dev->assoc_rc = 0; + dev->state |= NET80211_CRYPTO_SYNCED; + } + return; + } + + /* state: done! 
*/ + netdev_link_up ( dev->netdev ); + dev->assoc_rc = 0; + dev->state &= ~NET80211_WORKING; + + free ( dev->ctx.assoc ); + dev->ctx.assoc = NULL; + + net80211_free_wlan ( dev->associating ); + dev->associating = NULL; + + dev->rctl = rc80211_init ( dev ); + + process_del ( proc ); + + DBGC ( dev, "802.11 %p associated with %s (%s)\n", dev, + dev->essid, eth_ntoa ( dev->bssid ) ); + + return; + + fail: + dev->state &= ~( NET80211_WORKING | NET80211_WAITING ); + if ( rc ) + dev->assoc_rc = rc; + + netdev_link_err ( dev->netdev, dev->assoc_rc ); + + /* We never reach here from the middle of a probe, so we don't + need to worry about freeing dev->ctx.probe. */ + + if ( dev->state & NET80211_PROBED ) { + free ( dev->ctx.assoc ); + dev->ctx.assoc = NULL; + } + + net80211_free_wlan ( dev->associating ); + dev->associating = NULL; + + process_del ( proc ); + + DBGC ( dev, "802.11 %p association failed (state=%04x): " + "%s\n", dev, dev->state, strerror ( dev->assoc_rc ) ); + + /* Try it again: */ + net80211_autoassociate ( dev ); +} + +/** + * Check for 802.11 SSID or key updates + * + * This acts as a settings applicator; if the user changes netX/ssid, + * and netX is currently open, the association task will be invoked + * again. If the user changes the encryption key, the current security + * handshaker will be asked to update its state to match; if that is + * impossible without reassociation, we reassociate. + */ +static int net80211_check_settings_update ( void ) +{ + struct net80211_device *dev; + char ssid[IEEE80211_MAX_SSID_LEN + 1]; + int key_reassoc; + + list_for_each_entry ( dev, &net80211_devices, list ) { + if ( ! ( dev->netdev->state & NETDEV_OPEN ) ) + continue; + + key_reassoc = 0; + if ( dev->handshaker && dev->handshaker->change_key && + dev->handshaker->change_key ( dev ) < 0 ) + key_reassoc = 1; + + fetch_string_setting ( netdev_settings ( dev->netdev ), + &net80211_ssid_setting, ssid, + IEEE80211_MAX_SSID_LEN + 1 ); + + if ( key_reassoc || + ( ! ( ! 
ssid[0] && ( dev->state & NET80211_AUTO_SSID ) ) && + strcmp ( ssid, dev->essid ) != 0 ) ) { + DBGC ( dev, "802.11 %p updating association: " + "%s -> %s\n", dev, dev->essid, ssid ); + net80211_autoassociate ( dev ); + } + } + + return 0; +} + +/** + * Start 802.11 association process + * + * @v dev 802.11 device + * + * If the association process is running, it will be restarted. + */ +void net80211_autoassociate ( struct net80211_device *dev ) +{ + if ( ! ( dev->state & NET80211_WORKING ) ) { + DBGC2 ( dev, "802.11 %p spawning association process\n", dev ); + process_add ( &dev->proc_assoc ); + } else { + DBGC2 ( dev, "802.11 %p restarting association\n", dev ); + } + + /* Clean up everything an earlier association process might + have been in the middle of using */ + if ( dev->associating ) + net80211_free_wlan ( dev->associating ); + + if ( ! ( dev->state & NET80211_PROBED ) ) + net80211_free_wlan ( + net80211_probe_finish_best ( dev->ctx.probe ) ); + else + free ( dev->ctx.assoc ); + + /* Reset to a clean state */ + fetch_string_setting ( netdev_settings ( dev->netdev ), + &net80211_ssid_setting, dev->essid, + IEEE80211_MAX_SSID_LEN + 1 ); + dev->ctx.probe = NULL; + dev->associating = NULL; + dev->assoc_rc = 0; + net80211_set_state ( dev, NET80211_PROBED, NET80211_WORKING, 0 ); +} + +/** + * Pick TX rate for RTS/CTS packets based on data rate + * + * @v dev 802.11 device + * + * The RTS/CTS rate is the fastest TX rate marked as "basic" that is + * not faster than the data rate. + */ +static void net80211_set_rtscts_rate ( struct net80211_device *dev ) +{ + u16 datarate = dev->rates[dev->rate]; + u16 rtsrate = 0; + int rts_idx = -1; + int i; + + for ( i = 0; i < dev->nr_rates; i++ ) { + u16 rate = dev->rates[i]; + + if ( ! 
( dev->basic_rates & ( 1 << i ) ) || rate > datarate ) + continue; + + if ( rate > rtsrate ) { + rtsrate = rate; + rts_idx = i; + } + } + + /* If this is in initialization, we might not have any basic + rates; just use the first data rate in that case. */ + if ( rts_idx < 0 ) + rts_idx = 0; + + dev->rtscts_rate = rts_idx; +} + +/** + * Set data transmission rate for 802.11 device + * + * @v dev 802.11 device + * @v rate Rate to set, as index into @c dev->rates array + */ +void net80211_set_rate_idx ( struct net80211_device *dev, int rate ) +{ + assert ( dev->netdev->state & NETDEV_OPEN ); + + if ( rate >= 0 && rate < dev->nr_rates && rate != dev->rate ) { + DBGC2 ( dev, "802.11 %p changing rate from %d->%d Mbps\n", + dev, dev->rates[dev->rate] / 10, + dev->rates[rate] / 10 ); + + dev->rate = rate; + net80211_set_rtscts_rate ( dev ); + dev->op->config ( dev, NET80211_CFG_RATE ); + } +} + +/** + * Configure 802.11 device to transmit on a certain channel + * + * @v dev 802.11 device + * @v channel Channel number (1-11 for 2.4GHz) to transmit on + */ +int net80211_change_channel ( struct net80211_device *dev, int channel ) +{ + int i, oldchan = dev->channel; + + assert ( dev->netdev->state & NETDEV_OPEN ); + + for ( i = 0; i < dev->nr_channels; i++ ) { + if ( dev->channels[i].channel_nr == channel ) { + dev->channel = i; + break; + } + } + + if ( i == dev->nr_channels ) + return -ENOENT; + + if ( i != oldchan ) + return dev->op->config ( dev, NET80211_CFG_CHANNEL ); + + return 0; +} + +/** + * Prepare 802.11 device channel and rate set for scanning + * + * @v dev 802.11 device + * @v band RF band(s) on which to prepare for scanning + * @v active Whether the scanning will be active + * @ret rc Return status code + */ +int net80211_prepare_probe ( struct net80211_device *dev, int band, + int active ) +{ + assert ( dev->netdev->state & NETDEV_OPEN ); + + if ( active && ( band & NET80211_BAND_BIT_5GHZ ) ) { + DBGC ( dev, "802.11 %p cannot perform active scanning on " + 
"5GHz band\n", dev ); + return -EINVAL_ACTIVE_SCAN; + } + + if ( band == 0 ) { + /* This can happen for a 5GHz-only card with 5GHz + scanning masked out by an active request. */ + DBGC ( dev, "802.11 %p asked to prepare for scanning nothing\n", + dev ); + return -EINVAL_ACTIVE_SCAN; + } + + dev->nr_channels = 0; + + if ( active ) + net80211_add_channels ( dev, 1, 11, NET80211_REG_TXPOWER ); + else { + if ( band & NET80211_BAND_BIT_2GHZ ) + net80211_add_channels ( dev, 1, 14, + NET80211_REG_TXPOWER ); + if ( band & NET80211_BAND_BIT_5GHZ ) + net80211_add_channels ( dev, 36, 8, + NET80211_REG_TXPOWER ); + } + + net80211_filter_hw_channels ( dev ); + + /* Use channel 1 for now */ + dev->channel = 0; + dev->op->config ( dev, NET80211_CFG_CHANNEL ); + + /* Always do active probes at lowest (presumably first) speed */ + dev->rate = 0; + dev->nr_rates = 1; + dev->rates[0] = dev->hw->rates[dev->channels[0].band][0]; + dev->op->config ( dev, NET80211_CFG_RATE ); + + return 0; +} + +/** + * Prepare 802.11 device channel and rate set for communication + * + * @v dev 802.11 device + * @v wlan WLAN to prepare for communication with + * @ret rc Return status code + */ +int net80211_prepare_assoc ( struct net80211_device *dev, + struct net80211_wlan *wlan ) +{ + struct ieee80211_frame *hdr = wlan->beacon->data; + struct ieee80211_beacon *beacon = + ( struct ieee80211_beacon * ) hdr->data; + struct net80211_handshaker *handshaker; + int rc; + + assert ( dev->netdev->state & NETDEV_OPEN ); + + net80211_set_state ( dev, NET80211_ASSOCIATED, 0, 0 ); + memcpy ( dev->bssid, wlan->bssid, ETH_ALEN ); + strcpy ( dev->essid, wlan->essid ); + + free ( dev->rsn_ie ); + dev->rsn_ie = NULL; + + dev->last_beacon_timestamp = beacon->timestamp; + dev->tx_beacon_interval = 1024 * beacon->beacon_interval; + + /* Barring an IE that tells us the channel outright, assume + the channel we heard this AP best on is the channel it's + communicating on. 
*/ + net80211_change_channel ( dev, wlan->channel ); + + rc = net80211_process_capab ( dev, beacon->capability ); + if ( rc ) + return rc; + + rc = net80211_process_ie ( dev, beacon->info_element, + wlan->beacon->tail ); + if ( rc ) + return rc; + + /* Associate at the lowest rate so we know it'll get through */ + dev->rate = 0; + dev->op->config ( dev, NET80211_CFG_RATE ); + + /* Free old handshaker and crypto, if they exist */ + if ( dev->handshaker && dev->handshaker->stop && + dev->handshaker->started ) + dev->handshaker->stop ( dev ); + free ( dev->handshaker ); + dev->handshaker = NULL; + free ( dev->crypto ); + free ( dev->gcrypto ); + dev->crypto = dev->gcrypto = NULL; + + /* Find new security handshaker to use */ + for_each_table_entry ( handshaker, NET80211_HANDSHAKERS ) { + if ( handshaker->protocol == wlan->handshaking ) { + dev->handshaker = zalloc ( sizeof ( *handshaker ) + + handshaker->priv_len ); + if ( ! dev->handshaker ) + return -ENOMEM; + + memcpy ( dev->handshaker, handshaker, + sizeof ( *handshaker ) ); + dev->handshaker->priv = ( ( void * ) dev->handshaker + + sizeof ( *handshaker ) ); + break; + } + } + + if ( ( wlan->handshaking != NET80211_SECPROT_NONE ) && + ! dev->handshaker ) { + DBGC ( dev, "802.11 %p no support for handshaking scheme %d\n", + dev, wlan->handshaking ); + return -( ENOTSUP | ( wlan->handshaking << 8 ) ); + } + + /* Initialize security handshaker */ + if ( dev->handshaker ) { + rc = dev->handshaker->init ( dev ); + if ( rc < 0 ) + return rc; + } + + return 0; +} + +/** + * Send 802.11 initial authentication frame + * + * @v dev 802.11 device + * @v wlan WLAN to authenticate with + * @v method Authentication method + * @ret rc Return status code + * + * @a method may be 0 for Open System authentication or 1 for Shared + * Key authentication. 
Open System provides no security in association + * whatsoever, relying on encryption for confidentiality, but Shared + * Key actively introduces security problems and is very rarely used. + */ +int net80211_send_auth ( struct net80211_device *dev, + struct net80211_wlan *wlan, int method ) +{ + struct io_buffer *iob = alloc_iob ( 64 ); + struct ieee80211_auth *auth; + + net80211_set_state ( dev, 0, NET80211_WAITING, 0 ); + iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN ); + auth = iob_put ( iob, sizeof ( *auth ) ); + auth->algorithm = method; + auth->tx_seq = 1; + auth->status = 0; + + return net80211_tx_mgmt ( dev, IEEE80211_STYPE_AUTH, wlan->bssid, iob ); +} + +/** + * Handle receipt of 802.11 authentication frame + * + * @v dev 802.11 device + * @v iob I/O buffer + * + * If the authentication method being used is Shared Key, and the + * frame that was received included challenge text, the frame is + * encrypted using the cryptosystem currently in effect and sent back + * to the AP to complete the authentication. + */ +static void net80211_handle_auth ( struct net80211_device *dev, + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_auth *auth = + ( struct ieee80211_auth * ) hdr->data; + + if ( auth->tx_seq & 1 ) { + DBGC ( dev, "802.11 %p authentication received improperly " + "directed frame (seq. %d)\n", dev, auth->tx_seq ); + net80211_set_state ( dev, NET80211_WAITING, 0, + IEEE80211_STATUS_FAILURE ); + return; + } + + if ( auth->status != IEEE80211_STATUS_SUCCESS ) { + DBGC ( dev, "802.11 %p authentication failed: status %d\n", + dev, auth->status ); + net80211_set_state ( dev, NET80211_WAITING, 0, + auth->status ); + return; + } + + if ( auth->algorithm == IEEE80211_AUTH_SHARED_KEY && ! 
dev->crypto ) { + DBGC ( dev, "802.11 %p can't perform shared-key authentication " + "without a cryptosystem\n", dev ); + net80211_set_state ( dev, NET80211_WAITING, 0, + IEEE80211_STATUS_FAILURE ); + return; + } + + if ( auth->algorithm == IEEE80211_AUTH_SHARED_KEY && + auth->tx_seq == 2 ) { + /* Since the iob we got is going to be freed as soon + as we return, we can do some in-place + modification. */ + auth->tx_seq = 3; + auth->status = 0; + + memcpy ( hdr->addr2, hdr->addr1, ETH_ALEN ); + memcpy ( hdr->addr1, hdr->addr3, ETH_ALEN ); + + netdev_tx ( dev->netdev, + dev->crypto->encrypt ( dev->crypto, iob ) ); + return; + } + + net80211_set_state ( dev, NET80211_WAITING, NET80211_AUTHENTICATED, + IEEE80211_STATUS_SUCCESS ); + + return; +} + +/** + * Send 802.11 association frame + * + * @v dev 802.11 device + * @v wlan WLAN to associate with + * @ret rc Return status code + */ +int net80211_send_assoc ( struct net80211_device *dev, + struct net80211_wlan *wlan ) +{ + struct io_buffer *iob = alloc_iob ( 128 ); + struct ieee80211_assoc_req *assoc; + union ieee80211_ie *ie; + + net80211_set_state ( dev, 0, NET80211_WAITING, 0 ); + + iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN ); + assoc = iob->data; + + assoc->capability = IEEE80211_CAPAB_MANAGED; + if ( ! ( dev->hw->flags & NET80211_HW_NO_SHORT_PREAMBLE ) ) + assoc->capability |= IEEE80211_CAPAB_SHORT_PMBL; + if ( ! 
( dev->hw->flags & NET80211_HW_NO_SHORT_SLOT ) ) + assoc->capability |= IEEE80211_CAPAB_SHORT_SLOT; + if ( wlan->crypto ) + assoc->capability |= IEEE80211_CAPAB_PRIVACY; + + assoc->listen_interval = 1; + + ie = net80211_marshal_request_info ( dev, assoc->info_element ); + + DBGP ( "802.11 %p about to send association request:\n", dev ); + DBGP_HD ( iob->data, ( void * ) ie - iob->data ); + + iob_put ( iob, ( void * ) ie - iob->data ); + + return net80211_tx_mgmt ( dev, IEEE80211_STYPE_ASSOC_REQ, + wlan->bssid, iob ); +} + +/** + * Handle receipt of 802.11 association reply frame + * + * @v dev 802.11 device + * @v iob I/O buffer + */ +static void net80211_handle_assoc_reply ( struct net80211_device *dev, + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_assoc_resp *assoc = + ( struct ieee80211_assoc_resp * ) hdr->data; + + net80211_process_capab ( dev, assoc->capability ); + net80211_process_ie ( dev, assoc->info_element, iob->tail ); + + if ( assoc->status != IEEE80211_STATUS_SUCCESS ) { + DBGC ( dev, "802.11 %p association failed: status %d\n", + dev, assoc->status ); + net80211_set_state ( dev, NET80211_WAITING, 0, + assoc->status ); + return; + } + + /* ESSID was filled before the association request was sent */ + memcpy ( dev->bssid, hdr->addr3, ETH_ALEN ); + dev->aid = assoc->aid; + + net80211_set_state ( dev, NET80211_WAITING, NET80211_ASSOCIATED, + IEEE80211_STATUS_SUCCESS ); +} + + +/** + * Send 802.11 disassociation frame + * + * @v dev 802.11 device + * @v reason Reason for disassociation + * @v deauth If TRUE, send deauthentication instead of disassociation + * @ret rc Return status code + */ +static int net80211_send_disassoc ( struct net80211_device *dev, int reason, + int deauth ) +{ + struct io_buffer *iob = alloc_iob ( 64 ); + struct ieee80211_disassoc *disassoc; + + if ( ! 
( dev->state & NET80211_ASSOCIATED ) ) + return -EINVAL; + + net80211_set_state ( dev, NET80211_ASSOCIATED, 0, 0 ); + iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN ); + disassoc = iob_put ( iob, sizeof ( *disassoc ) ); + disassoc->reason = reason; + + return net80211_tx_mgmt ( dev, deauth ? IEEE80211_STYPE_DEAUTH : + IEEE80211_STYPE_DISASSOC, dev->bssid, iob ); +} + + +/** + * Deauthenticate from current network and try again + * + * @v dev 802.11 device + * @v rc Return status code indicating reason + * + * The deauthentication will be sent using an 802.11 "unspecified + * reason", as is common, but @a rc will be set as a link-up + * error to aid the user in debugging. + */ +void net80211_deauthenticate ( struct net80211_device *dev, int rc ) +{ + net80211_send_disassoc ( dev, IEEE80211_REASON_UNSPECIFIED, 1 ); + dev->assoc_rc = rc; + netdev_link_err ( dev->netdev, rc ); + + net80211_autoassociate ( dev ); +} + + +/** Smoothing factor (1-7) for link quality calculation */ +#define LQ_SMOOTH 7 + +/** + * Update link quality information based on received beacon + * + * @v dev 802.11 device + * @v iob I/O buffer containing beacon + * @ret rc Return status code + */ +static void net80211_update_link_quality ( struct net80211_device *dev, + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_beacon *beacon; + u32 dt, rxi; + + if ( ! 
( dev->state & NET80211_ASSOCIATED ) ) + return; + + beacon = ( struct ieee80211_beacon * ) hdr->data; + dt = ( u32 ) ( beacon->timestamp - dev->last_beacon_timestamp ); + rxi = dev->rx_beacon_interval; + + rxi = ( LQ_SMOOTH * rxi ) + ( ( 8 - LQ_SMOOTH ) * dt ); + dev->rx_beacon_interval = rxi >> 3; + + dev->last_beacon_timestamp = beacon->timestamp; +} + + +/** + * Handle receipt of 802.11 management frame + * + * @v dev 802.11 device + * @v iob I/O buffer + * @v signal Signal strength of received frame + */ +static void net80211_handle_mgmt ( struct net80211_device *dev, + struct io_buffer *iob, int signal ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_disassoc *disassoc; + u16 stype = hdr->fc & IEEE80211_FC_SUBTYPE; + int keep = 0; + int is_deauth = ( stype == IEEE80211_STYPE_DEAUTH ); + + if ( ( hdr->fc & IEEE80211_FC_TYPE ) != IEEE80211_TYPE_MGMT ) { + free_iob ( iob ); + return; /* only handle management frames */ + } + + switch ( stype ) { + /* We reconnect on deauthentication and disassociation. */ + case IEEE80211_STYPE_DEAUTH: + case IEEE80211_STYPE_DISASSOC: + disassoc = ( struct ieee80211_disassoc * ) hdr->data; + net80211_set_state ( dev, is_deauth ? NET80211_AUTHENTICATED : + NET80211_ASSOCIATED, 0, + NET80211_IS_REASON | disassoc->reason ); + DBGC ( dev, "802.11 %p %s: reason %d\n", + dev, is_deauth ? "deauthenticated" : "disassociated", + disassoc->reason ); + + /* Try to reassociate, in case it's transient. */ + net80211_autoassociate ( dev ); + + break; + + /* We handle authentication and association. */ + case IEEE80211_STYPE_AUTH: + if ( ! ( dev->state & NET80211_AUTHENTICATED ) ) + net80211_handle_auth ( dev, iob ); + break; + + case IEEE80211_STYPE_ASSOC_RESP: + case IEEE80211_STYPE_REASSOC_RESP: + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) + net80211_handle_assoc_reply ( dev, iob ); + break; + + /* We pass probes and beacons onto network scanning + code. Pass actions for future extensibility. 
*/ + case IEEE80211_STYPE_BEACON: + net80211_update_link_quality ( dev, iob ); + /* fall through */ + case IEEE80211_STYPE_PROBE_RESP: + case IEEE80211_STYPE_ACTION: + if ( dev->keep_mgmt ) { + struct net80211_rx_info *rxinf; + rxinf = zalloc ( sizeof ( *rxinf ) ); + if ( ! rxinf ) { + DBGC ( dev, "802.11 %p out of memory\n", dev ); + break; + } + rxinf->signal = signal; + list_add_tail ( &iob->list, &dev->mgmt_queue ); + list_add_tail ( &rxinf->list, &dev->mgmt_info_queue ); + keep = 1; + } + break; + + case IEEE80211_STYPE_PROBE_REQ: + /* Some nodes send these broadcast. Ignore them. */ + break; + + case IEEE80211_STYPE_ASSOC_REQ: + case IEEE80211_STYPE_REASSOC_REQ: + /* We should never receive these, only send them. */ + DBGC ( dev, "802.11 %p received strange management request " + "(%04x)\n", dev, stype ); + break; + + default: + DBGC ( dev, "802.11 %p received unimplemented management " + "packet (%04x)\n", dev, stype ); + break; + } + + if ( ! keep ) + free_iob ( iob ); +} + +/* ---------- Packet handling functions ---------- */ + +/** + * Free buffers used by 802.11 fragment cache entry + * + * @v dev 802.11 device + * @v fcid Fragment cache entry index + * + * After this function, the referenced entry will be marked unused. + */ +static void net80211_free_frags ( struct net80211_device *dev, int fcid ) +{ + int j; + struct net80211_frag_cache *frag = &dev->frags[fcid]; + + for ( j = 0; j < 16; j++ ) { + if ( frag->iob[j] ) { + free_iob ( frag->iob[j] ); + frag->iob[j] = NULL; + } + } + + frag->seqnr = 0; + frag->start_ticks = 0; + frag->in_use = 0; +} + +/** + * Accumulate 802.11 fragments into one I/O buffer + * + * @v dev 802.11 device + * @v fcid Fragment cache entry index + * @v nfrags Number of fragments received + * @v size Sum of sizes of all fragments, including headers + * @ret iob I/O buffer containing reassembled packet + * + * This function does not free the fragment buffers. 
+ */ +static struct io_buffer *net80211_accum_frags ( struct net80211_device *dev, + int fcid, int nfrags, int size ) +{ + struct net80211_frag_cache *frag = &dev->frags[fcid]; + int hdrsize = IEEE80211_TYP_FRAME_HEADER_LEN; + int nsize = size - hdrsize * ( nfrags - 1 ); + int i; + + struct io_buffer *niob = alloc_iob ( nsize ); + struct ieee80211_frame *hdr; + + /* Add the header from the first one... */ + memcpy ( iob_put ( niob, hdrsize ), frag->iob[0]->data, hdrsize ); + + /* ... and all the data from all of them. */ + for ( i = 0; i < nfrags; i++ ) { + int len = iob_len ( frag->iob[i] ) - hdrsize; + memcpy ( iob_put ( niob, len ), + frag->iob[i]->data + hdrsize, len ); + } + + /* Turn off the fragment bit. */ + hdr = niob->data; + hdr->fc &= ~IEEE80211_FC_MORE_FRAG; + + return niob; +} + +/** + * Handle receipt of 802.11 fragment + * + * @v dev 802.11 device + * @v iob I/O buffer containing fragment + * @v signal Signal strength with which fragment was received + */ +static void net80211_rx_frag ( struct net80211_device *dev, + struct io_buffer *iob, int signal ) +{ + struct ieee80211_frame *hdr = iob->data; + int fragnr = IEEE80211_FRAG ( hdr->seq ); + + if ( fragnr == 0 && ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) { + /* start a frag cache entry */ + int i, newest = -1; + u32 curr_ticks = currticks(), newest_ticks = 0; + u32 timeout = ticks_per_sec() * NET80211_FRAG_TIMEOUT; + + for ( i = 0; i < NET80211_NR_CONCURRENT_FRAGS; i++ ) { + if ( dev->frags[i].in_use == 0 ) + break; + + if ( dev->frags[i].start_ticks + timeout >= + curr_ticks ) { + net80211_free_frags ( dev, i ); + break; + } + + if ( dev->frags[i].start_ticks > newest_ticks ) { + newest = i; + newest_ticks = dev->frags[i].start_ticks; + } + } + + /* If we're being sent more concurrent fragmented + packets than we can handle, drop the newest so the + older ones have time to complete. 
*/ + if ( i == NET80211_NR_CONCURRENT_FRAGS ) { + i = newest; + net80211_free_frags ( dev, i ); + } + + dev->frags[i].in_use = 1; + dev->frags[i].seqnr = IEEE80211_SEQNR ( hdr->seq ); + dev->frags[i].start_ticks = currticks(); + dev->frags[i].iob[0] = iob; + return; + } else { + int i; + for ( i = 0; i < NET80211_NR_CONCURRENT_FRAGS; i++ ) { + if ( dev->frags[i].in_use && dev->frags[i].seqnr == + IEEE80211_SEQNR ( hdr->seq ) ) + break; + } + if ( i == NET80211_NR_CONCURRENT_FRAGS ) { + /* Drop non-first not-in-cache fragments */ + DBGC ( dev, "802.11 %p dropped fragment fc=%04x " + "seq=%04x\n", dev, hdr->fc, hdr->seq ); + free_iob ( iob ); + return; + } + + dev->frags[i].iob[fragnr] = iob; + + if ( ! ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) { + int j, size = 0; + for ( j = 0; j < fragnr; j++ ) { + size += iob_len ( dev->frags[i].iob[j] ); + if ( dev->frags[i].iob[j] == NULL ) + break; + } + if ( j == fragnr ) { + /* We've got everything */ + struct io_buffer *niob = + net80211_accum_frags ( dev, i, fragnr, + size ); + net80211_free_frags ( dev, i ); + net80211_rx ( dev, niob, signal, 0 ); + } else { + DBGC ( dev, "802.11 %p dropping fragmented " + "packet due to out-of-order arrival, " + "fc=%04x seq=%04x\n", dev, hdr->fc, + hdr->seq ); + net80211_free_frags ( dev, i ); + } + } + } +} + +/** + * Handle receipt of 802.11 frame + * + * @v dev 802.11 device + * @v iob I/O buffer + * @v signal Received signal strength + * @v rate Bitrate at which frame was received, in 100 kbps units + * + * If the rate or signal is unknown, 0 should be passed. 
+ */ +void net80211_rx ( struct net80211_device *dev, struct io_buffer *iob, + int signal, u16 rate ) +{ + struct ieee80211_frame *hdr = iob->data; + u16 type = hdr->fc & IEEE80211_FC_TYPE; + if ( ( hdr->fc & IEEE80211_FC_VERSION ) != IEEE80211_THIS_VERSION ) + goto drop; /* drop invalid-version packets */ + + if ( type == IEEE80211_TYPE_CTRL ) + goto drop; /* we don't handle control packets, + the hardware does */ + + if ( dev->last_rx_seq == hdr->seq ) + goto drop; /* avoid duplicate packet */ + dev->last_rx_seq = hdr->seq; + + if ( dev->hw->flags & NET80211_HW_RX_HAS_FCS ) { + /* discard the FCS */ + iob_unput ( iob, 4 ); + } + + /* Only decrypt packets from our BSSID, to avoid spurious errors */ + if ( ( hdr->fc & IEEE80211_FC_PROTECTED ) && + ! memcmp ( hdr->addr2, dev->bssid, ETH_ALEN ) ) { + /* Decrypt packet; record and drop if it fails */ + struct io_buffer *niob; + struct net80211_crypto *crypto = dev->crypto; + + if ( ! dev->crypto ) { + DBGC ( dev, "802.11 %p cannot decrypt packet " + "without a cryptosystem\n", dev ); + goto drop_crypt; + } + + if ( ( hdr->addr1[0] & 1 ) && dev->gcrypto ) { + /* Use group decryption if needed */ + crypto = dev->gcrypto; + } + + niob = crypto->decrypt ( crypto, iob ); + if ( ! niob ) { + DBGC ( dev, "802.11 %p decryption error\n", dev ); + goto drop_crypt; + } + free_iob ( iob ); + iob = niob; + } + + dev->last_signal = signal; + + /* Fragments go into the frag cache or get dropped. */ + if ( IEEE80211_FRAG ( hdr->seq ) != 0 + || ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) { + net80211_rx_frag ( dev, iob, signal ); + return; + } + + /* Management frames get handled, enqueued, or dropped. */ + if ( type == IEEE80211_TYPE_MGMT ) { + net80211_handle_mgmt ( dev, iob, signal ); + return; + } + + /* Data frames get dropped or sent to the net_device. 
*/ + if ( ( hdr->fc & IEEE80211_FC_SUBTYPE ) != IEEE80211_STYPE_DATA ) + goto drop; /* drop QoS, CFP, or null data packets */ + + /* Update rate-control algorithm */ + if ( dev->rctl ) + rc80211_update_rx ( dev, hdr->fc & IEEE80211_FC_RETRY, rate ); + + /* Pass packet onward */ + if ( dev->state & NET80211_ASSOCIATED ) { + netdev_rx ( dev->netdev, iob ); + return; + } + + /* No association? Drop it. */ + goto drop; + + drop_crypt: + netdev_rx_err ( dev->netdev, NULL, EINVAL_CRYPTO_REQUEST ); + drop: + DBGC2 ( dev, "802.11 %p dropped packet fc=%04x seq=%04x\n", dev, + hdr->fc, hdr->seq ); + free_iob ( iob ); + return; +} + +/** Indicate an error in receiving a packet + * + * @v dev 802.11 device + * @v iob I/O buffer with received packet, or NULL + * @v rc Error code + * + * This logs the error with the wrapping net_device, and frees iob if + * it is passed. + */ +void net80211_rx_err ( struct net80211_device *dev, + struct io_buffer *iob, int rc ) +{ + netdev_rx_err ( dev->netdev, iob, rc ); +} + +/** Indicate the completed transmission of a packet + * + * @v dev 802.11 device + * @v iob I/O buffer of transmitted packet + * @v retries Number of times this packet was retransmitted + * @v rc Error code, or 0 for success + * + * This logs an error with the wrapping net_device if one occurred, + * and removes and frees the I/O buffer from its TX queue. The + * provided retry information is used to tune our transmission rate. + * + * If the packet did not need to be retransmitted because it was + * properly ACKed the first time, @a retries should be 0. 
+ */ +void net80211_tx_complete ( struct net80211_device *dev, + struct io_buffer *iob, int retries, int rc ) +{ + /* Update rate-control algorithm */ + if ( dev->rctl ) + rc80211_update_tx ( dev, retries, rc ); + + /* Pass completion onward */ + netdev_tx_complete_err ( dev->netdev, iob, rc ); +} diff --git a/gpxe/src/net/80211/rc80211.c b/gpxe/src/net/80211/rc80211.c new file mode 100644 index 00000000..5bd19143 --- /dev/null +++ b/gpxe/src/net/80211/rc80211.c @@ -0,0 +1,371 @@ +/* + * Simple 802.11 rate-control algorithm for gPXE. + * + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdlib.h> +#include <gpxe/net80211.h> + +/** + * @file + * + * Simple 802.11 rate-control algorithm + */ + +/** @page rc80211 Rate control philosophy + * + * We want to maximize our transmission speed, to the extent that we + * can do that without dropping undue numbers of packets. We also + * don't want to take up very much code space, so our algorithm has to + * be pretty simple + * + * When we receive a packet, we know what rate it was transmitted at, + * and whether it had to be retransmitted to get to us. 
+ * + * When we send a packet, we hear back how many times it had to be + * retried to get through, and whether it got through at all. + * + * Indications of TX success are more reliable than RX success, but RX + * information helps us know where to start. + * + * To handle all of this, we keep for each rate and each direction (TX + * and RX separately) some state information for the most recent + * packets on that rate and the number of packets for which we have + * information. The state is a 32-bit unsigned integer in which two + * bits represent a packet: 11 if it went through well, 10 if it went + * through with one retry, 01 if it went through with more than one + * retry, or 00 if it didn't go through at all. We define the + * "goodness" for a particular (rate, direction) combination as the + * sum of all the 2-bit fields, times 33, divided by the number of + * 2-bit fields containing valid information (16 except when we're + * starting out). The number produced is between 0 and 99; we use -1 + * for rates with less than 4 RX packets or 1 TX, as an indicator that + * we do not have enough information to rely on them. + * + * In deciding which rates are best, we find the weighted average of + * TX and RX goodness, where the weighting is by number of packets + * with data and TX packets are worth 4 times as much as RX packets. + * The weighted average is called "net goodness" and is also a number + * between 0 and 99. If 3 consecutive packets fail transmission + * outright, we automatically ratchet down the rate; otherwise, we + * switch to the best rate whenever the current rate's goodness falls + * below some threshold, and try increasing our rate when the goodness + * is very high. + * + * This system is optimized for gPXE's style of usage. Because normal + * operation always involves receiving something, we'll make our way + * to the best rate pretty quickly. 
We tend to follow the lead of the + * sending AP in choosing rates, but we won't use rates for long that + * don't work well for us in transmission. We assume gPXE won't be + * running for long enough that rate patterns will change much, so we + * don't have to keep time counters or the like. And if this doesn't + * work well in practice there are many ways it could be tweaked. + * + * To avoid staying at 1Mbps for a long time, we don't track any + * transmitted packets until we've set our rate based on received + * packets. + */ + +/** Two-bit packet status indicator for a packet with no retries */ +#define RC_PKT_OK 0x3 + +/** Two-bit packet status indicator for a packet with one retry */ +#define RC_PKT_RETRIED_ONCE 0x2 + +/** Two-bit packet status indicator for a TX packet with multiple retries + * + * It is not possible to tell whether an RX packet had one or multiple + * retries; we rely instead on the fact that failed RX packets won't + * get to us at all, so if we receive a lot of RX packets on a certain + * rate it must be pretty good. + */ +#define RC_PKT_RETRIED_MULTI 0x1 + +/** Two-bit packet status indicator for a TX packet that was never ACKed + * + * It is not possible to tell whether an RX packet was sent if it + * didn't get through to us, but if we don't see one we won't increase + * the goodness for its rate. This asymmetry is part of why TX packets + * are weighted much more heavily than RX. 
+ */ +#define RC_PKT_FAILED 0x0 + +/** Number of times to weight TX packets more heavily than RX packets */ +#define RC_TX_FACTOR 4 + +/** Number of consecutive failed TX packets that cause an automatic rate drop */ +#define RC_TX_EMERG_FAIL 3 + +/** Minimum net goodness below which we will search for a better rate */ +#define RC_GOODNESS_MIN 85 + +/** Maximum net goodness above which we will try to increase our rate */ +#define RC_GOODNESS_MAX 95 + +/** Minimum (num RX + @c RC_TX_FACTOR * num TX) to use a certain rate */ +#define RC_UNCERTAINTY_THRESH 4 + +/** TX direction */ +#define TX 0 + +/** RX direction */ +#define RX 1 + +/** A rate control context */ +struct rc80211_ctx +{ + /** Goodness state for each rate, TX and RX */ + u32 goodness[2][NET80211_MAX_RATES]; + + /** Number of packets recorded for each rate */ + u8 count[2][NET80211_MAX_RATES]; + + /** Indication of whether we've set the device rate yet */ + int started; + + /** Counter of all packets sent and received */ + int packets; +}; + +/** + * Initialize rate-control algorithm + * + * @v dev 802.11 device + * @ret ctx Rate-control context, to be stored in @c dev->rctl + */ +struct rc80211_ctx * rc80211_init ( struct net80211_device *dev __unused ) +{ + struct rc80211_ctx *ret = zalloc ( sizeof ( *ret ) ); + return ret; +} + +/** + * Calculate net goodness for a certain rate + * + * @v ctx Rate-control context + * @v rate_idx Index of rate to calculate net goodness for + */ +static int rc80211_calc_net_goodness ( struct rc80211_ctx *ctx, + int rate_idx ) +{ + int sum[2], num[2], dir, pkt; + + for ( dir = 0; dir < 2; dir++ ) { + u32 good = ctx->goodness[dir][rate_idx]; + + num[dir] = ctx->count[dir][rate_idx]; + sum[dir] = 0; + + for ( pkt = 0; pkt < num[dir]; pkt++ ) + sum[dir] += ( good >> ( 2 * pkt ) ) & 0x3; + } + + if ( ( num[TX] * RC_TX_FACTOR + num[RX] ) < RC_UNCERTAINTY_THRESH ) + return -1; + + return ( 33 * ( sum[TX] * RC_TX_FACTOR + sum[RX] ) / + ( num[TX] * RC_TX_FACTOR + num[RX] ) ); +} 
+ +/** + * Determine the best rate to switch to and return it + * + * @v dev 802.11 device + * @ret rate_idx Index of the best rate to switch to + */ +static int rc80211_pick_best ( struct net80211_device *dev ) +{ + struct rc80211_ctx *ctx = dev->rctl; + int best_net_good = 0, best_rate = -1, i; + + for ( i = 0; i < dev->nr_rates; i++ ) { + int net_good = rc80211_calc_net_goodness ( ctx, i ); + + if ( net_good > best_net_good || + ( best_net_good > RC_GOODNESS_MIN && + net_good > RC_GOODNESS_MIN ) ) { + best_net_good = net_good; + best_rate = i; + } + } + + if ( best_rate >= 0 ) { + int old_good = rc80211_calc_net_goodness ( ctx, dev->rate ); + if ( old_good != best_net_good ) + DBGC ( ctx, "802.11 RC %p switching from goodness " + "%d to %d\n", ctx, old_good, best_net_good ); + + ctx->started = 1; + return best_rate; + } + + return dev->rate; +} + +/** + * Set 802.11 device rate + * + * @v dev 802.11 device + * @v rate_idx Index of rate to switch to + * + * This is a thin wrapper around net80211_set_rate_idx to insert a + * debugging message where appropriate. + */ +static inline void rc80211_set_rate ( struct net80211_device *dev, + int rate_idx ) +{ + DBGC ( dev->rctl, "802.11 RC %p changing rate %d->%d Mbps\n", dev->rctl, + dev->rates[dev->rate] / 10, dev->rates[rate_idx] / 10 ); + + net80211_set_rate_idx ( dev, rate_idx ); +} + +/** + * Check rate-control state and change rate if necessary + * + * @v dev 802.11 device + */ +static void rc80211_maybe_set_new ( struct net80211_device *dev ) +{ + struct rc80211_ctx *ctx = dev->rctl; + int net_good; + + net_good = rc80211_calc_net_goodness ( ctx, dev->rate ); + + if ( ! 
ctx->started ) { + rc80211_set_rate ( dev, rc80211_pick_best ( dev ) ); + return; + } + + if ( net_good < 0 ) /* insufficient data */ + return; + + if ( net_good > RC_GOODNESS_MAX && dev->rate + 1 < dev->nr_rates ) { + int higher = rc80211_calc_net_goodness ( ctx, dev->rate + 1 ); + if ( higher > net_good || higher < 0 ) + rc80211_set_rate ( dev, dev->rate + 1 ); + else + rc80211_set_rate ( dev, rc80211_pick_best ( dev ) ); + } + + if ( net_good < RC_GOODNESS_MIN ) { + rc80211_set_rate ( dev, rc80211_pick_best ( dev ) ); + } +} + +/** + * Update rate-control state + * + * @v dev 802.11 device + * @v direction One of the direction constants TX or RX + * @v rate_idx Index of rate at which packet was sent or received + * @v retries Number of times packet was retried before success + * @v failed If nonzero, the packet failed to get through + */ +static void rc80211_update ( struct net80211_device *dev, int direction, + int rate_idx, int retries, int failed ) +{ + struct rc80211_ctx *ctx = dev->rctl; + u32 goodness = ctx->goodness[direction][rate_idx]; + + if ( ctx->count[direction][rate_idx] < 16 ) + ctx->count[direction][rate_idx]++; + + goodness <<= 2; + if ( failed ) + goodness |= RC_PKT_FAILED; + else if ( retries > 1 ) + goodness |= RC_PKT_RETRIED_MULTI; + else if ( retries ) + goodness |= RC_PKT_RETRIED_ONCE; + else + goodness |= RC_PKT_OK; + + ctx->goodness[direction][rate_idx] = goodness; + + ctx->packets++; + + rc80211_maybe_set_new ( dev ); +} + +/** + * Update rate-control state for transmitted packet + * + * @v dev 802.11 device + * @v retries Number of times packet was transmitted before success + * @v rc Return status code for transmission + */ +void rc80211_update_tx ( struct net80211_device *dev, int retries, int rc ) +{ + struct rc80211_ctx *ctx = dev->rctl; + + if ( ! ctx->started ) + return; + + rc80211_update ( dev, TX, dev->rate, retries, rc ); + + /* Check if the last RC_TX_EMERG_FAIL packets have all failed */ + if ( ! 
( ctx->goodness[TX][dev->rate] & + ( ( 1 << ( 2 * RC_TX_EMERG_FAIL ) ) - 1 ) ) ) { + if ( dev->rate == 0 ) + DBGC ( dev->rctl, "802.11 RC %p saw %d consecutive " + "failed TX, but cannot lower rate any further\n", + dev->rctl, RC_TX_EMERG_FAIL ); + else { + DBGC ( dev->rctl, "802.11 RC %p lowering rate (%d->%d " + "Mbps) due to %d consecutive TX failures\n", + dev->rctl, dev->rates[dev->rate] / 10, + dev->rates[dev->rate - 1] / 10, + RC_TX_EMERG_FAIL ); + + rc80211_set_rate ( dev, dev->rate - 1 ); + } + } +} + +/** + * Update rate-control state for received packet + * + * @v dev 802.11 device + * @v retry Whether the received packet had been retransmitted + * @v rate Rate at which packet was received, in 100 kbps units + */ +void rc80211_update_rx ( struct net80211_device *dev, int retry, u16 rate ) +{ + int ridx; + + for ( ridx = 0; ridx < dev->nr_rates && dev->rates[ridx] != rate; + ridx++ ) + ; + if ( ridx >= dev->nr_rates ) + return; /* couldn't find the rate */ + + rc80211_update ( dev, RX, ridx, retry, 0 ); +} + +/** + * Free rate-control context + * + * @v ctx Rate-control context + */ +void rc80211_free ( struct rc80211_ctx *ctx ) +{ + free ( ctx ); +} diff --git a/gpxe/src/net/80211/sec80211.c b/gpxe/src/net/80211/sec80211.c new file mode 100644 index 00000000..c5aa1183 --- /dev/null +++ b/gpxe/src/net/80211/sec80211.c @@ -0,0 +1,503 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <gpxe/ieee80211.h> +#include <gpxe/net80211.h> +#include <gpxe/sec80211.h> + +/** @file + * + * General secured-network routines required whenever any secure + * network support at all is compiled in. This involves things like + * installing keys, determining the type of security used by a probed + * network, and some small helper functions that take advantage of + * static data in this file. + */ + +/** Mapping from net80211 crypto/secprot types to RSN OUI descriptors */ +struct descriptor_map { + /** Value of net80211_crypto_alg or net80211_security_proto */ + u32 net80211_type; + + /** OUI+type in appropriate byte order, masked to exclude vendor */ + u32 oui_type; +}; + +/** Magic number in @a oui_type showing end of list */ +#define END_MAGIC 0xFFFFFFFF + +/** Mapping between net80211 cryptosystems and 802.11i cipher IDs */ +static struct descriptor_map rsn_cipher_map[] = { + { .net80211_type = NET80211_CRYPT_WEP, + .oui_type = IEEE80211_RSN_CTYPE_WEP40 }, + + { .net80211_type = NET80211_CRYPT_WEP, + .oui_type = IEEE80211_RSN_CTYPE_WEP104 }, + + { .net80211_type = NET80211_CRYPT_TKIP, + .oui_type = IEEE80211_RSN_CTYPE_TKIP }, + + { .net80211_type = NET80211_CRYPT_CCMP, + .oui_type = IEEE80211_RSN_CTYPE_CCMP }, + + { .net80211_type = NET80211_CRYPT_UNKNOWN, + .oui_type = END_MAGIC }, +}; + +/** Mapping between net80211 handshakers and 802.11i AKM IDs */ +static struct descriptor_map rsn_akm_map[] = { + { .net80211_type = NET80211_SECPROT_EAP, + .oui_type = IEEE80211_RSN_ATYPE_8021X }, + + { .net80211_type = NET80211_SECPROT_PSK, + .oui_type = IEEE80211_RSN_ATYPE_PSK }, + + { .net80211_type = NET80211_SECPROT_UNKNOWN, + .oui_type = END_MAGIC }, +}; + + 
+/** + * Install 802.11 cryptosystem + * + * @v which Pointer to the cryptosystem structure to install in + * @v crypt Cryptosystem ID number + * @v key Encryption key to use + * @v len Length of encryption key + * @v rsc Initial receive sequence counter, if applicable + * @ret rc Return status code + * + * The encryption key will not be accessed via the provided pointer + * after this function returns, so you may keep it on the stack. + * + * @a which must point to either @c dev->crypto (for the normal case + * of installing a unicast cryptosystem) or @c dev->gcrypto (to + * install a cryptosystem that will be used only for decrypting + * group-source frames). + */ +int sec80211_install ( struct net80211_crypto **which, + enum net80211_crypto_alg crypt, + const void *key, int len, const void *rsc ) +{ + struct net80211_crypto *crypto = *which; + struct net80211_crypto *tbl_crypto; + + /* Remove old crypto if it exists */ + free ( *which ); + *which = NULL; + + if ( crypt == NET80211_CRYPT_NONE ) { + DBG ( "802.11-Sec not installing null cryptography\n" ); + return 0; + } + + /* Find cryptosystem to use */ + for_each_table_entry ( tbl_crypto, NET80211_CRYPTOS ) { + if ( tbl_crypto->algorithm == crypt ) { + crypto = zalloc ( sizeof ( *crypto ) + + tbl_crypto->priv_len ); + if ( ! crypto ) { + DBG ( "802.11-Sec out of memory\n" ); + return -ENOMEM; + } + + memcpy ( crypto, tbl_crypto, sizeof ( *crypto ) ); + crypto->priv = ( ( void * ) crypto + + sizeof ( *crypto ) ); + break; + } + } + + if ( ! 
crypto ) { + DBG ( "802.11-Sec no support for cryptosystem %d\n", crypt ); + return -( ENOTSUP | EUNIQ_10 | ( crypt << 8 ) ); + } + + *which = crypto; + + DBG ( "802.11-Sec installing cryptosystem %d as %p with key of " + "length %d\n", crypt, crypto, len ); + + return crypto->init ( crypto, key, len, rsc ); +} + + +/** + * Determine net80211 crypto or handshaking type value to return for RSN info + * + * @v rsnp Pointer to next descriptor count field in RSN IE + * @v rsn_end Pointer to end of RSN IE + * @v map Descriptor map to use + * @v tbl_start Start of linker table to examine for gPXE support + * @v tbl_end End of linker table to examine for gPXE support + * @ret rsnp Updated to point to first byte after descriptors + * @ret map_ent Descriptor map entry of translation to use + * + * The entries in the linker table must be either net80211_crypto or + * net80211_handshaker structures, and @a tbl_stride must be set to + * sizeof() the appropriate one. + * + * This function expects @a rsnp to point at a two-byte descriptor + * count followed by a list of four-byte cipher or AKM descriptors; it + * will return @c NULL if the input packet is malformed, and otherwise + * set @a rsnp to the first byte it has not looked at. It will return + * the first cipher in the list that is supported by the current build + * of gPXE, or the first of all if none are supported. + * + * We play rather fast and loose with type checking, because this + * function is only called from two well-defined places in the + * RSN-checking code. Don't try to use it for anything else. + */ +static struct descriptor_map * rsn_pick_desc ( u8 **rsnp, u8 *rsn_end, + struct descriptor_map *map, + void *tbl_start, void *tbl_end ) +{ + int ndesc; + int ok = 0; + struct descriptor_map *map_ent, *map_ret = NULL; + u8 *rsn = *rsnp; + void *tblp; + size_t tbl_stride = ( map == rsn_cipher_map ? 
+ sizeof ( struct net80211_crypto ) : + sizeof ( struct net80211_handshaker ) ); + + if ( map != rsn_cipher_map && map != rsn_akm_map ) + return NULL; + + /* Determine which types we support */ + for ( tblp = tbl_start; tblp < tbl_end; tblp += tbl_stride ) { + struct net80211_crypto *crypto = tblp; + struct net80211_handshaker *hs = tblp; + + if ( map == rsn_cipher_map ) + ok |= ( 1 << crypto->algorithm ); + else + ok |= ( 1 << hs->protocol ); + } + + /* RSN sanity checks */ + if ( rsn + 2 > rsn_end ) { + DBG ( "RSN detect: malformed descriptor count\n" ); + return NULL; + } + + ndesc = *( u16 * ) rsn; + rsn += 2; + + if ( ! ndesc ) { + DBG ( "RSN detect: no descriptors\n" ); + return NULL; + } + + /* Determine which net80211 crypto types are listed */ + while ( ndesc-- ) { + u32 desc; + + if ( rsn + 4 > rsn_end ) { + DBG ( "RSN detect: malformed descriptor (%d left)\n", + ndesc ); + return NULL; + } + + desc = *( u32 * ) rsn; + rsn += 4; + + for ( map_ent = map; map_ent->oui_type != END_MAGIC; map_ent++ ) + if ( map_ent->oui_type == ( desc & OUI_TYPE_MASK ) ) + break; + + /* Use first cipher as a fallback */ + if ( ! map_ret ) + map_ret = map_ent; + + /* Once we find one we support, use it */ + if ( ok & ( 1 << map_ent->net80211_type ) ) { + map_ret = map_ent; + break; + } + } + + if ( ndesc > 0 ) + rsn += 4 * ndesc; + + *rsnp = rsn; + return map_ret; +} + + +/** + * Find the RSN or WPA information element in the provided beacon frame + * + * @v ie Pointer to first information element to check + * @v ie_end Pointer to end of information element space + * @ret is_rsn TRUE if returned IE is RSN, FALSE if it's WPA + * @ret end Pointer to byte immediately after last byte of data + * @ret data Pointer to first byte of data (the `version' field) + * + * If both an RSN and a WPA information element are found, this + * function will return the first one seen, which by ordering rules + * should always prefer the newer RSN IE. 
+ * + * If no RSN or WPA infomration element is found, returns @c NULL and + * leaves @a is_rsn and @a end in an undefined state. + * + * This function will not return a pointer to an information element + * that states it extends past the tail of the io_buffer, or whose @a + * version field is incorrect. + */ +u8 * sec80211_find_rsn ( union ieee80211_ie *ie, void *ie_end, + int *is_rsn, u8 **end ) +{ + u8 *rsn = NULL; + + if ( ! ieee80211_ie_bound ( ie, ie_end ) ) + return NULL; + + while ( ie ) { + if ( ie->id == IEEE80211_IE_VENDOR && + ie->vendor.oui == IEEE80211_WPA_OUI_VEN ) { + DBG ( "RSN detect: old-style WPA IE found\n" ); + rsn = &ie->vendor.data[0]; + *end = rsn + ie->len - 4; + *is_rsn = 0; + } else if ( ie->id == IEEE80211_IE_RSN ) { + DBG ( "RSN detect: 802.11i RSN IE found\n" ); + rsn = ( u8 * ) &ie->rsn.version; + *end = rsn + ie->len; + *is_rsn = 1; + } + + if ( rsn && ( *end > ( u8 * ) ie_end || rsn >= *end || + *( u16 * ) rsn != IEEE80211_RSN_VERSION ) ) { + DBG ( "RSN detect: malformed RSN IE or unknown " + "version, keep trying\n" ); + rsn = NULL; + } + + if ( rsn ) + break; + + ie = ieee80211_next_ie ( ie, ie_end ); + } + + if ( ! ie ) { + DBG ( "RSN detect: no RSN IE found\n" ); + return NULL; + } + + return rsn; +} + + +/** + * Detect crypto and AKM types from RSN information element + * + * @v is_rsn If TRUE, IE is a new-style RSN information element + * @v start Pointer to first byte of @a version field + * @v end Pointer to first byte not in the RSN IE + * @ret secprot Security handshaking protocol used by network + * @ret crypt Cryptosystem used by network + * @ret rc Return status code + * + * If the IE cannot be parsed, returns an error indication and leaves + * @a secprot and @a crypt unchanged. 
+ */ +int sec80211_detect_ie ( int is_rsn, u8 *start, u8 *end, + enum net80211_security_proto *secprot, + enum net80211_crypto_alg *crypt ) +{ + enum net80211_security_proto sp; + enum net80211_crypto_alg cr; + struct descriptor_map *map; + u8 *rsn = start; + + /* Set some defaults */ + cr = ( is_rsn ? NET80211_CRYPT_CCMP : NET80211_CRYPT_TKIP ); + sp = NET80211_SECPROT_EAP; + + rsn += 2; /* version - already checked */ + rsn += 4; /* group cipher - we don't use it here */ + + if ( rsn >= end ) + goto done; + + /* Pick crypto algorithm */ + map = rsn_pick_desc ( &rsn, end, rsn_cipher_map, + table_start ( NET80211_CRYPTOS ), + table_end ( NET80211_CRYPTOS ) ); + if ( ! map ) + goto invalid_rsn; + + cr = map->net80211_type; + + if ( rsn >= end ) + goto done; + + /* Pick handshaking algorithm */ + map = rsn_pick_desc ( &rsn, end, rsn_akm_map, + table_start ( NET80211_HANDSHAKERS ), + table_end ( NET80211_HANDSHAKERS ) ); + if ( ! map ) + goto invalid_rsn; + + sp = map->net80211_type; + + done: + DBG ( "RSN detect: OK, crypto type %d, secprot type %d\n", cr, sp ); + *secprot = sp; + *crypt = cr; + return 0; + + invalid_rsn: + DBG ( "RSN detect: invalid RSN IE\n" ); + return -EINVAL; +} + + +/** + * Detect the cryptosystem and handshaking protocol used by an 802.11 network + * + * @v iob I/O buffer containing beacon frame + * @ret secprot Security handshaking protocol used by network + * @ret crypt Cryptosystem used by network + * @ret rc Return status code + * + * This function uses weak linkage, as it must be called from generic + * contexts but should only be linked in if some encryption is + * supported; you must test its address against @c NULL before calling + * it. If it does not exist, any network with the PRIVACY bit set in + * beacon->capab should be considered unknown. 
+ */ +int _sec80211_detect ( struct io_buffer *iob, + enum net80211_security_proto *secprot, + enum net80211_crypto_alg *crypt ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_beacon *beacon = + ( struct ieee80211_beacon * ) hdr->data; + u8 *rsn, *rsn_end; + int is_rsn, rc; + + *crypt = NET80211_CRYPT_UNKNOWN; + *secprot = NET80211_SECPROT_UNKNOWN; + + /* Find RSN or WPA IE */ + if ( ! ( rsn = sec80211_find_rsn ( beacon->info_element, iob->tail, + &is_rsn, &rsn_end ) ) ) { + /* No security IE at all; either WEP or no security. */ + *secprot = NET80211_SECPROT_NONE; + + if ( beacon->capability & IEEE80211_CAPAB_PRIVACY ) + *crypt = NET80211_CRYPT_WEP; + else + *crypt = NET80211_CRYPT_NONE; + + return 0; + } + + /* Determine type of security */ + if ( ( rc = sec80211_detect_ie ( is_rsn, rsn, rsn_end, secprot, + crypt ) ) == 0 ) + return 0; + + /* If we get here, the RSN IE was invalid */ + + *crypt = NET80211_CRYPT_UNKNOWN; + *secprot = NET80211_SECPROT_UNKNOWN; + DBG ( "Failed to handle RSN IE:\n" ); + DBG_HD ( rsn, rsn_end - rsn ); + return rc; +} + + +/** + * Determine RSN descriptor for specified net80211 ID + * + * @v id net80211 ID value + * @v rsnie Whether to return a new-format (RSN IE) descriptor + * @v map Map to use in translation + * @ret desc RSN descriptor, or 0 on error + * + * If @a rsnie is false, returns an old-format (WPA vendor IE) + * descriptor. + */ +static u32 rsn_get_desc ( unsigned id, int rsnie, struct descriptor_map *map ) +{ + u32 vendor = ( rsnie ? IEEE80211_RSN_OUI : IEEE80211_WPA_OUI ); + + for ( ; map->oui_type != END_MAGIC; map++ ) { + if ( map->net80211_type == id ) + return map->oui_type | vendor; + } + + return 0; +} + +/** + * Determine RSN descriptor for specified net80211 cryptosystem number + * + * @v crypt Cryptosystem number + * @v rsnie Whether to return a new-format (RSN IE) descriptor + * @ret desc RSN descriptor + * + * If @a rsnie is false, returns an old-format (WPA vendor IE) + * descriptor. 
+ */ +u32 sec80211_rsn_get_crypto_desc ( enum net80211_crypto_alg crypt, int rsnie ) +{ + return rsn_get_desc ( crypt, rsnie, rsn_cipher_map ); +} + +/** + * Determine RSN descriptor for specified net80211 handshaker number + * + * @v secprot Handshaker number + * @v rsnie Whether to return a new-format (RSN IE) descriptor + * @ret desc RSN descriptor + * + * If @a rsnie is false, returns an old-format (WPA vendor IE) + * descriptor. + */ +u32 sec80211_rsn_get_akm_desc ( enum net80211_security_proto secprot, + int rsnie ) +{ + return rsn_get_desc ( secprot, rsnie, rsn_akm_map ); +} + +/** + * Determine net80211 cryptosystem number from RSN descriptor + * + * @v desc RSN descriptor + * @ret crypt net80211 cryptosystem enumeration value + */ +enum net80211_crypto_alg sec80211_rsn_get_net80211_crypt ( u32 desc ) +{ + struct descriptor_map *map = rsn_cipher_map; + + for ( ; map->oui_type != END_MAGIC; map++ ) { + if ( map->oui_type == ( desc & OUI_TYPE_MASK ) ) + break; + } + + return map->net80211_type; +} diff --git a/gpxe/src/net/80211/wep.c b/gpxe/src/net/80211/wep.c new file mode 100644 index 00000000..1c37e0c3 --- /dev/null +++ b/gpxe/src/net/80211/wep.c @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <gpxe/net80211.h> +#include <gpxe/sec80211.h> +#include <gpxe/crypto.h> +#include <gpxe/arc4.h> +#include <gpxe/crc32.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +/** @file + * + * The WEP wireless encryption method (insecure!) + * + * The data field in a WEP-encrypted packet contains a 3-byte + * initialisation vector, one-byte Key ID field (only the bottom two + * bits are ever used), encrypted data, and a 4-byte encrypted CRC of + * the plaintext data, called the ICV. To decrypt it, the IV is + * prepended to the shared key and the data stream (including ICV) is + * run through the ARC4 stream cipher; if the ICV matches a CRC32 + * calculated on the plaintext, the packet is valid. + * + * For efficiency and code-size reasons, this file assumes it is + * running on a little-endian machine. + */ + +/** Length of WEP initialisation vector */ +#define WEP_IV_LEN 3 + +/** Length of WEP key ID byte */ +#define WEP_KID_LEN 1 + +/** Length of WEP ICV checksum */ +#define WEP_ICV_LEN 4 + +/** Maximum length of WEP key */ +#define WEP_MAX_KEY 16 + +/** Amount of data placed before the encrypted bytes */ +#define WEP_HEADER_LEN 4 + +/** Amount of data placed after the encrypted bytes */ +#define WEP_TRAILER_LEN 4 + +/** Total WEP overhead bytes */ +#define WEP_OVERHEAD 8 + +/** Context for WEP encryption and decryption */ +struct wep_ctx +{ + /** Encoded WEP key + * + * The actual key bytes are stored beginning at offset 3, to + * leave room for easily inserting the IV before a particular + * operation. 
+ */ + u8 key[WEP_IV_LEN + WEP_MAX_KEY]; + + /** Length of WEP key (not including IV bytes) */ + int keylen; + + /** ARC4 context */ + struct arc4_ctx arc4; +}; + +/** + * Initialize WEP algorithm + * + * @v crypto 802.11 cryptographic algorithm + * @v key WEP key to use + * @v keylen Length of WEP key + * @v rsc Initial receive sequence counter (unused) + * @ret rc Return status code + * + * Standard key lengths are 5 and 13 bytes; 16-byte keys are + * occasionally supported as an extension to the standard. + */ +static int wep_init ( struct net80211_crypto *crypto, const void *key, + int keylen, const void *rsc __unused ) +{ + struct wep_ctx *ctx = crypto->priv; + + ctx->keylen = ( keylen > WEP_MAX_KEY ? WEP_MAX_KEY : keylen ); + memcpy ( ctx->key + WEP_IV_LEN, key, ctx->keylen ); + + return 0; +} + +/** + * Encrypt packet using WEP + * + * @v crypto 802.11 cryptographic algorithm + * @v iob I/O buffer of plaintext packet + * @ret eiob Newly allocated I/O buffer for encrypted packet, or NULL + * + * If memory allocation fails, @c NULL is returned. + */ +static struct io_buffer * wep_encrypt ( struct net80211_crypto *crypto, + struct io_buffer *iob ) +{ + struct wep_ctx *ctx = crypto->priv; + struct io_buffer *eiob; + struct ieee80211_frame *hdr; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( iob ) - hdrlen; + int newlen = hdrlen + datalen + WEP_OVERHEAD; + u32 iv, icv; + + eiob = alloc_iob ( newlen ); + if ( ! eiob ) + return NULL; + + memcpy ( iob_put ( eiob, hdrlen ), iob->data, hdrlen ); + hdr = eiob->data; + hdr->fc |= IEEE80211_FC_PROTECTED; + + /* Calculate IV, put it in the header (with key ID byte = 0), and + set it up at the start of the encryption key. 
*/ + iv = random() & 0xffffff; /* IV in bottom 3 bytes, top byte = KID = 0 */ + memcpy ( iob_put ( eiob, WEP_HEADER_LEN ), &iv, WEP_HEADER_LEN ); + memcpy ( ctx->key, &iv, WEP_IV_LEN ); + + /* Encrypt the data using RC4 */ + cipher_setkey ( &arc4_algorithm, &ctx->arc4, ctx->key, + ctx->keylen + WEP_IV_LEN ); + cipher_encrypt ( &arc4_algorithm, &ctx->arc4, iob->data + hdrlen, + iob_put ( eiob, datalen ), datalen ); + + /* Add ICV */ + icv = ~crc32_le ( ~0, iob->data + hdrlen, datalen ); + cipher_encrypt ( &arc4_algorithm, &ctx->arc4, &icv, + iob_put ( eiob, WEP_ICV_LEN ), WEP_ICV_LEN ); + + return eiob; +} + +/** + * Decrypt packet using WEP + * + * @v crypto 802.11 cryptographic algorithm + * @v eiob I/O buffer of encrypted packet + * @ret iob Newly allocated I/O buffer for plaintext packet, or NULL + * + * If a consistency check for the decryption fails (usually indicating + * an invalid key), @c NULL is returned. + */ +static struct io_buffer * wep_decrypt ( struct net80211_crypto *crypto, + struct io_buffer *eiob ) +{ + struct wep_ctx *ctx = crypto->priv; + struct io_buffer *iob; + struct ieee80211_frame *hdr; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( eiob ) - hdrlen - WEP_OVERHEAD; + int newlen = hdrlen + datalen; + u32 iv, icv, crc; + + iob = alloc_iob ( newlen ); + if ( ! 
iob ) + return NULL; + + memcpy ( iob_put ( iob, hdrlen ), eiob->data, hdrlen ); + hdr = iob->data; + hdr->fc &= ~IEEE80211_FC_PROTECTED; + + /* Strip off IV and use it to initialize cryptosystem */ + memcpy ( &iv, eiob->data + hdrlen, 4 ); + iv &= 0xffffff; /* ignore key ID byte */ + memcpy ( ctx->key, &iv, WEP_IV_LEN ); + + /* Decrypt the data using RC4 */ + cipher_setkey ( &arc4_algorithm, &ctx->arc4, ctx->key, + ctx->keylen + WEP_IV_LEN ); + cipher_decrypt ( &arc4_algorithm, &ctx->arc4, eiob->data + hdrlen + + WEP_HEADER_LEN, iob_put ( iob, datalen ), datalen ); + + /* Strip off ICV and verify it */ + cipher_decrypt ( &arc4_algorithm, &ctx->arc4, eiob->data + hdrlen + + WEP_HEADER_LEN + datalen, &icv, WEP_ICV_LEN ); + crc = ~crc32_le ( ~0, iob->data + hdrlen, datalen ); + if ( crc != icv ) { + DBGC ( crypto, "WEP %p CRC mismatch: expect %08x, get %08x\n", + crypto, icv, crc ); + free_iob ( iob ); + return NULL; + } + return iob; +} + +/** WEP cryptosystem for 802.11 */ +struct net80211_crypto wep_crypto __net80211_crypto = { + .algorithm = NET80211_CRYPT_WEP, + .init = wep_init, + .encrypt = wep_encrypt, + .decrypt = wep_decrypt, + .priv_len = sizeof ( struct wep_ctx ), +}; + +/** + * Initialize trivial 802.11 security handshaker + * + * @v dev 802.11 device + * @v ctx Security handshaker + * + * This simply fetches a WEP key from netX/key, and if it exists, + * installs WEP cryptography on the 802.11 device. No real handshaking + * is performed. + */ +static int trivial_init ( struct net80211_device *dev ) +{ + u8 key[WEP_MAX_KEY]; /* support up to 128-bit keys */ + int len; + int rc; + + if ( dev->associating && + dev->associating->crypto == NET80211_CRYPT_NONE ) + return 0; /* no crypto? OK. 
*/ + + len = fetch_setting ( netdev_settings ( dev->netdev ), + &net80211_key_setting, key, WEP_MAX_KEY ); + + if ( len <= 0 ) { + DBGC ( dev, "802.11 %p cannot do WEP without a key\n", dev ); + return -EACCES; + } + + /* Full 128-bit keys are a nonstandard extension, but they're + utterly trivial to support, so we do. */ + if ( len != 5 && len != 13 && len != 16 ) { + DBGC ( dev, "802.11 %p invalid WEP key length %d\n", + dev, len ); + return -EINVAL; + } + + DBGC ( dev, "802.11 %p installing %d-bit WEP\n", dev, len * 8 ); + + rc = sec80211_install ( &dev->crypto, NET80211_CRYPT_WEP, key, len, + NULL ); + if ( rc < 0 ) + return rc; + + return 0; +} + +/** + * Check for key change on trivial 802.11 security handshaker + * + * @v dev 802.11 device + * @v ctx Security handshaker + */ +static int trivial_change_key ( struct net80211_device *dev ) +{ + u8 key[WEP_MAX_KEY]; + int len; + int change = 0; + + /* If going from WEP to clear, or something else to WEP, reassociate. */ + if ( ! dev->crypto || ( dev->crypto->init != wep_init ) ) + change ^= 1; + + len = fetch_setting ( netdev_settings ( dev->netdev ), + &net80211_key_setting, key, WEP_MAX_KEY ); + if ( len <= 0 ) + change ^= 1; + + /* Changing crypto type => return nonzero to reassociate. */ + if ( change ) + return -EINVAL; + + /* Going from no crypto to still no crypto => nothing to do. */ + if ( len <= 0 ) + return 0; + + /* Otherwise, reinitialise WEP with new key. */ + return wep_init ( dev->crypto, key, len, NULL ); +} + +/** Trivial 802.11 security handshaker */ +struct net80211_handshaker trivial_handshaker __net80211_handshaker = { + .protocol = NET80211_SECPROT_NONE, + .init = trivial_init, + .change_key = trivial_change_key, + .priv_len = 0, +}; diff --git a/gpxe/src/net/80211/wpa.c b/gpxe/src/net/80211/wpa.c new file mode 100644 index 00000000..9bac8fe7 --- /dev/null +++ b/gpxe/src/net/80211/wpa.c @@ -0,0 +1,973 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <gpxe/net80211.h> +#include <gpxe/sec80211.h> +#include <gpxe/wpa.h> +#include <gpxe/eapol.h> +#include <gpxe/crypto.h> +#include <gpxe/arc4.h> +#include <gpxe/crc32.h> +#include <gpxe/sha1.h> +#include <gpxe/hmac.h> +#include <gpxe/list.h> +#include <gpxe/ethernet.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +/** @file + * + * Handler for the aspects of WPA handshaking that are independent of + * 802.1X/PSK or TKIP/CCMP; this mostly involves the 4-Way Handshake. + */ + +/** List of WPA contexts in active use. */ +struct list_head wpa_contexts = LIST_HEAD_INIT ( wpa_contexts ); + + +/** + * Return an error code and deauthenticate + * + * @v ctx WPA common context + * @v rc Return status code + * @ret rc The passed return status code + */ +static int wpa_fail ( struct wpa_common_ctx *ctx, int rc ) +{ + net80211_deauthenticate ( ctx->dev, rc ); + return rc; +} + + +/** + * Find a cryptosystem handler structure from a crypto ID + * + * @v crypt Cryptosystem ID + * @ret crypto Cryptosystem handler structure + * + * If support for @a crypt is not compiled in to gPXE, or if @a crypt + * is NET80211_CRYPT_UNKNOWN, returns @c NULL. 
+ */ +static struct net80211_crypto * +wpa_find_cryptosystem ( enum net80211_crypto_alg crypt ) +{ + struct net80211_crypto *crypto; + + for_each_table_entry ( crypto, NET80211_CRYPTOS ) { + if ( crypto->algorithm == crypt ) + return crypto; + } + + return NULL; +} + + +/** + * Find WPA key integrity and encryption handler from key version field + * + * @v ver Version bits of EAPOL-Key info field + * @ret kie Key integrity and encryption handler + */ +struct wpa_kie * wpa_find_kie ( int version ) +{ + struct wpa_kie *kie; + + for_each_table_entry ( kie, WPA_KIES ) { + if ( kie->version == version ) + return kie; + } + + return NULL; +} + + +/** + * Construct RSN or WPA information element + * + * @v dev 802.11 device + * @ret ie_ret RSN or WPA information element + * @ret rc Return status code + * + * This function allocates, fills, and returns a RSN or WPA + * information element suitable for including in an association + * request frame to the network identified by @c dev->associating. + * If it is impossible to construct an information element consistent + * with gPXE's capabilities that is compatible with that network, or + * if none should be sent because that network's beacon included no + * security information, returns an error indication and leaves + * @a ie_ret unchanged. + * + * The returned IE will be of the same type (RSN or WPA) as was + * included in the beacon for the network it is destined for. + */ +int wpa_make_rsn_ie ( struct net80211_device *dev, union ieee80211_ie **ie_ret ) +{ + u8 *rsn, *rsn_end; + int is_rsn; + u32 group_cipher; + enum net80211_crypto_alg gcrypt; + int ie_len; + u8 *iep; + struct ieee80211_ie_rsn *ie; + struct ieee80211_frame *hdr; + struct ieee80211_beacon *beacon; + + if ( ! 
dev->associating ) { + DBG ( "WPA: Can't make RSN IE for a non-associating device\n" ); + return -EINVAL; + } + + hdr = dev->associating->beacon->data; + beacon = ( struct ieee80211_beacon * ) hdr->data; + rsn = sec80211_find_rsn ( beacon->info_element, + dev->associating->beacon->tail, &is_rsn, + &rsn_end ); + if ( ! rsn ) { + DBG ( "WPA: Can't make RSN IE when we didn't get one\n" ); + return -EINVAL; + } + + rsn += 2; /* skip version */ + group_cipher = *( u32 * ) rsn; + gcrypt = sec80211_rsn_get_net80211_crypt ( group_cipher ); + + if ( ! wpa_find_cryptosystem ( gcrypt ) || + ! wpa_find_cryptosystem ( dev->associating->crypto ) ) { + DBG ( "WPA: No support for (GC:%d, PC:%d)\n", + gcrypt, dev->associating->crypto ); + return -ENOTSUP; + } + + /* Everything looks good - make our IE. */ + + /* WPA IEs need 4 more bytes for the OUI+type */ + ie_len = ieee80211_rsn_size ( 1, 1, 0, is_rsn ) + ( 4 * ! is_rsn ); + iep = malloc ( ie_len ); + if ( ! iep ) + return -ENOMEM; + + *ie_ret = ( union ieee80211_ie * ) iep; + + /* Store ID and length bytes. */ + *iep++ = ( is_rsn ? IEEE80211_IE_RSN : IEEE80211_IE_VENDOR ); + *iep++ = ie_len - 2; + + /* Store OUI+type for WPA IEs. */ + if ( ! is_rsn ) { + *( u32 * ) iep = IEEE80211_WPA_OUI_VEN; + iep += 4; + } + + /* If this is a WPA IE, the id and len bytes in the + ieee80211_ie_rsn structure will not be valid, but by doing + the cast we can fill all the other fields much more + readily. 
*/ + + ie = ( struct ieee80211_ie_rsn * ) ( iep - 2 ); + ie->version = IEEE80211_RSN_VERSION; + ie->group_cipher = group_cipher; + ie->pairwise_count = 1; + ie->pairwise_cipher[0] = + sec80211_rsn_get_crypto_desc ( dev->associating->crypto, + is_rsn ); + ie->akm_count = 1; + ie->akm_list[0] = + sec80211_rsn_get_akm_desc ( dev->associating->handshaking, + is_rsn ); + if ( is_rsn ) { + ie->rsn_capab = 0; + ie->pmkid_count = 0; + } + + return 0; +} + + +/** + * Set up generic WPA support to handle 4-Way Handshake + * + * @v dev 802.11 device + * @v ctx WPA common context + * @v pmk Pairwise Master Key to use for session + * @v pmk_len Length of PMK, almost always 32 + * @ret rc Return status code + */ +int wpa_start ( struct net80211_device *dev, struct wpa_common_ctx *ctx, + const void *pmk, size_t pmk_len ) +{ + struct io_buffer *iob; + struct ieee80211_frame *hdr; + struct ieee80211_beacon *beacon; + u8 *ap_rsn_ie = NULL, *ap_rsn_ie_end; + + if ( ! dev->rsn_ie || ! dev->associating ) + return -EINVAL; + + ctx->dev = dev; + memcpy ( ctx->pmk, pmk, ctx->pmk_len = pmk_len ); + ctx->state = WPA_READY; + ctx->replay = ~0ULL; + + iob = dev->associating->beacon; + hdr = iob->data; + beacon = ( struct ieee80211_beacon * ) hdr->data; + ap_rsn_ie = sec80211_find_rsn ( beacon->info_element, iob->tail, + &ctx->ap_rsn_is_rsn, &ap_rsn_ie_end ); + if ( ap_rsn_ie ) { + ctx->ap_rsn_ie = malloc ( ap_rsn_ie_end - ap_rsn_ie ); + if ( ! 
ctx->ap_rsn_ie ) + return -ENOMEM; + memcpy ( ctx->ap_rsn_ie, ap_rsn_ie, ap_rsn_ie_end - ap_rsn_ie ); + ctx->ap_rsn_ie_len = ap_rsn_ie_end - ap_rsn_ie; + } else { + return -ENOENT; + } + + ctx->crypt = dev->associating->crypto; + ctx->gcrypt = NET80211_CRYPT_UNKNOWN; + + list_add_tail ( &ctx->list, &wpa_contexts ); + return 0; +} + + +/** + * Disable handling of received WPA handshake frames + * + * @v dev 802.11 device + */ +void wpa_stop ( struct net80211_device *dev ) +{ + struct wpa_common_ctx *ctx, *tmp; + + list_for_each_entry_safe ( ctx, tmp, &wpa_contexts, list ) { + if ( ctx->dev == dev ) { + free ( ctx->ap_rsn_ie ); + ctx->ap_rsn_ie = NULL; + list_del ( &ctx->list ); + } + } +} + + +/** + * Check PMKID consistency + * + * @v ctx WPA common context + * @v pmkid PMKID to check against (16 bytes long) + * @ret rc Zero if they match, or a negative error code if not + */ +int wpa_check_pmkid ( struct wpa_common_ctx *ctx, const u8 *pmkid ) +{ + u8 sha1_ctx[SHA1_CTX_SIZE]; + u8 my_pmkid[SHA1_SIZE]; + u8 pmk[ctx->pmk_len]; + size_t pmk_len; + struct { + char name[8]; + u8 aa[ETH_ALEN]; + u8 spa[ETH_ALEN]; + } __attribute__ (( packed )) pmkid_data; + + memcpy ( pmk, ctx->pmk, ctx->pmk_len ); + pmk_len = ctx->pmk_len; + + memcpy ( pmkid_data.name, "PMK Name", 8 ); + memcpy ( pmkid_data.aa, ctx->dev->bssid, ETH_ALEN ); + memcpy ( pmkid_data.spa, ctx->dev->netdev->ll_addr, ETH_ALEN ); + + hmac_init ( &sha1_algorithm, sha1_ctx, pmk, &pmk_len ); + hmac_update ( &sha1_algorithm, sha1_ctx, &pmkid_data, + sizeof ( pmkid_data ) ); + hmac_final ( &sha1_algorithm, sha1_ctx, pmk, &pmk_len, my_pmkid ); + + if ( memcmp ( my_pmkid, pmkid, WPA_PMKID_LEN ) != 0 ) + return -EACCES; + + return 0; +} + + +/** + * Derive pairwise transient key + * + * @v ctx WPA common context + */ +static void wpa_derive_ptk ( struct wpa_common_ctx *ctx ) +{ + struct { + u8 mac1[ETH_ALEN]; + u8 mac2[ETH_ALEN]; + u8 nonce1[WPA_NONCE_LEN]; + u8 nonce2[WPA_NONCE_LEN]; + } __attribute__ (( packed )) 
ptk_data; + + /* The addresses and nonces are stored in numerical order (!) */ + + if ( memcmp ( ctx->dev->netdev->ll_addr, ctx->dev->bssid, + ETH_ALEN ) < 0 ) { + memcpy ( ptk_data.mac1, ctx->dev->netdev->ll_addr, ETH_ALEN ); + memcpy ( ptk_data.mac2, ctx->dev->bssid, ETH_ALEN ); + } else { + memcpy ( ptk_data.mac1, ctx->dev->bssid, ETH_ALEN ); + memcpy ( ptk_data.mac2, ctx->dev->netdev->ll_addr, ETH_ALEN ); + } + + if ( memcmp ( ctx->Anonce, ctx->Snonce, WPA_NONCE_LEN ) < 0 ) { + memcpy ( ptk_data.nonce1, ctx->Anonce, WPA_NONCE_LEN ); + memcpy ( ptk_data.nonce2, ctx->Snonce, WPA_NONCE_LEN ); + } else { + memcpy ( ptk_data.nonce1, ctx->Snonce, WPA_NONCE_LEN ); + memcpy ( ptk_data.nonce2, ctx->Anonce, WPA_NONCE_LEN ); + } + + DBGC2 ( ctx, "WPA %p A1 %s, A2 %s\n", ctx, eth_ntoa ( ptk_data.mac1 ), + eth_ntoa ( ptk_data.mac2 ) ); + DBGC2 ( ctx, "WPA %p Nonce1, Nonce2:\n", ctx ); + DBGC2_HD ( ctx, ptk_data.nonce1, WPA_NONCE_LEN ); + DBGC2_HD ( ctx, ptk_data.nonce2, WPA_NONCE_LEN ); + + prf_sha1 ( ctx->pmk, ctx->pmk_len, + "Pairwise key expansion", + &ptk_data, sizeof ( ptk_data ), + &ctx->ptk, sizeof ( ctx->ptk ) ); + + DBGC2 ( ctx, "WPA %p PTK:\n", ctx ); + DBGC2_HD ( ctx, &ctx->ptk, sizeof ( ctx->ptk ) ); +} + + +/** + * Install pairwise transient key + * + * @v ctx WPA common context + * @v len Key length (16 for CCMP, 32 for TKIP) + * @ret rc Return status code + */ +static inline int wpa_install_ptk ( struct wpa_common_ctx *ctx, int len ) +{ + DBGC ( ctx, "WPA %p: installing %d-byte pairwise transient key\n", + ctx, len ); + DBGC2_HD ( ctx, &ctx->ptk.tk, len ); + + return sec80211_install ( &ctx->dev->crypto, ctx->crypt, + &ctx->ptk.tk, len, NULL ); +} + +/** + * Install group transient key + * + * @v ctx WPA common context + * @v len Key length (16 for CCMP, 32 for TKIP) + * @v rsc Receive sequence counter field in EAPOL-Key packet + * @ret rc Return status code + */ +static inline int wpa_install_gtk ( struct wpa_common_ctx *ctx, int len, + const void *rsc ) +{ 
+ DBGC ( ctx, "WPA %p: installing %d-byte group transient key\n", + ctx, len ); + DBGC2_HD ( ctx, &ctx->gtk.tk, len ); + + return sec80211_install ( &ctx->dev->gcrypto, ctx->gcrypt, + &ctx->gtk.tk, len, rsc ); +} + +/** + * Search for group transient key, and install it if found + * + * @v ctx WPA common context + * @v ie Pointer to first IE in key data field + * @v ie_end Pointer to first byte not in key data field + * @v rsc Receive sequence counter field in EAPOL-Key packet + * @ret rc Return status code + */ +static int wpa_maybe_install_gtk ( struct wpa_common_ctx *ctx, + union ieee80211_ie *ie, void *ie_end, + const void *rsc ) +{ + struct wpa_kde *kde; + + if ( ! ieee80211_ie_bound ( ie, ie_end ) ) + return -ENOENT; + + while ( ie ) { + if ( ie->id == IEEE80211_IE_VENDOR && + ie->vendor.oui == WPA_KDE_GTK ) + break; + + ie = ieee80211_next_ie ( ie, ie_end ); + } + + if ( ! ie ) + return -ENOENT; + + if ( ie->len - 6u > sizeof ( ctx->gtk.tk ) ) { + DBGC ( ctx, "WPA %p: GTK KDE is too long (%d bytes, max %d)\n", + ctx, ie->len - 4, sizeof ( ctx->gtk.tk ) ); + return -EINVAL; + } + + /* XXX We ignore key ID for now. */ + kde = ( struct wpa_kde * ) ie; + memcpy ( &ctx->gtk.tk, &kde->gtk_encap.gtk, kde->len - 6 ); + + return wpa_install_gtk ( ctx, kde->len - 6, rsc ); +} + + +/** + * Allocate I/O buffer for construction of outgoing EAPOL-Key frame + * + * @v kdlen Maximum number of bytes in the Key Data field + * @ret iob Newly allocated I/O buffer + * + * The returned buffer will have space reserved for the link-layer and + * EAPOL headers, and will have @c iob->tail pointing to the start of + * the Key Data field. Thus, it is necessary to use iob_put() in + * filling the Key Data. + */ +static struct io_buffer * wpa_alloc_frame ( int kdlen ) +{ + struct io_buffer *ret = alloc_iob ( sizeof ( struct eapol_key_pkt ) + + kdlen + EAPOL_HDR_LEN + + MAX_LL_HEADER_LEN ); + if ( ! 
ret ) + return NULL; + + iob_reserve ( ret, MAX_LL_HEADER_LEN + EAPOL_HDR_LEN ); + memset ( iob_put ( ret, sizeof ( struct eapol_key_pkt ) ), 0, + sizeof ( struct eapol_key_pkt ) ); + + return ret; +} + + +/** + * Send EAPOL-Key packet + * + * @v iob I/O buffer, with sufficient headroom for headers + * @v dev 802.11 device + * @v kie Key integrity and encryption handler + * @v is_rsn If TRUE, handshake uses new RSN format + * @ret rc Return status code + * + * If a KIE is specified, the MIC will be filled in before transmission. + */ +static int wpa_send_eapol ( struct io_buffer *iob, struct wpa_common_ctx *ctx, + struct wpa_kie *kie ) +{ + struct eapol_key_pkt *pkt = iob->data; + struct eapol_frame *eapol = iob_push ( iob, EAPOL_HDR_LEN ); + + pkt->info = htons ( pkt->info ); + pkt->keysize = htons ( pkt->keysize ); + pkt->datalen = htons ( pkt->datalen ); + pkt->replay = cpu_to_be64 ( pkt->replay ); + eapol->version = EAPOL_THIS_VERSION; + eapol->type = EAPOL_TYPE_KEY; + eapol->length = htons ( iob->tail - iob->data - sizeof ( *eapol ) ); + + memset ( pkt->mic, 0, sizeof ( pkt->mic ) ); + if ( kie ) + kie->mic ( &ctx->ptk.kck, eapol, EAPOL_HDR_LEN + + sizeof ( *pkt ) + ntohs ( pkt->datalen ), + pkt->mic ); + + return net_tx ( iob, ctx->dev->netdev, &eapol_protocol, + ctx->dev->bssid ); +} + + +/** + * Send second frame in 4-Way Handshake + * + * @v ctx WPA common context + * @v pkt First frame, to which this is a reply + * @v is_rsn If TRUE, handshake uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_send_2_of_4 ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + struct io_buffer *iob = wpa_alloc_frame ( ctx->dev->rsn_ie->len + 2 ); + struct eapol_key_pkt *npkt; + + if ( ! 
iob ) + return -ENOMEM; + + npkt = iob->data; + memcpy ( npkt, pkt, sizeof ( *pkt ) ); + npkt->info &= ~EAPOL_KEY_INFO_KEY_ACK; + npkt->info |= EAPOL_KEY_INFO_KEY_MIC; + if ( is_rsn ) + npkt->keysize = 0; + memcpy ( npkt->nonce, ctx->Snonce, sizeof ( npkt->nonce ) ); + npkt->datalen = ctx->dev->rsn_ie->len + 2; + memcpy ( iob_put ( iob, npkt->datalen ), ctx->dev->rsn_ie, + npkt->datalen ); + + DBGC ( ctx, "WPA %p: sending 2/4\n", ctx ); + + return wpa_send_eapol ( iob, ctx, kie ); +} + + +/** + * Handle receipt of first frame in 4-Way Handshake + * + * @v ctx WPA common context + * @v pkt EAPOL-Key packet + * @v is_rsn If TRUE, frame uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_handle_1_of_4 ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + int rc; + + if ( ctx->state == WPA_WAITING ) + return -EINVAL; + + ctx->state = WPA_WORKING; + memcpy ( ctx->Anonce, pkt->nonce, sizeof ( ctx->Anonce ) ); + if ( ! ctx->have_Snonce ) { + get_random_bytes ( ctx->Snonce, sizeof ( ctx->Snonce ) ); + ctx->have_Snonce = 1; + } + + if ( is_rsn && pkt->datalen ) { + union ieee80211_ie *ie = ( union ieee80211_ie * ) pkt->data; + void *ie_end = pkt->data + pkt->datalen; + + if ( ! 
ieee80211_ie_bound ( ie, ie_end ) ) { + DBGC ( ctx, "WPA %p: malformed PMKID KDE\n", ctx ); + return wpa_fail ( ctx, -EINVAL ); + } + + while ( ie ) { + if ( ie->id == IEEE80211_IE_VENDOR && + ie->vendor.oui == WPA_KDE_PMKID ) { + rc = wpa_check_pmkid ( ctx, ie->vendor.data ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p ALERT: PMKID " + "mismatch in 1/4\n", ctx ); + return wpa_fail ( ctx, rc ); + } + } + + ie = ieee80211_next_ie ( ie, ie_end ); + } + } + + DBGC ( ctx, "WPA %p: received 1/4, looks OK\n", ctx ); + + wpa_derive_ptk ( ctx ); + + return wpa_send_2_of_4 ( ctx, pkt, is_rsn, kie ); +} + + +/** + * Send fourth frame in 4-Way Handshake, or second in Group Key Handshake + * + * @v ctx WPA common context + * @v pkt EAPOL-Key packet for frame to which we're replying + * @v is_rsn If TRUE, frame uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_send_final ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + struct io_buffer *iob = wpa_alloc_frame ( 0 ); + struct eapol_key_pkt *npkt; + + if ( ! 
iob ) + return -ENOMEM; + + npkt = iob->data; + memcpy ( npkt, pkt, sizeof ( *pkt ) ); + npkt->info &= ~( EAPOL_KEY_INFO_KEY_ACK | EAPOL_KEY_INFO_INSTALL | + EAPOL_KEY_INFO_KEY_ENC ); + if ( is_rsn ) + npkt->keysize = 0; + memset ( npkt->nonce, 0, sizeof ( npkt->nonce ) ); + memset ( npkt->iv, 0, sizeof ( npkt->iv ) ); + npkt->datalen = 0; + + if ( npkt->info & EAPOL_KEY_INFO_TYPE ) + DBGC ( ctx, "WPA %p: sending 4/4\n", ctx ); + else + DBGC ( ctx, "WPA %p: sending 2/2\n", ctx ); + + return wpa_send_eapol ( iob, ctx, kie ); + +} + + +/** + * Handle receipt of third frame in 4-Way Handshake + * + * @v ctx WPA common context + * @v pkt EAPOL-Key packet + * @v is_rsn If TRUE, frame uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_handle_3_of_4 ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + int rc; + u8 *this_rsn, *this_rsn_end; + u8 *new_rsn, *new_rsn_end; + int this_is_rsn, new_is_rsn; + + if ( ctx->state == WPA_WAITING ) + return -EINVAL; + + ctx->state = WPA_WORKING; + + /* Check nonce */ + if ( memcmp ( ctx->Anonce, pkt->nonce, WPA_NONCE_LEN ) != 0 ) { + DBGC ( ctx, "WPA %p ALERT: nonce mismatch in 3/4\n", ctx ); + return wpa_fail ( ctx, -EACCES ); + } + + /* Check RSN IE */ + this_rsn = sec80211_find_rsn ( ( union ieee80211_ie * ) pkt->data, + pkt->data + pkt->datalen, + &this_is_rsn, &this_rsn_end ); + if ( this_rsn ) + new_rsn = sec80211_find_rsn ( ( union ieee80211_ie * ) + this_rsn_end, + pkt->data + pkt->datalen, + &new_is_rsn, &new_rsn_end ); + else + new_rsn = NULL; + + if ( ! ctx->ap_rsn_ie || ! 
this_rsn || + ctx->ap_rsn_ie_len != ( this_rsn_end - this_rsn ) || + ctx->ap_rsn_is_rsn != this_is_rsn || + memcmp ( ctx->ap_rsn_ie, this_rsn, ctx->ap_rsn_ie_len ) != 0 ) { + DBGC ( ctx, "WPA %p ALERT: RSN mismatch in 3/4\n", ctx ); + DBGC2 ( ctx, "WPA %p RSNs (in 3/4, in beacon):\n", ctx ); + DBGC2_HD ( ctx, this_rsn, this_rsn_end - this_rsn ); + DBGC2_HD ( ctx, ctx->ap_rsn_ie, ctx->ap_rsn_ie_len ); + return wpa_fail ( ctx, -EACCES ); + } + + /* Don't switch if they just supplied both styles of IE + simultaneously; we need two RSN IEs or two WPA IEs to + switch ciphers. They'll be immediately consecutive because + of ordering guarantees. */ + if ( new_rsn && this_is_rsn == new_is_rsn ) { + struct net80211_wlan *assoc = ctx->dev->associating; + DBGC ( ctx, "WPA %p: accommodating bait-and-switch tactics\n", + ctx ); + DBGC2 ( ctx, "WPA %p RSNs (in 3/4+beacon, new in 3/4):\n", + ctx ); + DBGC2_HD ( ctx, this_rsn, this_rsn_end - this_rsn ); + DBGC2_HD ( ctx, new_rsn, new_rsn_end - new_rsn ); + + if ( ( rc = sec80211_detect_ie ( new_is_rsn, new_rsn, + new_rsn_end, + &assoc->handshaking, + &assoc->crypto ) ) != 0 ) + DBGC ( ctx, "WPA %p: bait-and-switch invalid, staying " + "with original request\n", ctx ); + } else { + new_rsn = this_rsn; + new_is_rsn = this_is_rsn; + new_rsn_end = this_rsn_end; + } + + /* Grab group cryptosystem ID */ + ctx->gcrypt = sec80211_rsn_get_net80211_crypt ( *( u32 * ) + ( new_rsn + 2 ) ); + + /* Check for a GTK, if info field is encrypted */ + if ( pkt->info & EAPOL_KEY_INFO_KEY_ENC ) { + rc = wpa_maybe_install_gtk ( ctx, + ( union ieee80211_ie * ) pkt->data, + pkt->data + pkt->datalen, + pkt->rsc ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p did not install GTK in 3/4: %s\n", + ctx, strerror ( rc ) ); + if ( rc != -ENOENT ) + return wpa_fail ( ctx, rc ); + } + } + + DBGC ( ctx, "WPA %p: received 3/4, looks OK\n", ctx ); + + /* Send final message */ + rc = wpa_send_final ( ctx, pkt, is_rsn, kie ); + if ( rc < 0 ) + return wpa_fail ( ctx, rc ); 
+ + /* Install PTK */ + rc = wpa_install_ptk ( ctx, pkt->keysize ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p failed to install PTK: %s\n", ctx, + strerror ( rc ) ); + return wpa_fail ( ctx, rc ); + } + + /* Mark us as needing a new Snonce if we rekey */ + ctx->have_Snonce = 0; + + /* Done! */ + ctx->state = WPA_SUCCESS; + return 0; +} + + +/** + * Handle receipt of first frame in Group Key Handshake + * + * @v ctx WPA common context + * @v pkt EAPOL-Key packet + * @v is_rsn If TRUE, frame uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_handle_1_of_2 ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + int rc; + + /* + * WPA and RSN do this completely differently. + * + * The idea of encoding the GTK (or PMKID, or various other + * things) into a KDE that looks like an information element + * is an RSN innovation; old WPA code never encapsulates + * things like that. If it looks like an info element, it + * really is (for the WPA IE check in frames 2/4 and 3/4). The + * "key data encrypted" bit in the info field is also specific + * to RSN. + * + * So from an old WPA host, 3/4 does not contain an + * encapsulated GTK. The first frame of the GK handshake + * contains it, encrypted, but without a KDE wrapper, and with + * the key ID field (which gPXE doesn't use) shoved away in + * the reserved bits in the info field, and the TxRx bit + * stealing the Install bit's spot. 
+ */ + + if ( is_rsn && ( pkt->info & EAPOL_KEY_INFO_KEY_ENC ) ) { + rc = wpa_maybe_install_gtk ( ctx, + ( union ieee80211_ie * ) pkt->data, + pkt->data + pkt->datalen, + pkt->rsc ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p: failed to install GTK in 1/2: " + "%s\n", ctx, strerror ( rc ) ); + return wpa_fail ( ctx, rc ); + } + } else { + rc = kie->decrypt ( &ctx->ptk.kek, pkt->iv, pkt->data, + &pkt->datalen ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p: failed to decrypt GTK: %s\n", + ctx, strerror ( rc ) ); + return rc; /* non-fatal */ + } + if ( pkt->datalen > sizeof ( ctx->gtk.tk ) ) { + DBGC ( ctx, "WPA %p: too much GTK data (%d > %d)\n", + ctx, pkt->datalen, sizeof ( ctx->gtk.tk ) ); + return wpa_fail ( ctx, -EINVAL ); + } + + memcpy ( &ctx->gtk.tk, pkt->data, pkt->datalen ); + wpa_install_gtk ( ctx, pkt->datalen, pkt->rsc ); + } + + DBGC ( ctx, "WPA %p: received 1/2, looks OK\n", ctx ); + + return wpa_send_final ( ctx, pkt, is_rsn, kie ); +} + + +/** + * Handle receipt of EAPOL-Key frame for WPA + * + * @v iob I/O buffer + * @v netdev Network device + * @v ll_source Source link-layer address + */ +static int eapol_key_rx ( struct io_buffer *iob, struct net_device *netdev, + const void *ll_source ) +{ + struct net80211_device *dev = net80211_get ( netdev ); + struct eapol_key_pkt *pkt = iob->data; + int is_rsn, found_ctx; + struct wpa_common_ctx *ctx; + int rc = 0; + struct wpa_kie *kie; + u8 their_mic[16], our_mic[16]; + + if ( pkt->type != EAPOL_KEY_TYPE_WPA && + pkt->type != EAPOL_KEY_TYPE_RSN ) { + DBG ( "EAPOL-Key: packet not of 802.11 type\n" ); + rc = -EINVAL; + goto drop; + } + + is_rsn = ( pkt->type == EAPOL_KEY_TYPE_RSN ); + + if ( ! dev ) { + DBG ( "EAPOL-Key: packet not from 802.11\n" ); + rc = -EINVAL; + goto drop; + } + + if ( memcmp ( dev->bssid, ll_source, ETH_ALEN ) != 0 ) { + DBG ( "EAPOL-Key: packet not from associated AP\n" ); + rc = -EINVAL; + goto drop; + } + + if ( ! 
( ntohs ( pkt->info ) & EAPOL_KEY_INFO_KEY_ACK ) ) { + DBG ( "EAPOL-Key: packet sent in wrong direction\n" ); + rc = -EINVAL; + goto drop; + } + + found_ctx = 0; + list_for_each_entry ( ctx, &wpa_contexts, list ) { + if ( ctx->dev == dev ) { + found_ctx = 1; + break; + } + } + + if ( ! found_ctx ) { + DBG ( "EAPOL-Key: no WPA context to handle packet for %p\n", + dev ); + rc = -ENOENT; + goto drop; + } + + if ( ( void * ) ( pkt + 1 ) + ntohs ( pkt->datalen ) > iob->tail ) { + DBGC ( ctx, "WPA %p: packet truncated (has %d extra bytes, " + "states %d)\n", ctx, iob->tail - ( void * ) ( pkt + 1 ), + ntohs ( pkt->datalen ) ); + rc = -EINVAL; + goto drop; + } + + /* Get a handle on key integrity/encryption handler */ + kie = wpa_find_kie ( ntohs ( pkt->info ) & EAPOL_KEY_INFO_VERSION ); + if ( ! kie ) { + DBGC ( ctx, "WPA %p: no support for packet version %d\n", ctx, + ntohs ( pkt->info ) & EAPOL_KEY_INFO_VERSION ); + rc = wpa_fail ( ctx, -ENOTSUP ); + goto drop; + } + + /* Check MIC */ + if ( ntohs ( pkt->info ) & EAPOL_KEY_INFO_KEY_MIC ) { + memcpy ( their_mic, pkt->mic, sizeof ( pkt->mic ) ); + memset ( pkt->mic, 0, sizeof ( pkt->mic ) ); + kie->mic ( &ctx->ptk.kck, ( void * ) pkt - EAPOL_HDR_LEN, + EAPOL_HDR_LEN + sizeof ( *pkt ) + + ntohs ( pkt->datalen ), our_mic ); + DBGC2 ( ctx, "WPA %p MIC comparison (theirs, ours):\n", ctx ); + DBGC2_HD ( ctx, their_mic, 16 ); + DBGC2_HD ( ctx, our_mic, 16 ); + if ( memcmp ( their_mic, our_mic, sizeof ( pkt->mic ) ) != 0 ) { + DBGC ( ctx, "WPA %p: EAPOL MIC failure\n", ctx ); + goto drop; + } + } + + /* Fix byte order to local */ + pkt->info = ntohs ( pkt->info ); + pkt->keysize = ntohs ( pkt->keysize ); + pkt->datalen = ntohs ( pkt->datalen ); + pkt->replay = be64_to_cpu ( pkt->replay ); + + /* Check replay counter */ + if ( ctx->replay != ~0ULL && ctx->replay >= pkt->replay ) { + DBGC ( ctx, "WPA %p ALERT: Replay detected! 
" + "(%08x:%08x >= %08x:%08x)\n", ctx, + ( u32 ) ( ctx->replay >> 32 ), ( u32 ) ctx->replay, + ( u32 ) ( pkt->replay >> 32 ), ( u32 ) pkt->replay ); + rc = 0; /* ignore without error */ + goto drop; + } + ctx->replay = pkt->replay; + + /* Decrypt key data */ + if ( pkt->info & EAPOL_KEY_INFO_KEY_ENC ) { + rc = kie->decrypt ( &ctx->ptk.kek, pkt->iv, pkt->data, + &pkt->datalen ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p: failed to decrypt packet: %s\n", + ctx, strerror ( rc ) ); + goto drop; + } + } + + /* Hand it off to appropriate handler */ + switch ( pkt->info & ( EAPOL_KEY_INFO_TYPE | + EAPOL_KEY_INFO_KEY_MIC ) ) { + case EAPOL_KEY_TYPE_PTK: + rc = wpa_handle_1_of_4 ( ctx, pkt, is_rsn, kie ); + break; + + case EAPOL_KEY_TYPE_PTK | EAPOL_KEY_INFO_KEY_MIC: + rc = wpa_handle_3_of_4 ( ctx, pkt, is_rsn, kie ); + break; + + case EAPOL_KEY_TYPE_GTK | EAPOL_KEY_INFO_KEY_MIC: + rc = wpa_handle_1_of_2 ( ctx, pkt, is_rsn, kie ); + break; + + default: + DBGC ( ctx, "WPA %p: Invalid combination of key flags %04x\n", + ctx, pkt->info ); + rc = -EINVAL; + break; + } + + drop: + free_iob ( iob ); + return rc; +} + +struct eapol_handler eapol_key_handler __eapol_handler = { + .type = EAPOL_TYPE_KEY, + .rx = eapol_key_rx, +}; + +/* WPA always needs EAPOL in order to be useful */ +REQUIRE_OBJECT ( eapol ); diff --git a/gpxe/src/net/80211/wpa_ccmp.c b/gpxe/src/net/80211/wpa_ccmp.c new file mode 100644 index 00000000..08b1e17b --- /dev/null +++ b/gpxe/src/net/80211/wpa_ccmp.c @@ -0,0 +1,528 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <gpxe/net80211.h> +#include <gpxe/crypto.h> +#include <gpxe/hmac.h> +#include <gpxe/sha1.h> +#include <gpxe/aes.h> +#include <gpxe/wpa.h> +#include <byteswap.h> +#include <errno.h> + +/** @file + * + * Backend for WPA using the CCMP encryption method + */ + +/** Context for CCMP encryption and decryption */ +struct ccmp_ctx +{ + /** AES context - only ever used for encryption */ + u8 aes_ctx[AES_CTX_SIZE]; + + /** Most recently sent packet number */ + u64 tx_seq; + + /** Most recently received packet number */ + u64 rx_seq; +}; + +/** Header structure at the beginning of CCMP frame data */ +struct ccmp_head +{ + u8 pn_lo[2]; /**< Bytes 0 and 1 of packet number */ + u8 _rsvd; /**< Reserved byte */ + u8 kid; /**< Key ID and ExtIV byte */ + u8 pn_hi[4]; /**< Bytes 2-5 (2 first) of packet number */ +} __attribute__ (( packed )); + + +/** CCMP header overhead */ +#define CCMP_HEAD_LEN 8 + +/** CCMP MIC trailer overhead */ +#define CCMP_MIC_LEN 8 + +/** CCMP nonce length */ +#define CCMP_NONCE_LEN 13 + +/** CCMP nonce structure */ +struct ccmp_nonce +{ + u8 prio; /**< Packet priority, 0 for non-QoS */ + u8 a2[ETH_ALEN]; /**< Address 2 from packet header (sender) */ + u8 pn[6]; /**< Packet number */ +} __attribute__ (( packed )); + +/** CCMP additional authentication data length (for non-QoS, non-WDS frames) */ +#define CCMP_AAD_LEN 22 + +/** CCMP additional authentication data structure */ +struct ccmp_aad +{ + u16 fc; /**< Frame Control field */ + u8 a1[6]; /**< Address 1 */ + u8 a2[6]; /**< Address 2 */ + u8 a3[6]; /**< Address 3 */ + u16 seq; /**< Sequence Control field */ + /* Address 4 and QoS Control are included if present */ +} __attribute__ (( packed )); + 
+/** Mask for Frame Control field in AAD */ +#define CCMP_AAD_FC_MASK 0xC38F + +/** Mask for Sequence Control field in AAD */ +#define CCMP_AAD_SEQ_MASK 0x000F + + +/** + * Convert 6-byte LSB packet number to 64-bit integer + * + * @v pn Pointer to 6-byte packet number + * @ret v 64-bit integer value of @a pn + */ +static u64 pn_to_u64 ( const u8 *pn ) +{ + int i; + u64 ret = 0; + + for ( i = 5; i >= 0; i-- ) { + ret <<= 8; + ret |= pn[i]; + } + + return ret; +} + +/** + * Convert 64-bit integer to 6-byte packet number + * + * @v v 64-bit integer + * @v msb If TRUE, reverse the output PN to be in MSB order + * @ret pn 6-byte packet number + * + * The PN is stored in LSB order in the packet header and in MSB order + * in the nonce. WHYYYYY? + */ +static void u64_to_pn ( u64 v, u8 *pn, int msb ) +{ + int i; + u8 *pnp = pn + ( msb ? 5 : 0 ); + int delta = ( msb ? -1 : +1 ); + + for ( i = 0; i < 6; i++ ) { + *pnp = v & 0xFF; + pnp += delta; + v >>= 8; + } +} + +/** Value for @a msb argument of u64_to_pn() for MSB output */ +#define PN_MSB 1 + +/** Value for @a msb argument of u64_to_pn() for LSB output */ +#define PN_LSB 0 + + + +/** + * Initialise CCMP state and install key + * + * @v crypto CCMP cryptosystem structure + * @v key Pointer to 16-byte temporal key to install + * @v keylen Length of key (must be 16 bytes) + * @v rsc Initial receive sequence counter, or NULL to leave it unchanged + */ +static int ccmp_init ( struct net80211_crypto *crypto, const void *key, + int keylen, const void *rsc ) +{ + struct ccmp_ctx *ctx = crypto->priv; + + if ( keylen != 16 ) + return -EINVAL; + + if ( rsc ) + ctx->rx_seq = pn_to_u64 ( rsc ); + + cipher_setkey ( &aes_algorithm, ctx->aes_ctx, key, keylen ); + + return 0; +} + + +/** + * Encrypt or decrypt data stream using AES in Counter mode + * + * @v ctx CCMP cryptosystem context + * @v nonce Nonce value, 13 bytes + * @v srcv Data to encrypt or decrypt + * @v len Number of bytes pointed to by @a srcv + * @v msrcv MIC value to encrypt or decrypt (may be NULL) + * @ret 
destv Encrypted or decrypted data + * @ret mdestv Encrypted or decrypted MIC value + * + * This assumes CCMP parameters of L=2 and M=8. The algorithm is + * defined in RFC 3610. + */ +static void ccmp_ctr_xor ( struct ccmp_ctx *ctx, const void *nonce, + const void *srcv, void *destv, int len, + const void *msrcv, void *mdestv ) +{ + u8 A[16], S[16]; + u16 ctr; + int i; + const u8 *src = srcv, *msrc = msrcv; + u8 *dest = destv, *mdest = mdestv; + + A[0] = 0x01; /* flags, L' = L - 1 = 1, other bits rsvd */ + memcpy ( A + 1, nonce, CCMP_NONCE_LEN ); + + if ( msrcv ) { + A[14] = A[15] = 0; + + cipher_encrypt ( &aes_algorithm, ctx->aes_ctx, A, S, 16 ); + + for ( i = 0; i < 8; i++ ) { + *mdest++ = *msrc++ ^ S[i]; + } + } + + for ( ctr = 1 ;; ctr++ ) { + A[14] = ctr >> 8; + A[15] = ctr & 0xFF; + + cipher_encrypt ( &aes_algorithm, ctx->aes_ctx, A, S, 16 ); + + for ( i = 0; i < len && i < 16; i++ ) + *dest++ = *src++ ^ S[i]; + + if ( len <= 16 ) + break; /* we're done */ + + len -= 16; + } +} + + +/** + * Advance one block in CBC-MAC calculation + * + * @v aes_ctx AES encryption context with key set + * @v B Cleartext block to incorporate (16 bytes) + * @v X Previous ciphertext block (16 bytes) + * @ret B Clobbered + * @ret X New ciphertext block (16 bytes) + * + * This function does X := E[key] ( X ^ B ). + */ +static void ccmp_feed_cbc_mac ( void *aes_ctx, u8 *B, u8 *X ) +{ + int i; + for ( i = 0; i < 16; i++ ) + B[i] ^= X[i]; + cipher_encrypt ( &aes_algorithm, aes_ctx, B, X, 16 ); +} + + +/** + * Calculate MIC on plaintext data using CBC-MAC + * + * @v ctx CCMP cryptosystem context + * @v nonce Nonce value, 13 bytes + * @v data Data to calculate MIC over + * @v datalen Length of @a data + * @v aad Additional authentication data, for MIC but not encryption + * @ret mic MIC value (unencrypted), 8 bytes + * + * @a aad is assumed to be 22 bytes long, as it always is for + * 802.11 use when transmitting non-QoS, not-between-APs frames (the + * only type we deal with). 
+ */ +static void ccmp_cbc_mac ( struct ccmp_ctx *ctx, const void *nonce, + const void *data, u16 datalen, + const void *aad, void *mic ) +{ + u8 X[16], B[16]; + + /* Zeroth block: flags, nonce, length */ + + /* Rsv AAD - M'- - L'- + * 0 1 0 1 1 0 0 1 for an 8-byte MAC and 2-byte message length + */ + B[0] = 0x59; + memcpy ( B + 1, nonce, CCMP_NONCE_LEN ); + B[14] = datalen >> 8; + B[15] = datalen & 0xFF; + + cipher_encrypt ( &aes_algorithm, ctx->aes_ctx, B, X, 16 ); + + /* First block: AAD length field and 14 bytes of AAD */ + B[0] = 0; + B[1] = CCMP_AAD_LEN; + memcpy ( B + 2, aad, 14 ); + + ccmp_feed_cbc_mac ( ctx->aes_ctx, B, X ); + + /* Second block: Remaining 8 bytes of AAD, 8 bytes zero pad */ + memcpy ( B, aad + 14, 8 ); + memset ( B + 8, 0, 8 ); + + ccmp_feed_cbc_mac ( ctx->aes_ctx, B, X ); + + /* Message blocks */ + while ( datalen ) { + if ( datalen >= 16 ) { + memcpy ( B, data, 16 ); + datalen -= 16; + } else { + memcpy ( B, data, datalen ); + memset ( B + datalen, 0, 16 - datalen ); + datalen = 0; + } + + ccmp_feed_cbc_mac ( ctx->aes_ctx, B, X ); + + data += 16; + } + + /* Get MIC from final value of X */ + memcpy ( mic, X, 8 ); +} + + +/** + * Encapsulate and encrypt a packet using CCMP + * + * @v crypto CCMP cryptosystem + * @v iob I/O buffer containing cleartext packet + * @ret eiob I/O buffer containing encrypted packet + */ +struct io_buffer * ccmp_encrypt ( struct net80211_crypto *crypto, + struct io_buffer *iob ) +{ + struct ccmp_ctx *ctx = crypto->priv; + struct ieee80211_frame *hdr = iob->data; + struct io_buffer *eiob; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( iob ) - hdrlen; + struct ccmp_head head; + struct ccmp_nonce nonce; + struct ccmp_aad aad; + u8 mic[8], tx_pn[6]; + void *edata, *emic; + + ctx->tx_seq++; + u64_to_pn ( ctx->tx_seq, tx_pn, PN_LSB ); + + /* Allocate memory */ + eiob = alloc_iob ( iob_len ( iob ) + CCMP_HEAD_LEN + CCMP_MIC_LEN ); + if ( ! 
eiob ) + return NULL; + + /* Copy frame header */ + memcpy ( iob_put ( eiob, hdrlen ), iob->data, hdrlen ); + hdr = eiob->data; + hdr->fc |= IEEE80211_FC_PROTECTED; + + /* Fill in packet number and extended IV */ + memcpy ( head.pn_lo, tx_pn, 2 ); + memcpy ( head.pn_hi, tx_pn + 2, 4 ); + head.kid = 0x20; /* have Extended IV, key ID 0 */ + head._rsvd = 0; + memcpy ( iob_put ( eiob, sizeof ( head ) ), &head, sizeof ( head ) ); + + /* Form nonce */ + nonce.prio = 0; + memcpy ( nonce.a2, hdr->addr2, ETH_ALEN ); + u64_to_pn ( ctx->tx_seq, nonce.pn, PN_MSB ); + + /* Form additional authentication data */ + aad.fc = hdr->fc & CCMP_AAD_FC_MASK; + memcpy ( aad.a1, hdr->addr1, 3 * ETH_ALEN ); /* all 3 at once */ + aad.seq = hdr->seq & CCMP_AAD_SEQ_MASK; + + /* Calculate MIC over the data */ + ccmp_cbc_mac ( ctx, &nonce, iob->data + hdrlen, datalen, &aad, mic ); + + /* Copy and encrypt data and MIC */ + edata = iob_put ( eiob, datalen ); + emic = iob_put ( eiob, CCMP_MIC_LEN ); + ccmp_ctr_xor ( ctx, &nonce, + iob->data + hdrlen, edata, datalen, + mic, emic ); + + /* Done! */ + DBGC2 ( ctx, "WPA-CCMP %p: encrypted packet %p -> %p\n", ctx, + iob, eiob ); + + return eiob; +} + +/** + * Decrypt a packet using CCMP + * + * @v crypto CCMP cryptosystem + * @v eiob I/O buffer containing encrypted packet + * @ret iob I/O buffer containing cleartext packet + */ +static struct io_buffer * ccmp_decrypt ( struct net80211_crypto *crypto, + struct io_buffer *eiob ) +{ + struct ccmp_ctx *ctx = crypto->priv; + struct ieee80211_frame *hdr; + struct io_buffer *iob; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( eiob ) - hdrlen - CCMP_HEAD_LEN - CCMP_MIC_LEN; + struct ccmp_head *head; + struct ccmp_nonce nonce; + struct ccmp_aad aad; + u8 rx_pn[6], their_mic[8], our_mic[8]; + + iob = alloc_iob ( hdrlen + datalen ); + if ( ! 
iob ) + return NULL; + + /* Copy frame header */ + memcpy ( iob_put ( iob, hdrlen ), eiob->data, hdrlen ); + hdr = iob->data; + hdr->fc &= ~IEEE80211_FC_PROTECTED; + + /* Check and update RX packet number */ + head = eiob->data + hdrlen; + memcpy ( rx_pn, head->pn_lo, 2 ); + memcpy ( rx_pn + 2, head->pn_hi, 4 ); + + if ( pn_to_u64 ( rx_pn ) <= ctx->rx_seq ) { + DBGC ( ctx, "WPA-CCMP %p: packet received out of order " + "(%012llx <= %012llx)\n", ctx, pn_to_u64 ( rx_pn ), + ctx->rx_seq ); + free_iob ( iob ); + return NULL; + } + + ctx->rx_seq = pn_to_u64 ( rx_pn ); + DBGC2 ( ctx, "WPA-CCMP %p: RX packet number %012llx\n", ctx, ctx->rx_seq ); + + /* Form nonce */ + nonce.prio = 0; + memcpy ( nonce.a2, hdr->addr2, ETH_ALEN ); + u64_to_pn ( ctx->rx_seq, nonce.pn, PN_MSB ); + + /* Form additional authentication data */ + aad.fc = ( hdr->fc & CCMP_AAD_FC_MASK ) | IEEE80211_FC_PROTECTED; + memcpy ( aad.a1, hdr->addr1, 3 * ETH_ALEN ); /* all 3 at once */ + aad.seq = hdr->seq & CCMP_AAD_SEQ_MASK; + + /* Copy-decrypt data and MIC */ + ccmp_ctr_xor ( ctx, &nonce, eiob->data + hdrlen + sizeof ( *head ), + iob_put ( iob, datalen ), datalen, + eiob->tail - CCMP_MIC_LEN, their_mic ); + + /* Check MIC */ + ccmp_cbc_mac ( ctx, &nonce, iob->data + hdrlen, datalen, &aad, + our_mic ); + + if ( memcmp ( their_mic, our_mic, CCMP_MIC_LEN ) != 0 ) { + DBGC2 ( ctx, "WPA-CCMP %p: MIC failure\n", ctx ); + free_iob ( iob ); + return NULL; + } + + DBGC2 ( ctx, "WPA-CCMP %p: decrypted packet %p -> %p\n", ctx, + eiob, iob ); + + return iob; +} + + +/** CCMP cryptosystem */ +struct net80211_crypto ccmp_crypto __net80211_crypto = { + .algorithm = NET80211_CRYPT_CCMP, + .init = ccmp_init, + .encrypt = ccmp_encrypt, + .decrypt = ccmp_decrypt, + .priv_len = sizeof ( struct ccmp_ctx ), +}; + + + + +/** + * Calculate HMAC-SHA1 MIC for EAPOL-Key frame + * + * @v kck Key Confirmation Key, 16 bytes + * @v msg Message to calculate MIC over + * @v len Number of bytes to calculate MIC over + * @ret mic 
Calculated MIC, 16 bytes long + */ +static void ccmp_kie_mic ( const void *kck, const void *msg, size_t len, + void *mic ) +{ + u8 sha1_ctx[SHA1_CTX_SIZE]; + u8 kckb[16]; + u8 hash[SHA1_SIZE]; + size_t kck_len = 16; + + memcpy ( kckb, kck, kck_len ); + + hmac_init ( &sha1_algorithm, sha1_ctx, kckb, &kck_len ); + hmac_update ( &sha1_algorithm, sha1_ctx, msg, len ); + hmac_final ( &sha1_algorithm, sha1_ctx, kckb, &kck_len, hash ); + + memcpy ( mic, hash, 16 ); +} + +/** + * Decrypt key data in EAPOL-Key frame + * + * @v kek Key Encryption Key, 16 bytes + * @v iv Initialisation vector, 16 bytes (unused) + * @v msg Message to decrypt + * @v len Length of message + * @ret msg Decrypted message in place of original + * @ret len Adjusted downward for 8 bytes of overhead + * @ret rc Return status code + * + * The returned message may still contain padding of 0xDD followed by + * zero or more 0x00 octets. It is impossible to remove the padding + * without parsing the IEs in the packet (another design decision that + * tends to make one question the 802.11i committee's intelligence...) + */ +static int ccmp_kie_decrypt ( const void *kek, const void *iv __unused, + void *msg, u16 *len ) +{ + if ( *len % 8 != 0 ) + return -EINVAL; + + if ( aes_unwrap ( kek, msg, msg, *len / 8 - 1 ) != 0 ) + return -EINVAL; + + *len -= 8; + + return 0; +} + +/** CCMP-style key integrity and encryption handler */ +struct wpa_kie ccmp_kie __wpa_kie = { + .version = EAPOL_KEY_VERSION_WPA2, + .mic = ccmp_kie_mic, + .decrypt = ccmp_kie_decrypt, +}; diff --git a/gpxe/src/net/80211/wpa_psk.c b/gpxe/src/net/80211/wpa_psk.c new file mode 100644 index 00000000..e7521682 --- /dev/null +++ b/gpxe/src/net/80211/wpa_psk.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <gpxe/net80211.h> +#include <gpxe/sha1.h> +#include <gpxe/wpa.h> +#include <errno.h> + +/** @file + * + * Frontend for WPA using a pre-shared key. + */ + +/** + * Initialise WPA-PSK state + * + * @v dev 802.11 device + * @ret rc Return status code + */ +static int wpa_psk_init ( struct net80211_device *dev ) +{ + return wpa_make_rsn_ie ( dev, &dev->rsn_ie ); +} + +/** + * Start WPA-PSK authentication + * + * @v dev 802.11 device + * @ret rc Return status code + */ +static int wpa_psk_start ( struct net80211_device *dev ) +{ + char passphrase[64+1]; + u8 pmk[WPA_PMK_LEN]; + int len; + struct wpa_common_ctx *ctx = dev->handshaker->priv; + + len = fetch_string_setting ( netdev_settings ( dev->netdev ), + &net80211_key_setting, passphrase, + 64 + 1 ); + + if ( len <= 0 ) { + DBGC ( ctx, "WPA-PSK %p: no passphrase provided!\n", ctx ); + net80211_deauthenticate ( dev, -EACCES ); + return -EACCES; + } + + pbkdf2_sha1 ( passphrase, len, dev->essid, strlen ( dev->essid ), + 4096, pmk, WPA_PMK_LEN ); + + DBGC ( ctx, "WPA-PSK %p: derived PMK from passphrase `%s':\n", ctx, + passphrase ); + DBGC_HD ( ctx, pmk, WPA_PMK_LEN ); + + return wpa_start ( dev, ctx, pmk, WPA_PMK_LEN ); +} + +/** + * Step WPA-PSK authentication + * + * @v dev 802.11 
device + * @ret rc Return status code + */ +static int wpa_psk_step ( struct net80211_device *dev ) +{ + struct wpa_common_ctx *ctx = dev->handshaker->priv; + + switch ( ctx->state ) { + case WPA_SUCCESS: + return 1; + case WPA_FAILURE: + return -EACCES; + default: + return 0; + } +} + +/** + * Do-nothing function; you can't change a WPA key post-authentication + * + * @v dev 802.11 device + * @ret rc Return status code + */ +static int wpa_psk_no_change_key ( struct net80211_device *dev __unused ) +{ + return 0; +} + +/** + * Disable handling of received WPA authentication frames + * + * @v dev 802.11 device + */ +static void wpa_psk_stop ( struct net80211_device *dev ) +{ + wpa_stop ( dev ); +} + +/** WPA-PSK security handshaker */ +struct net80211_handshaker wpa_psk_handshaker __net80211_handshaker = { + .protocol = NET80211_SECPROT_PSK, + .init = wpa_psk_init, + .start = wpa_psk_start, + .step = wpa_psk_step, + .change_key = wpa_psk_no_change_key, + .stop = wpa_psk_stop, + .priv_len = sizeof ( struct wpa_common_ctx ), +}; diff --git a/gpxe/src/net/80211/wpa_tkip.c b/gpxe/src/net/80211/wpa_tkip.c new file mode 100644 index 00000000..0cb697fa --- /dev/null +++ b/gpxe/src/net/80211/wpa_tkip.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <gpxe/net80211.h> +#include <gpxe/crypto.h> +#include <gpxe/hmac.h> +#include <gpxe/sha1.h> +#include <gpxe/md5.h> +#include <gpxe/crc32.h> +#include <gpxe/arc4.h> +#include <gpxe/wpa.h> +#include <byteswap.h> +#include <errno.h> + +/** @file + * + * Backend for WPA using the TKIP encryption standard. + */ + +/** Context for one direction of TKIP, either encryption or decryption */ +struct tkip_dir_ctx +{ + /** High 32 bits of last sequence counter value used */ + u32 tsc_hi; + + /** Low 16 bits of last sequence counter value used */ + u16 tsc_lo; + + /** MAC address used to derive TTAK */ + u8 mac[ETH_ALEN]; + + /** If TRUE, TTAK is valid */ + u16 ttak_ok; + + /** TKIP-mixed transmit address and key, depends on tsc_hi and MAC */ + u16 ttak[5]; +}; + +/** Context for TKIP encryption and decryption */ +struct tkip_ctx +{ + /** Temporal key to use */ + struct tkip_tk tk; + + /** State for encryption */ + struct tkip_dir_ctx enc; + + /** State for decryption */ + struct tkip_dir_ctx dec; +}; + +/** Header structure at the beginning of TKIP frame data */ +struct tkip_head +{ + u8 tsc1; /**< High byte of low 16 bits of TSC */ + u8 seed1; /**< Second byte of WEP seed */ + u8 tsc0; /**< Low byte of TSC */ + u8 kid; /**< Key ID and ExtIV byte */ + u32 tsc_hi; /**< High 32 bits of TSC, as an ExtIV */ +} __attribute__ (( packed )); + + +/** TKIP header overhead (IV + KID + ExtIV) */ +#define TKIP_HEAD_LEN 8 + +/** TKIP trailer overhead (MIC + ICV) [assumes unfragmented] */ +#define TKIP_FOOT_LEN 12 + +/** TKIP MIC length */ +#define TKIP_MIC_LEN 8 + +/** TKIP ICV length */ +#define TKIP_ICV_LEN 4 + + +/** TKIP S-box */ +static const u16 Sbox[256] = { + 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, + 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A, + 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B, + 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B, + 0x75C2, 
0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F, + 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F, + 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5, + 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F, + 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB, + 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397, + 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED, + 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A, + 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194, + 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3, + 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104, + 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D, + 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39, + 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695, + 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83, + 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76, + 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4, + 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B, + 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0, + 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018, + 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751, + 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85, + 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12, + 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9, + 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7, + 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A, + 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8, + 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, +}; + +/** + * Perform S-box mapping on a 16-bit value + * + * @v v Value to perform S-box mapping on + * @ret Sv S-box mapped value + */ +static inline u16 S ( 
u16 v ) +{ + return Sbox[v & 0xFF] ^ swap16 ( Sbox[v >> 8] ); +} + +/** + * Rotate 16-bit value right + * + * @v v Value to rotate + * @v bits Number of bits to rotate by + * @ret rotv Rotated value + */ +static inline u16 ror16 ( u16 v, int bits ) +{ + return ( v >> bits ) | ( v << ( 16 - bits ) ); +} + +/** + * Rotate 32-bit value right + * + * @v v Value to rotate + * @v bits Number of bits to rotate by + * @ret rotv Rotated value + */ +static inline u32 ror32 ( u32 v, int bits ) +{ + return ( v >> bits ) | ( v << ( 32 - bits ) ); +} + +/** + * Rotate 32-bit value left + * + * @v v Value to rotate + * @v bits Number of bits to rotate by + * @ret rotv Rotated value + */ +static inline u32 rol32 ( u32 v, int bits ) +{ + return ( v << bits ) | ( v >> ( 32 - bits ) ); +} + + +/** + * Initialise TKIP state and install key + * + * @v crypto TKIP cryptosystem structure + * @v key Pointer to tkip_tk to install + * @v keylen Length of key (32 bytes) + * @v rsc Initial receive sequence counter + */ +static int tkip_init ( struct net80211_crypto *crypto, const void *key, + int keylen, const void *rsc ) +{ + struct tkip_ctx *ctx = crypto->priv; + const u8 *rscb = rsc; + + if ( keylen != sizeof ( ctx->tk ) ) + return -EINVAL; + + if ( rscb ) { + ctx->dec.tsc_lo = ( rscb[1] << 8 ) | rscb[0]; + ctx->dec.tsc_hi = ( ( rscb[5] << 24 ) | ( rscb[4] << 16 ) | + ( rscb[3] << 8 ) | rscb[2] ); + } + + memcpy ( &ctx->tk, key, sizeof ( ctx->tk ) ); + + return 0; +} + +/** + * Perform TKIP key mixing, phase 1 + * + * @v dctx TKIP directional context + * @v tk TKIP temporal key + * @v mac MAC address of transmitter + * + * This recomputes the TTAK in @a dctx if necessary, and sets + * @c dctx->ttak_ok. + */ +static void tkip_mix_1 ( struct tkip_dir_ctx *dctx, struct tkip_tk *tk, u8 *mac ) +{ + int i, j; + + if ( dctx->ttak_ok && ! 
memcmp ( mac, dctx->mac, ETH_ALEN ) ) + return; + + memcpy ( dctx->mac, mac, ETH_ALEN ); + + dctx->ttak[0] = dctx->tsc_hi & 0xFFFF; + dctx->ttak[1] = dctx->tsc_hi >> 16; + dctx->ttak[2] = ( mac[1] << 8 ) | mac[0]; + dctx->ttak[3] = ( mac[3] << 8 ) | mac[2]; + dctx->ttak[4] = ( mac[5] << 8 ) | mac[4]; + + for ( i = 0; i < 8; i++ ) { + j = 2 * ( i & 1 ); + + dctx->ttak[0] += S ( dctx->ttak[4] ^ ( ( tk->key[1 + j] << 8 ) | + tk->key[0 + j] ) ); + dctx->ttak[1] += S ( dctx->ttak[0] ^ ( ( tk->key[5 + j] << 8 ) | + tk->key[4 + j] ) ); + dctx->ttak[2] += S ( dctx->ttak[1] ^ ( ( tk->key[9 + j] << 8 ) | + tk->key[8 + j] ) ); + dctx->ttak[3] += S ( dctx->ttak[2] ^ ( ( tk->key[13+ j] << 8 ) | + tk->key[12+ j] ) ); + dctx->ttak[4] += S ( dctx->ttak[3] ^ ( ( tk->key[1 + j] << 8 ) | + tk->key[0 + j] ) ) + i; + } + + dctx->ttak_ok = 1; +} + +/** + * Perform TKIP key mixing, phase 2 + * + * @v dctx TKIP directional context + * @v tk TKIP temporal key + * @ret key ARC4 key, 16 bytes long + */ +static void tkip_mix_2 ( struct tkip_dir_ctx *dctx, struct tkip_tk *tk, + void *key ) +{ + u8 *kb = key; + u16 ppk[6]; + int i; + + memcpy ( ppk, dctx->ttak, sizeof ( dctx->ttak ) ); + ppk[5] = dctx->ttak[4] + dctx->tsc_lo; + + ppk[0] += S ( ppk[5] ^ ( ( tk->key[1] << 8 ) | tk->key[0] ) ); + ppk[1] += S ( ppk[0] ^ ( ( tk->key[3] << 8 ) | tk->key[2] ) ); + ppk[2] += S ( ppk[1] ^ ( ( tk->key[5] << 8 ) | tk->key[4] ) ); + ppk[3] += S ( ppk[2] ^ ( ( tk->key[7] << 8 ) | tk->key[6] ) ); + ppk[4] += S ( ppk[3] ^ ( ( tk->key[9] << 8 ) | tk->key[8] ) ); + ppk[5] += S ( ppk[4] ^ ( ( tk->key[11] << 8 ) | tk->key[10] ) ); + + ppk[0] += ror16 ( ppk[5] ^ ( ( tk->key[13] << 8 ) | tk->key[12] ), 1 ); + ppk[1] += ror16 ( ppk[0] ^ ( ( tk->key[15] << 8 ) | tk->key[14] ), 1 ); + ppk[2] += ror16 ( ppk[1], 1 ); + ppk[3] += ror16 ( ppk[2], 1 ); + ppk[4] += ror16 ( ppk[3], 1 ); + ppk[5] += ror16 ( ppk[4], 1 ); + + kb[0] = dctx->tsc_lo >> 8; + kb[1] = ( ( dctx->tsc_lo >> 8 ) | 0x20 ) & 0x7F; + kb[2] = dctx->tsc_lo & 
0xFF; + kb[3] = ( ( ppk[5] ^ ( ( tk->key[1] << 8 ) | tk->key[0] ) ) >> 1 ) + & 0xFF; + + for ( i = 0; i < 6; i++ ) { + kb[4 + 2*i] = ppk[i] & 0xFF; + kb[5 + 2*i] = ppk[i] >> 8; + } +} + +/** + * Update Michael message integrity code based on next 32-bit word of data + * + * @v V Michael code state (two 32-bit words) + * @v word Next 32-bit word of data + */ +static void tkip_feed_michael ( u32 *V, u32 word ) +{ + V[0] ^= word; + V[1] ^= rol32 ( V[0], 17 ); + V[0] += V[1]; + V[1] ^= ( ( V[0] & 0xFF00FF00 ) >> 8 ) | ( ( V[0] & 0x00FF00FF ) << 8 ); + V[0] += V[1]; + V[1] ^= rol32 ( V[0], 3 ); + V[0] += V[1]; + V[1] ^= ror32 ( V[0], 2 ); + V[0] += V[1]; +} + +/** + * Calculate Michael message integrity code + * + * @v key MIC key to use (8 bytes) + * @v da Destination link-layer address + * @v sa Source link-layer address + * @v data Start of data to calculate over + * @v len Length of header + data + * @ret mic Calculated Michael MIC (8 bytes) + */ +static void tkip_michael ( const void *key, const void *da, const void *sa, + const void *data, size_t len, void *mic ) +{ + u32 V[2]; /* V[0] = "l", V[1] = "r" in 802.11 */ + union { + u8 byte[12]; + u32 word[3]; + } cap; + const u8 *ptr = data; + const u8 *end = ptr + len; + int i; + + memcpy ( V, key, sizeof ( V ) ); + V[0] = le32_to_cpu ( V[0] ); + V[1] = le32_to_cpu ( V[1] ); + + /* Feed in header (we assume non-QoS, so Priority = 0) */ + memcpy ( &cap.byte[0], da, ETH_ALEN ); + memcpy ( &cap.byte[6], sa, ETH_ALEN ); + tkip_feed_michael ( V, le32_to_cpu ( cap.word[0] ) ); + tkip_feed_michael ( V, le32_to_cpu ( cap.word[1] ) ); + tkip_feed_michael ( V, le32_to_cpu ( cap.word[2] ) ); + tkip_feed_michael ( V, 0 ); + + /* Feed in data */ + while ( ptr + 4 <= end ) { + tkip_feed_michael ( V, le32_to_cpu ( *( u32 * ) ptr ) ); + ptr += 4; + } + + /* Add unaligned part and padding */ + for ( i = 0; ptr < end; i++ ) + cap.byte[i] = *ptr++; + cap.byte[i++] = 0x5a; + for ( ; i < 8; i++ ) + cap.byte[i] = 0; + + /* Feed in padding 
*/ + tkip_feed_michael ( V, le32_to_cpu ( cap.word[0] ) ); + tkip_feed_michael ( V, le32_to_cpu ( cap.word[1] ) ); + + /* Output MIC */ + V[0] = cpu_to_le32 ( V[0] ); + V[1] = cpu_to_le32 ( V[1] ); + memcpy ( mic, V, sizeof ( V ) ); +} + +/** + * Encrypt a packet using TKIP + * + * @v crypto TKIP cryptosystem + * @v iob I/O buffer containing cleartext packet + * @ret eiob I/O buffer containing encrypted packet + */ +static struct io_buffer * tkip_encrypt ( struct net80211_crypto *crypto, + struct io_buffer *iob ) +{ + struct tkip_ctx *ctx = crypto->priv; + struct ieee80211_frame *hdr = iob->data; + struct io_buffer *eiob; + struct arc4_ctx arc4; + u8 key[16]; + struct tkip_head head; + u8 mic[8]; + u32 icv; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( iob ) - hdrlen; + + ctx->enc.tsc_lo++; + if ( ctx->enc.tsc_lo == 0 ) { + ctx->enc.tsc_hi++; + ctx->enc.ttak_ok = 0; + } + + tkip_mix_1 ( &ctx->enc, &ctx->tk, hdr->addr2 ); + tkip_mix_2 ( &ctx->enc, &ctx->tk, key ); + + eiob = alloc_iob ( iob_len ( iob ) + TKIP_HEAD_LEN + TKIP_FOOT_LEN ); + if ( ! 
eiob ) + return NULL; + + /* Copy frame header */ + memcpy ( iob_put ( eiob, hdrlen ), iob->data, hdrlen ); + hdr = eiob->data; + hdr->fc |= IEEE80211_FC_PROTECTED; + + /* Fill in IV and key ID byte, and extended IV */ + memcpy ( &head, key, 3 ); + head.kid = 0x20; /* have Extended IV, key ID 0 */ + head.tsc_hi = cpu_to_le32 ( ctx->enc.tsc_hi ); + memcpy ( iob_put ( eiob, sizeof ( head ) ), &head, sizeof ( head ) ); + + /* Copy and encrypt the data */ + cipher_setkey ( &arc4_algorithm, &arc4, key, 16 ); + cipher_encrypt ( &arc4_algorithm, &arc4, iob->data + hdrlen, + iob_put ( eiob, datalen ), datalen ); + + /* Add MIC */ + hdr = iob->data; + tkip_michael ( &ctx->tk.mic.tx, hdr->addr3, hdr->addr2, + iob->data + hdrlen, datalen, mic ); + cipher_encrypt ( &arc4_algorithm, &arc4, mic, + iob_put ( eiob, sizeof ( mic ) ), sizeof ( mic ) ); + + /* Add ICV */ + icv = crc32_le ( ~0, iob->data + hdrlen, datalen ); + icv = crc32_le ( icv, mic, sizeof ( mic ) ); + icv = cpu_to_le32 ( ~icv ); + cipher_encrypt ( &arc4_algorithm, &arc4, &icv, + iob_put ( eiob, TKIP_ICV_LEN ), TKIP_ICV_LEN ); + + DBGC2 ( ctx, "WPA-TKIP %p: encrypted packet %p -> %p\n", ctx, + iob, eiob ); + + return eiob; +} + +/** + * Decrypt a packet using TKIP + * + * @v crypto TKIP cryptosystem + * @v eiob I/O buffer containing encrypted packet + * @ret iob I/O buffer containing cleartext packet + */ +static struct io_buffer * tkip_decrypt ( struct net80211_crypto *crypto, + struct io_buffer *eiob ) +{ + struct tkip_ctx *ctx = crypto->priv; + struct ieee80211_frame *hdr; + struct io_buffer *iob; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( eiob ) - hdrlen - TKIP_HEAD_LEN - TKIP_FOOT_LEN; + struct tkip_head *head; + struct arc4_ctx arc4; + u16 rx_tsc_lo; + u8 key[16]; + u8 mic[8]; + u32 icv, crc; + + iob = alloc_iob ( hdrlen + datalen + TKIP_FOOT_LEN ); + if ( ! 
iob ) + return NULL; + + /* Copy frame header */ + memcpy ( iob_put ( iob, hdrlen ), eiob->data, hdrlen ); + hdr = iob->data; + hdr->fc &= ~IEEE80211_FC_PROTECTED; + + /* Check and update TSC */ + head = eiob->data + hdrlen; + rx_tsc_lo = ( head->tsc1 << 8 ) | head->tsc0; + + if ( head->tsc_hi < ctx->dec.tsc_hi || + ( head->tsc_hi == ctx->dec.tsc_hi && + rx_tsc_lo <= ctx->dec.tsc_lo ) ) { + DBGC ( ctx, "WPA-TKIP %p: packet received out of order " + "(%08x:%04x <= %08x:%04x)\n", ctx, head->tsc_hi, + rx_tsc_lo, ctx->dec.tsc_hi, ctx->dec.tsc_lo ); + free_iob ( iob ); + return NULL; + } + ctx->dec.tsc_lo = rx_tsc_lo; + if ( ctx->dec.tsc_hi != head->tsc_hi ) { + ctx->dec.ttak_ok = 0; + ctx->dec.tsc_hi = head->tsc_hi; + } + + /* Calculate key */ + tkip_mix_1 ( &ctx->dec, &ctx->tk, hdr->addr2 ); + tkip_mix_2 ( &ctx->dec, &ctx->tk, key ); + + /* Copy-decrypt data, MIC, ICV */ + cipher_setkey ( &arc4_algorithm, &arc4, key, 16 ); + cipher_decrypt ( &arc4_algorithm, &arc4, + eiob->data + hdrlen + TKIP_HEAD_LEN, + iob_put ( iob, datalen ), datalen + TKIP_FOOT_LEN ); + + /* Check ICV */ + icv = le32_to_cpu ( *( u32 * ) ( iob->tail + TKIP_MIC_LEN ) ); + crc = ~crc32_le ( ~0, iob->data + hdrlen, datalen + TKIP_MIC_LEN ); + if ( crc != icv ) { + DBGC ( ctx, "WPA-TKIP %p CRC mismatch: expect %08x, get %08x\n", + ctx, icv, crc ); + free_iob ( iob ); + return NULL; + } + + /* Check MIC */ + tkip_michael ( &ctx->tk.mic.rx, hdr->addr1, hdr->addr3, + iob->data + hdrlen, datalen, mic ); + if ( memcmp ( mic, iob->tail, TKIP_MIC_LEN ) != 0 ) { + DBGC ( ctx, "WPA-TKIP %p ALERT! 
MIC failure\n", ctx ); + /* XXX we should do the countermeasures here */ + free_iob ( iob ); + return NULL; + } + + DBGC2 ( ctx, "WPA-TKIP %p: decrypted packet %p -> %p\n", ctx, + eiob, iob ); + + return iob; +} + +/** TKIP cryptosystem */ +struct net80211_crypto tkip_crypto __net80211_crypto = { + .algorithm = NET80211_CRYPT_TKIP, + .init = tkip_init, + .encrypt = tkip_encrypt, + .decrypt = tkip_decrypt, + .priv_len = sizeof ( struct tkip_ctx ), +}; + + + + +/** + * Calculate HMAC-MD5 MIC for EAPOL-Key frame + * + * @v kck Key Confirmation Key, 16 bytes + * @v msg Message to calculate MIC over + * @v len Number of bytes to calculate MIC over + * @ret mic Calculated MIC, 16 bytes long + */ +static void tkip_kie_mic ( const void *kck, const void *msg, size_t len, + void *mic ) +{ + struct md5_ctx md5; + u8 kckb[16]; + size_t kck_len = 16; + + memcpy ( kckb, kck, kck_len ); + + hmac_init ( &md5_algorithm, &md5, kckb, &kck_len ); + hmac_update ( &md5_algorithm, &md5, msg, len ); + hmac_final ( &md5_algorithm, &md5, kckb, &kck_len, mic ); +} + +/** + * Decrypt key data in EAPOL-Key frame + * + * @v kek Key Encryption Key, 16 bytes + * @v iv Initialisation vector, 16 bytes + * @v msg Message to decrypt + * @v len Length of message + * @ret msg Decrypted message in place of original + * @ret len Unchanged + * @ret rc Always 0 for success + */ +static int tkip_kie_decrypt ( const void *kek, const void *iv, + void *msg, u16 *len ) +{ + u8 key[32]; + memcpy ( key, iv, 16 ); + memcpy ( key + 16, kek, 16 ); + + arc4_skip ( key, 32, 256, msg, msg, *len ); + + return 0; +} + + +/** TKIP-style key integrity and encryption handler */ +struct wpa_kie tkip_kie __wpa_kie = { + .version = EAPOL_KEY_VERSION_WPA, + .mic = tkip_kie_mic, + .decrypt = tkip_kie_decrypt, +}; diff --git a/gpxe/src/net/aoe.c b/gpxe/src/net/aoe.c index 08887fe0..839a875b 100644 --- a/gpxe/src/net/aoe.c +++ b/gpxe/src/net/aoe.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stddef.h> #include <string.h> #include <stdio.h> @@ -66,6 +68,7 @@ static void aoe_done ( struct aoe_session *aoe, int rc ) { /* Record overall command status */ if ( aoe->command ) { aoe->command->cb.cmd_stat = aoe->status; + aoe->command->rc = rc; aoe->command = NULL; } @@ -354,7 +357,6 @@ static int aoe_command ( struct ata_device *ata, struct ata_command *command ) { struct aoe_session *aoe = container_of ( ata->backend, struct aoe_session, refcnt ); - int rc; aoe->command = command; aoe->status = 0; @@ -363,15 +365,9 @@ static int aoe_command ( struct ata_device *ata, aoe_send_command ( aoe ); - aoe->rc = -EINPROGRESS; - while ( aoe->rc == -EINPROGRESS ) - step(); - rc = aoe->rc; - - return rc; + return 0; } - /** * Issue AoE config query for AoE target discovery * @@ -444,8 +440,7 @@ int aoe_attach ( struct ata_device *ata, struct net_device *netdev, return -ENOMEM; aoe->refcnt.free = aoe_free; aoe->netdev = netdev_get ( netdev ); - memcpy ( aoe->target, ethernet_protocol.ll_broadcast, - sizeof ( aoe->target ) ); + memcpy ( aoe->target, netdev->ll_broadcast, sizeof ( aoe->target ) ); aoe->tag = AOE_TAG_MAGIC; aoe->timer.expired = aoe_timer_expired; diff --git a/gpxe/src/net/arp.c b/gpxe/src/net/arp.c index ba9ebf48..124a856e 100644 --- a/gpxe/src/net/arp.c +++ b/gpxe/src/net/arp.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <string.h> #include <byteswap.h> @@ -36,12 +38,6 @@ * */ -/** Registered ARP protocols */ -static struct arp_net_protocol arp_net_protocols[0] - __table_start ( struct arp_net_protocol, arp_net_protocols ); -static struct arp_net_protocol arp_net_protocols_end[0] - __table_end ( struct arp_net_protocol, arp_net_protocols ); - /** An ARP cache entry */ struct arp_entry { /** Network-layer protocol */ @@ -160,7 +156,7 @@ int arp_resolve ( struct net_device *netdev, struct net_protocol *net_protocol, /* Transmit ARP request */ if ( ( rc = net_tx ( iobuf, netdev, &arp_protocol, - ll_protocol->ll_broadcast ) ) != 0 ) + netdev->ll_broadcast ) ) != 0 ) return rc; return -ENOENT; @@ -176,8 +172,7 @@ int arp_resolve ( struct net_device *netdev, struct net_protocol *net_protocol, static struct arp_net_protocol * arp_find_protocol ( uint16_t net_proto ) { struct arp_net_protocol *arp_net_protocol; - for ( arp_net_protocol = arp_net_protocols ; - arp_net_protocol < arp_net_protocols_end ; arp_net_protocol++ ) { + for_each_table_entry ( arp_net_protocol, ARP_NET_PROTOCOLS ) { if ( arp_net_protocol->net_protocol->net_proto == net_proto ) { return arp_net_protocol; } diff --git a/gpxe/src/net/cachedhcp.c b/gpxe/src/net/cachedhcp.c new file mode 100644 index 00000000..37f344b6 --- /dev/null +++ b/gpxe/src/net/cachedhcp.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <gpxe/dhcp.h> +#include <gpxe/dhcppkt.h> +#include <gpxe/netdevice.h> +#include <gpxe/iobuf.h> +#include <gpxe/uaccess.h> + +/** @file + * + * Cached DHCP packet handling + * + */ + +/** + * Store cached DHCPACK packet + * + * @v data User pointer to cached DHCP packet data + * @v len Length of cached DHCP packet data + * @ret rc Return status code + * + * This function should be called by the architecture-specific + * get_cached_dhcpack() handler. + */ +void store_cached_dhcpack ( userptr_t data, size_t len ) { + struct dhcp_packet *dhcppkt; + struct dhcphdr *dhcphdr; + struct settings *parent; + int rc; + + /* Create DHCP packet */ + dhcppkt = zalloc ( sizeof ( *dhcppkt ) + len ); + if ( ! dhcppkt ) + return; + + /* Fill in data for DHCP packet */ + dhcphdr = ( ( ( void * ) dhcppkt ) + sizeof ( * dhcppkt ) ); + copy_from_user ( dhcphdr, data, 0, len ); + dhcppkt_init ( dhcppkt, dhcphdr, len ); + DBG_HD ( dhcppkt->options.data, dhcppkt->options.len ); + + /* Register settings on the last opened network device. + * This will have the effect of registering cached settings + * with a network device when "dhcp netX" is performed for that + * device, which is usually what we want. 
+ */ + parent = netdev_settings ( last_opened_netdev() ); + if ( ( rc = register_settings ( &dhcppkt->settings, parent ) ) != 0 ) + DBG ( "DHCP could not register cached settings: %s\n", + strerror ( rc ) ); + + dhcppkt_put ( dhcppkt ); + + DBG ( "DHCP registered cached settings\n" ); +} diff --git a/gpxe/src/net/dhcpopts.c b/gpxe/src/net/dhcpopts.c index 1898011a..6482c627 100644 --- a/gpxe/src/net/dhcpopts.c +++ b/gpxe/src/net/dhcpopts.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <stdlib.h> #include <stdio.h> @@ -101,7 +103,7 @@ static unsigned int dhcp_option_len ( struct dhcp_option *option ) { * DHCP option block. Encapsulated options may be searched for by * using DHCP_ENCAP_OPT() to construct the tag value. * - * If the option is encapsulated, and @c encapsulator is non-NULL, it + * If the option is encapsulated, and @c encap_offset is non-NULL, it * will be filled in with the offset of the encapsulating option. * * This routine is designed to be paranoid. It does not assume that @@ -134,8 +136,15 @@ static int find_dhcp_option_with_encap ( struct dhcp_options *options, if ( remaining < 0 ) break; /* Check for explicit end marker */ - if ( option->tag == DHCP_END ) - break; + if ( option->tag == DHCP_END ) { + if ( tag == DHCP_END ) + /* Special case where the caller is interested + * in whether we have this marker or not. 
+ */ + return offset; + else + break; + } /* Check for matching tag */ if ( option->tag == tag ) { DBGC ( options, "DHCPOPT %p found %s (length %d)\n", @@ -254,7 +263,7 @@ static int set_dhcp_option ( struct dhcp_options *options, unsigned int tag, static const uint8_t empty_encapsulator[] = { DHCP_END }; int offset; int encap_offset = -1; - int creation_offset = 0; + int creation_offset; struct dhcp_option *option; unsigned int encap_tag = DHCP_ENCAPSULATOR ( tag ); size_t old_len = 0; @@ -265,6 +274,10 @@ static int set_dhcp_option ( struct dhcp_options *options, unsigned int tag, if ( tag == DHCP_PAD ) return -ENOTTY; + creation_offset = find_dhcp_option_with_encap ( options, DHCP_END, + NULL ); + if ( creation_offset < 0 ) + creation_offset = options->len; /* Find old instance of this option, if any */ offset = find_dhcp_option_with_encap ( options, tag, &encap_offset ); if ( offset >= 0 ) { diff --git a/gpxe/src/net/dhcppkt.c b/gpxe/src/net/dhcppkt.c index 1f2d373c..20a0e666 100644 --- a/gpxe/src/net/dhcppkt.c +++ b/gpxe/src/net/dhcppkt.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <stdlib.h> #include <stdio.h> @@ -155,6 +157,8 @@ int dhcppkt_store ( struct dhcp_packet *dhcppkt, unsigned int tag, memset ( field_data, 0, field->len ); memcpy ( dhcp_packet_field ( dhcppkt->dhcphdr, field ), data, len ); + /* Erase any equivalent option from the options block */ + dhcpopt_store ( &dhcppkt->options, tag, NULL, 0 ); return 0; } @@ -181,14 +185,16 @@ int dhcppkt_fetch ( struct dhcp_packet *dhcppkt, unsigned int tag, void *data, size_t len ) { struct dhcp_packet_field *field; void *field_data; - size_t field_len; + size_t field_len = 0; - /* If this is a special field, return it */ + /* Identify special field, if any */ if ( ( field = find_dhcp_packet_field ( tag ) ) != NULL ) { field_data = dhcp_packet_field ( dhcppkt->dhcphdr, field ); field_len = field->used_len ( field_data, field->len ); - if ( ! field_len ) - return -ENOENT; + } + + /* Return special field, if it exists and is populated */ + if ( field_len ) { if ( len > field_len ) len = field_len; memcpy ( data, field_data, len ); diff --git a/gpxe/src/net/eapol.c b/gpxe/src/net/eapol.c new file mode 100644 index 00000000..507c8ce2 --- /dev/null +++ b/gpxe/src/net/eapol.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** @file + * + * 802.1X Extensible Authentication Protocol over LANs demultiplexer + * + */ + +#include <gpxe/netdevice.h> +#include <gpxe/iobuf.h> +#include <gpxe/if_ether.h> +#include <gpxe/eapol.h> +#include <errno.h> +#include <byteswap.h> + +/** + * Receive EAPOL network-layer packet + * + * @v iob I/O buffer + * @v netdev Network device + * @v ll_source Link-layer source address + * + * This function takes ownership of the I/O buffer passed to it. + */ +static int eapol_rx ( struct io_buffer *iob, struct net_device *netdev, + const void *ll_source ) +{ + struct eapol_frame *eapol = iob->data; + struct eapol_handler *handler; + + if ( iob_len ( iob ) < EAPOL_HDR_LEN ) { + free_iob ( iob ); + return -EINVAL; + } + + for_each_table_entry ( handler, EAPOL_HANDLERS ) { + if ( handler->type == eapol->type ) { + iob_pull ( iob, EAPOL_HDR_LEN ); + return handler->rx ( iob, netdev, ll_source ); + } + } + + free_iob ( iob ); + return -( ENOTSUP | ( ( eapol->type & 0x1f ) << 8 ) ); +} + +/** + * Transcribe EAPOL network-layer address + * + * @v net_addr Network-layer address + * @ret str String representation of network-layer address + * + * EAPOL doesn't have network-layer addresses, so we just return the + * string @c "<EAPOL>". 
+ */ +static const char * eapol_ntoa ( const void *net_addr __unused ) +{ + return "<EAPOL>"; +} + +/** EAPOL network protocol */ +struct net_protocol eapol_protocol __net_protocol = { + .name = "EAPOL", + .rx = eapol_rx, + .ntoa = eapol_ntoa, + .net_proto = htons ( ETH_P_EAPOL ), +}; diff --git a/gpxe/src/net/ethernet.c b/gpxe/src/net/ethernet.c index b16135a9..79ed1dc6 100644 --- a/gpxe/src/net/ethernet.c +++ b/gpxe/src/net/ethernet.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <stdio.h> #include <string.h> @@ -41,13 +43,15 @@ static uint8_t eth_broadcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; /** * Add Ethernet link-layer header * + * @v netdev Network device * @v iobuf I/O buffer * @v ll_dest Link-layer destination address * @v ll_source Source link-layer address * @v net_proto Network-layer protocol, in network-byte order * @ret rc Return status code */ -static int eth_push ( struct io_buffer *iobuf, const void *ll_dest, +static int eth_push ( struct net_device *netdev __unused, + struct io_buffer *iobuf, const void *ll_dest, const void *ll_source, uint16_t net_proto ) { struct ethhdr *ethhdr = iob_push ( iobuf, sizeof ( *ethhdr ) ); @@ -62,13 +66,15 @@ static int eth_push ( struct io_buffer *iobuf, const void *ll_dest, /** * Remove Ethernet link-layer header * + * @v netdev Network device * @v iobuf I/O buffer * @ret ll_dest Link-layer destination address * @ret ll_source Source link-layer address * @ret net_proto Network-layer protocol, in network-byte order * @ret rc Return status code */ -static int eth_pull ( struct io_buffer *iobuf, const void **ll_dest, +static int eth_pull ( struct net_device *netdev __unused, + struct io_buffer *iobuf, const void **ll_dest, const void **ll_source, uint16_t *net_proto ) { struct ethhdr *ethhdr = iobuf->data; @@ -91,6 +97,16 @@ static int eth_pull ( struct io_buffer *iobuf, const void **ll_dest, } /** + * 
Initialise Ethernet address + * + * @v hw_addr Hardware address + * @v ll_addr Link-layer address + */ +void eth_init_addr ( const void *hw_addr, void *ll_addr ) { + memcpy ( ll_addr, hw_addr, ETH_ALEN ); +} + +/** * Transcribe Ethernet address * * @v ll_addr Link-layer address @@ -114,8 +130,7 @@ const char * eth_ntoa ( const void *ll_addr ) { * @v ll_addr Link-layer address to fill in * @ret rc Return status code */ -static int eth_mc_hash ( unsigned int af, const void *net_addr, - void *ll_addr ) { +int eth_mc_hash ( unsigned int af, const void *net_addr, void *ll_addr ) { const uint8_t *net_addr_bytes = net_addr; uint8_t *ll_addr_bytes = ll_addr; @@ -133,15 +148,46 @@ static int eth_mc_hash ( unsigned int af, const void *net_addr, } } +/** + * Generate Ethernet-compatible compressed link-layer address + * + * @v ll_addr Link-layer address + * @v eth_addr Ethernet-compatible address to fill in + */ +int eth_eth_addr ( const void *ll_addr, void *eth_addr ) { + memcpy ( eth_addr, ll_addr, ETH_ALEN ); + return 0; +} + /** Ethernet protocol */ struct ll_protocol ethernet_protocol __ll_protocol = { .name = "Ethernet", .ll_proto = htons ( ARPHRD_ETHER ), + .hw_addr_len = ETH_ALEN, .ll_addr_len = ETH_ALEN, .ll_header_len = ETH_HLEN, - .ll_broadcast = eth_broadcast, .push = eth_push, .pull = eth_pull, + .init_addr = eth_init_addr, .ntoa = eth_ntoa, .mc_hash = eth_mc_hash, + .eth_addr = eth_eth_addr, }; + +/** + * Allocate Ethernet device + * + * @v priv_size Size of driver private data + * @ret netdev Network device, or NULL + */ +struct net_device * alloc_etherdev ( size_t priv_size ) { + struct net_device *netdev; + + netdev = alloc_netdev ( priv_size ); + if ( netdev ) { + netdev->ll_protocol = ðernet_protocol; + netdev->ll_broadcast = eth_broadcast; + netdev->max_pkt_len = ETH_FRAME_LEN; + } + return netdev; +} diff --git a/gpxe/src/net/fakedhcp.c b/gpxe/src/net/fakedhcp.c index 0518789c..ad3f046f 100644 --- a/gpxe/src/net/fakedhcp.c +++ b/gpxe/src/net/fakedhcp.c @@ 
-16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <stdlib.h> #include <stdio.h> diff --git a/gpxe/src/net/icmp.c b/gpxe/src/net/icmp.c index 3e45c1f6..749c3454 100644 --- a/gpxe/src/net/icmp.c +++ b/gpxe/src/net/icmp.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <string.h> #include <errno.h> #include <gpxe/iobuf.h> diff --git a/gpxe/src/net/infiniband.c b/gpxe/src/net/infiniband.c index d79bdc2c..d7813249 100644 --- a/gpxe/src/net/infiniband.c +++ b/gpxe/src/net/infiniband.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <stdlib.h> #include <stdio.h> @@ -25,12 +27,15 @@ #include <errno.h> #include <assert.h> #include <gpxe/list.h> +#include <gpxe/errortab.h> #include <gpxe/if_arp.h> #include <gpxe/netdevice.h> #include <gpxe/iobuf.h> #include <gpxe/ipoib.h> #include <gpxe/process.h> #include <gpxe/infiniband.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_sma.h> /** @file * @@ -41,6 +46,26 @@ /** List of Infiniband devices */ struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices ); +/** List of open Infiniband devices, in reverse order of opening */ +static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices ); + +/* Disambiguate the various possible EINPROGRESSes */ +#define EINPROGRESS_INIT ( EINPROGRESS | EUNIQ_01 ) +#define EINPROGRESS_ARMED ( EINPROGRESS | EUNIQ_02 ) + +/** Human-readable message for the link statuses */ +struct errortab infiniband_errors[] __errortab = { + { EINPROGRESS_INIT, "Initialising" }, + { EINPROGRESS_ARMED, "Armed" }, +}; + +/*************************************************************************** + * + * Completion queues + * + *************************************************************************** + */ + /** * Create completion queue * @@ 
-61,6 +86,8 @@ ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, cq = zalloc ( sizeof ( *cq ) ); if ( ! cq ) goto err_alloc_cq; + cq->ibdev = ibdev; + list_add ( &cq->list, &ibdev->cqs ); cq->num_cqes = num_cqes; INIT_LIST_HEAD ( &cq->work_queues ); cq->op = op; @@ -79,6 +106,7 @@ ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, ibdev->op->destroy_cq ( ibdev, cq ); err_dev_create_cq: + list_del ( &cq->list ); free ( cq ); err_alloc_cq: return NULL; @@ -96,26 +124,57 @@ void ib_destroy_cq ( struct ib_device *ibdev, ibdev, cq->cqn ); assert ( list_empty ( &cq->work_queues ) ); ibdev->op->destroy_cq ( ibdev, cq ); + list_del ( &cq->list ); free ( cq ); } /** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +void ib_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + struct ib_work_queue *wq; + + /* Poll completion queue */ + ibdev->op->poll_cq ( ibdev, cq ); + + /* Refill receive work queues */ + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ! wq->is_send ) + ib_refill_recv ( ibdev, wq->qp ); + } +} + +/*************************************************************************** + * + * Work queues + * + *************************************************************************** + */ + +/** * Create queue pair * * @v ibdev Infiniband device + * @v type Queue pair type * @v num_send_wqes Number of send work queue entries * @v send_cq Send completion queue * @v num_recv_wqes Number of receive work queue entries * @v recv_cq Receive completion queue - * @v qkey Queue key * @ret qp Queue pair + * + * The queue pair will be left in the INIT state; you must call + * ib_modify_qp() before it is ready to use for sending and receiving. 
*/ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, + enum ib_queue_pair_type type, unsigned int num_send_wqes, struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, - struct ib_completion_queue *recv_cq, - unsigned long qkey ) { + struct ib_completion_queue *recv_cq ) { struct ib_queue_pair *qp; size_t total_size; int rc; @@ -131,16 +190,18 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, goto err_alloc_qp; qp->ibdev = ibdev; list_add ( &qp->list, &ibdev->qps ); - qp->qkey = qkey; + qp->type = type; qp->send.qp = qp; qp->send.is_send = 1; qp->send.cq = send_cq; list_add ( &qp->send.list, &send_cq->work_queues ); + qp->send.psn = ( random() & 0xffffffUL ); qp->send.num_wqes = num_send_wqes; qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) ); qp->recv.qp = qp; qp->recv.cq = recv_cq; list_add ( &qp->recv.list, &recv_cq->work_queues ); + qp->recv.psn = ( random() & 0xffffffUL ); qp->recv.num_wqes = num_recv_wqes; qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); @@ -152,7 +213,6 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, "%s\n", ibdev, strerror ( rc ) ); goto err_dev_create_qp; } - DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn ); DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n", @@ -161,6 +221,24 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n", ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs, ( ( ( void * ) qp ) + total_size ) ); + + /* Calculate externally-visible QPN */ + switch ( type ) { + case IB_QPT_SMI: + qp->ext_qpn = IB_QPN_SMI; + break; + case IB_QPT_GSI: + qp->ext_qpn = IB_QPN_GSI; + break; + default: + qp->ext_qpn = qp->qpn; + break; + } + if ( qp->ext_qpn != qp->qpn ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n", + ibdev, qp->qpn, 
qp->ext_qpn ); + } + return qp; ibdev->op->destroy_qp ( ibdev, qp ); @@ -178,20 +256,15 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, * * @v ibdev Infiniband device * @v qp Queue pair - * @v mod_list Modification list - * @v qkey New queue key, if applicable + * @v av New address vector, if applicable * @ret rc Return status code */ -int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp, - unsigned long mod_list, unsigned long qkey ) { +int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { int rc; DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn ); - if ( mod_list & IB_MODIFY_QKEY ) - qp->qkey = qkey; - - if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) { + if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n", ibdev, qp->qpn, strerror ( rc ) ); return rc; @@ -251,7 +324,7 @@ struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev, struct ib_queue_pair *qp; list_for_each_entry ( qp, &ibdev->qps, list ) { - if ( qp->qpn == qpn ) + if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) ) return qp; } return NULL; @@ -311,6 +384,7 @@ struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ) { + struct ib_address_vector av_copy; int rc; /* Check queue fill level */ @@ -320,6 +394,20 @@ int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, return -ENOBUFS; } + /* Use default address vector if none specified */ + if ( ! av ) + av = &qp->av; + + /* Make modifiable copy of address vector */ + memcpy ( &av_copy, av, sizeof ( av_copy ) ); + av = &av_copy; + + /* Fill in optional parameters in address vector */ + if ( ! av->qkey ) + av->qkey = qp->qkey; + if ( ! 
av->rate ) + av->rate = IB_RATE_2_5; + /* Post to hardware */ if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: " @@ -343,6 +431,13 @@ int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct io_buffer *iobuf ) { int rc; + /* Check packet length */ + if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n", + ibdev, qp->qpn, iob_tailroom ( iobuf ) ); + return -EINVAL; + } + /* Check queue fill level */ if ( qp->recv.fill >= qp->recv.num_wqes ) { DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n", @@ -371,7 +466,12 @@ int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, */ void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct io_buffer *iobuf, int rc ) { - qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc ); + + if ( qp->send.cq->op->complete_send ) { + qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc ); + } else { + free_iob ( iobuf ); + } qp->send.fill--; } @@ -387,11 +487,54 @@ void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf, int rc ) { - qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc ); + + if ( qp->recv.cq->op->complete_recv ) { + qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc ); + } else { + free_iob ( iobuf ); + } qp->recv.fill--; } /** + * Refill receive work queue + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { + struct io_buffer *iobuf; + int rc; + + /* Keep filling while unfilled entries remain */ + while ( qp->recv.fill < qp->recv.num_wqes ) { + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE ); + if ( ! 
iobuf ) { + /* Non-fatal; we will refill on next attempt */ + return; + } + + /* Post I/O buffer */ + if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not refill: %s\n", + ibdev, strerror ( rc ) ); + free_iob ( iobuf ); + /* Give up */ + return; + } + } +} + +/*************************************************************************** + * + * Link control + * + *************************************************************************** + */ + +/** * Open port * * @v ibdev Infiniband device @@ -400,16 +543,59 @@ void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, int ib_open ( struct ib_device *ibdev ) { int rc; - /* Open device if this is the first requested opening */ - if ( ibdev->open_count == 0 ) { - if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) - return rc; + /* Increment device open request counter */ + if ( ibdev->open_count++ > 0 ) { + /* Device was already open; do nothing */ + return 0; } - /* Increment device open request counter */ - ibdev->open_count++; + /* Create subnet management interface */ + ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI ); + if ( ! ibdev->smi ) { + DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev ); + rc = -ENOMEM; + goto err_create_smi; + } + /* Create subnet management agent */ + if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n", + ibdev, strerror ( rc ) ); + goto err_create_sma; + } + + /* Create general services interface */ + ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI ); + if ( ! 
ibdev->gsi ) { + DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev ); + rc = -ENOMEM; + goto err_create_gsi; + } + + /* Open device */ + if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not open: %s\n", + ibdev, strerror ( rc ) ); + goto err_open; + } + + /* Add to head of open devices list */ + list_add ( &ibdev->open_list, &open_ib_devices ); + + assert ( ibdev->open_count == 1 ); return 0; + + ibdev->op->close ( ibdev ); + err_open: + ib_destroy_mi ( ibdev, ibdev->gsi ); + err_create_gsi: + ib_destroy_sma ( ibdev, ibdev->smi ); + err_create_sma: + ib_destroy_mi ( ibdev, ibdev->smi ); + err_create_smi: + assert ( ibdev->open_count == 1 ); + ibdev->open_count = 0; + return rc; } /** @@ -423,10 +609,38 @@ void ib_close ( struct ib_device *ibdev ) { ibdev->open_count--; /* Close device if this was the last remaining requested opening */ - if ( ibdev->open_count == 0 ) + if ( ibdev->open_count == 0 ) { + list_del ( &ibdev->open_list ); + ib_destroy_mi ( ibdev, ibdev->gsi ); + ib_destroy_sma ( ibdev, ibdev->smi ); + ib_destroy_mi ( ibdev, ibdev->smi ); ibdev->op->close ( ibdev ); + } +} + +/** + * Get link state + * + * @v ibdev Infiniband device + * @ret rc Link status code + */ +int ib_link_rc ( struct ib_device *ibdev ) { + switch ( ibdev->port_state ) { + case IB_PORT_STATE_DOWN: return -ENOTCONN; + case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT; + case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED; + case IB_PORT_STATE_ACTIVE: return 0; + default: return -EINVAL; + } } +/*************************************************************************** + * + * Multicast + * + *************************************************************************** + */ + /** * Attach to multicast group * @@ -434,6 +648,10 @@ void ib_close ( struct ib_device *ibdev ) { * @v qp Queue pair * @v gid Multicast GID * @ret rc Return status code + * + * Note that this function handles only the local device's attachment + * to the multicast GID; it 
does not issue the relevant MADs to join + * the multicast group on the subnet. */ int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_gid *gid ) { @@ -486,6 +704,89 @@ void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, } } +/*************************************************************************** + * + * Miscellaneous + * + *************************************************************************** + */ + +/** + * Get Infiniband HCA information + * + * @v ibdev Infiniband device + * @ret hca_guid HCA GUID + * @ret num_ports Number of ports + */ +int ib_get_hca_info ( struct ib_device *ibdev, + struct ib_gid_half *hca_guid ) { + struct ib_device *tmp; + int num_ports = 0; + + /* Search for IB devices with the same physical device to + * identify port count and a suitable Node GUID. + */ + for_each_ibdev ( tmp ) { + if ( tmp->dev != ibdev->dev ) + continue; + if ( num_ports == 0 ) { + memcpy ( hca_guid, &tmp->gid.u.half[1], + sizeof ( *hca_guid ) ); + } + num_ports++; + } + return num_ports; +} + +/** + * Set port information + * + * @v ibdev Infiniband device + * @v mad Set port information MAD + */ +int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) { + int rc; + + /* Adapters with embedded SMAs do not need to support this method */ + if ( ! ibdev->op->set_port_info ) { + DBGC ( ibdev, "IBDEV %p does not support setting port " + "information\n", ibdev ); + return -ENOTSUP; + } + + if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not set port information: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + + return 0; +}; + +/** + * Set partition key table + * + * @v ibdev Infiniband device + * @v mad Set partition key table MAD + */ +int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) { + int rc; + + /* Adapters with embedded SMAs do not need to support this method */ + if ( ! 
ibdev->op->set_pkey_table ) { + DBGC ( ibdev, "IBDEV %p does not support setting partition " + "key table\n", ibdev ); + return -ENOTSUP; + } + + if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not set partition key table: " + "%s\n", ibdev, strerror ( rc ) ); + return rc; + } + + return 0; +}; /*************************************************************************** * @@ -506,6 +807,22 @@ void ib_link_state_changed ( struct ib_device *ibdev ) { } /** + * Poll event queue + * + * @v ibdev Infiniband device + */ +void ib_poll_eq ( struct ib_device *ibdev ) { + struct ib_completion_queue *cq; + + /* Poll device's event queue */ + ibdev->op->poll_eq ( ibdev ); + + /* Poll all completion queues */ + list_for_each_entry ( cq, &ibdev->cqs, list ) + ib_poll_cq ( ibdev, cq ); +} + +/** * Single-step the Infiniband event queue * * @v process Infiniband event queue process @@ -513,13 +830,13 @@ void ib_link_state_changed ( struct ib_device *ibdev ) { static void ib_step ( struct process *process __unused ) { struct ib_device *ibdev; - list_for_each_entry ( ibdev, &ib_devices, list ) { - ibdev->op->poll_eq ( ibdev ); - } + for_each_ibdev ( ibdev ) + ib_poll_eq ( ibdev ); } /** Infiniband event queue process */ struct process ib_process __permanent_process = { + .list = LIST_HEAD_INIT ( ib_process.list ), .step = ib_step, }; @@ -546,9 +863,11 @@ struct ib_device * alloc_ibdev ( size_t priv_size ) { if ( ibdev ) { drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); ib_set_drvdata ( ibdev, drv_priv ); + INIT_LIST_HEAD ( &ibdev->cqs ); INIT_LIST_HEAD ( &ibdev->qps ); + ibdev->port_state = IB_PORT_STATE_DOWN; ibdev->lid = IB_LID_NONE; - ibdev->pkey = IB_PKEY_NONE; + ibdev->pkey = IB_PKEY_DEFAULT; } return ibdev; } @@ -598,3 +917,35 @@ void unregister_ibdev ( struct ib_device *ibdev ) { ibdev_put ( ibdev ); DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev ); } + +/** + * Find Infiniband device by GID + * + * @v gid GID + * @ret 
ibdev Infiniband device, or NULL + */ +struct ib_device * find_ibdev ( struct ib_gid *gid ) { + struct ib_device *ibdev; + + for_each_ibdev ( ibdev ) { + if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 ) + return ibdev; + } + return NULL; +} + +/** + * Get most recently opened Infiniband device + * + * @ret ibdev Most recently opened Infiniband device, or NULL + */ +struct ib_device * last_opened_ibdev ( void ) { + struct ib_device *ibdev; + + list_for_each_entry ( ibdev, &open_ib_devices, open_list ) { + assert ( ibdev->open_count != 0 ); + return ibdev; + } + + return NULL; +} diff --git a/gpxe/src/net/infiniband/ib_cm.c b/gpxe/src/net/infiniband/ib_cm.c new file mode 100644 index 00000000..ebe65b33 --- /dev/null +++ b/gpxe/src/net/infiniband/ib_cm.c @@ -0,0 +1,413 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_pathrec.h> +#include <gpxe/ib_cm.h> + +/** + * @file + * + * Infiniband communication management + * + */ + +/** List of connections */ +static LIST_HEAD ( ib_cm_conns ); + +/** + * Send "ready to use" response + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v conn Connection + * @v av Address vector + * @ret rc Return status code + */ +static int ib_cm_send_rtu ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_connection *conn, + struct ib_address_vector *av ) { + union ib_mad mad; + struct ib_cm_ready_to_use *ready = + &mad.cm.cm_data.ready_to_use; + int rc; + + /* Construct "ready to use" response */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE ); + ready->local_id = htonl ( conn->local_id ); + ready->remote_id = htonl ( conn->remote_id ); + if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){ + DBGC ( conn, "CM %p could not send RTU: %s\n", + conn, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle duplicate connection replies + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + * @ret rc Return status code + * + * If a "ready to use" MAD is lost, the peer may resend the connection + * reply. We have to respond to these with duplicate "ready to use" + * MADs, otherwise the peer may time out and drop the connection. 
+ */ +static void ib_cm_connect_rep ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_cm_connect_reply *connect_rep = + &mad->cm.cm_data.connect_reply; + struct ib_connection *conn; + int rc; + + /* Identify connection */ + list_for_each_entry ( conn, &ib_cm_conns, list ) { + if ( ntohl ( connect_rep->remote_id ) != conn->local_id ) + continue; + /* Try to send "ready to use" reply */ + if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) { + /* Ignore errors */ + return; + } + return; + } + + DBG ( "CM unidentified connection %08x\n", + ntohl ( connect_rep->remote_id ) ); +} + +/** Communication management agents */ +struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = { + { + .mgmt_class = IB_MGMT_CLASS_CM, + .class_version = IB_CM_CLASS_VERSION, + .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ), + .handle = ib_cm_connect_rep, + }, +}; + +/** + * Convert connection rejection reason to return status code + * + * @v reason Rejection reason (in network byte order) + * @ret rc Return status code + */ +static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) { + switch ( reason ) { + case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) : + return -ENODEV; + case htons ( IB_CM_REJECT_STALE_CONN ) : + return -EALREADY; + case htons ( IB_CM_REJECT_CONSUMER ) : + return -ENOTTY; + default: + return -EPERM; + } +} + +/** + * Handle connection request transaction completion + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_cm_req_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_connection *conn = ib_madx_get_ownerdata ( madx ); + struct ib_queue_pair *qp = conn->qp; + struct ib_cm_common 
*common = &mad->cm.cm_data.common; + struct ib_cm_connect_reply *connect_rep = + &mad->cm.cm_data.connect_reply; + struct ib_cm_connect_reject *connect_rej = + &mad->cm.cm_data.connect_reject; + void *private_data = NULL; + size_t private_data_len = 0; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -EIO; + if ( rc != 0 ) { + DBGC ( conn, "CM %p connection request failed: %s\n", + conn, strerror ( rc ) ); + goto out; + } + + /* Record remote communication ID */ + conn->remote_id = ntohl ( common->local_id ); + + /* Handle response */ + switch ( mad->hdr.attr_id ) { + + case htons ( IB_CM_ATTR_CONNECT_REPLY ) : + /* Extract fields */ + qp->av.qpn = ( ntohl ( connect_rep->local_qpn ) >> 8 ); + qp->send.psn = ( ntohl ( connect_rep->starting_psn ) >> 8 ); + private_data = &connect_rep->private_data; + private_data_len = sizeof ( connect_rep->private_data ); + DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n", + conn, qp->av.qpn, qp->send.psn ); + + /* Modify queue pair */ + if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( conn, "CM %p could not modify queue pair: %s\n", + conn, strerror ( rc ) ); + goto out; + } + + /* Send "ready to use" reply */ + if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) { + /* Treat as non-fatal */ + rc = 0; + } + break; + + case htons ( IB_CM_ATTR_CONNECT_REJECT ) : + /* Extract fields */ + DBGC ( conn, "CM %p connection rejected (reason %d)\n", + conn, ntohs ( connect_rej->reason ) ); + /* Private data is valid only for a Consumer Reject */ + if ( connect_rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) { + private_data = &connect_rej->private_data; + private_data_len = sizeof (connect_rej->private_data); + } + rc = ib_cm_rejection_reason_to_rc ( connect_rej->reason ); + break; + + default: + DBGC ( conn, "CM %p unexpected response (attribute %04x)\n", + conn, ntohs ( mad->hdr.attr_id ) ); + rc = -ENOTSUP; + break; + } + + out: + /* Destroy the completed 
transaction */ + ib_destroy_madx ( ibdev, ibdev->gsi, madx ); + conn->madx = NULL; + + /* Hand off to the upper completion handler */ + conn->op->changed ( ibdev, qp, conn, rc, private_data, + private_data_len ); +} + +/** Connection request operations */ +static struct ib_mad_transaction_operations ib_cm_req_op = { + .complete = ib_cm_req_complete, +}; + +/** + * Handle connection path transaction completion + * + * @v ibdev Infiniband device + * @v path Path + * @v rc Status code + * @v av Address vector, or NULL on error + */ +static void ib_cm_path_complete ( struct ib_device *ibdev, + struct ib_path *path, int rc, + struct ib_address_vector *av ) { + struct ib_connection *conn = ib_path_get_ownerdata ( path ); + struct ib_queue_pair *qp = conn->qp; + union ib_mad mad; + struct ib_cm_connect_request *connect_req = + &mad.cm.cm_data.connect_request; + size_t private_data_len; + + /* Report failures */ + if ( rc != 0 ) { + DBGC ( conn, "CM %p path lookup failed: %s\n", + conn, strerror ( rc ) ); + conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); + goto out; + } + + /* Update queue pair peer path */ + memcpy ( &qp->av, av, sizeof ( qp->av ) ); + + /* Construct connection request */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST ); + connect_req->local_id = htonl ( conn->local_id ); + memcpy ( &connect_req->service_id, &conn->service_id, + sizeof ( connect_req->service_id ) ); + ib_get_hca_info ( ibdev, &connect_req->local_ca ); + connect_req->local_qpn__responder_resources = + htonl ( ( qp->qpn << 8 ) | 1 ); + connect_req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 ); + connect_req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl = + htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) | + ( 0 << 0 ) ); + connect_req->starting_psn__local_timeout__retry_count = + htonl ( ( 
qp->recv.psn << 8 ) | ( 0x14 << 3 ) | + ( 0x07 << 0 ) ); + connect_req->pkey = htons ( ibdev->pkey ); + connect_req->payload_mtu__rdc_exists__rnr_retry = + ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) ); + connect_req->max_cm_retries__srq = + ( ( 0x0f << 4 ) | ( 0 << 3 ) ); + connect_req->primary.local_lid = htons ( ibdev->lid ); + connect_req->primary.remote_lid = htons ( conn->qp->av.lid ); + memcpy ( &connect_req->primary.local_gid, &ibdev->gid, + sizeof ( connect_req->primary.local_gid ) ); + memcpy ( &connect_req->primary.remote_gid, &conn->qp->av.gid, + sizeof ( connect_req->primary.remote_gid ) ); + connect_req->primary.flow_label__rate = + htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) ); + connect_req->primary.hop_limit = 0; + connect_req->primary.sl__subnet_local = + ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) ); + connect_req->primary.local_ack_timeout = ( 0x13 << 3 ); + private_data_len = conn->private_data_len; + if ( private_data_len > sizeof ( connect_req->private_data ) ) + private_data_len = sizeof ( connect_req->private_data ); + memcpy ( &connect_req->private_data, &conn->private_data, + private_data_len ); + + /* Create connection request */ + av->qpn = IB_QPN_GSI; + av->qkey = IB_QKEY_GSI; + conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av, + &ib_cm_req_op ); + if ( ! 
conn->madx ) { + DBGC ( conn, "CM %p could not create connection request\n", + conn ); + conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); + goto out; + } + ib_madx_set_ownerdata ( conn->madx, conn ); + + out: + /* Destroy the completed transaction */ + ib_destroy_path ( ibdev, path ); + conn->path = NULL; +} + +/** Connection path operations */ +static struct ib_path_operations ib_cm_path_op = { + .complete = ib_cm_path_complete, +}; + +/** + * Create connection to remote QP + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dgid Target GID + * @v service_id Target service ID + * @v private_data Connection request private data + * @v private_data_len Length of connection request private data + * @v op Connection operations + * @ret conn Connection + */ +struct ib_connection * +ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *dgid, struct ib_gid_half *service_id, + void *private_data, size_t private_data_len, + struct ib_connection_operations *op ) { + struct ib_connection *conn; + + /* Allocate and initialise request */ + conn = zalloc ( sizeof ( *conn ) + private_data_len ); + if ( ! conn ) + goto err_alloc_conn; + conn->ibdev = ibdev; + conn->qp = qp; + memset ( &qp->av, 0, sizeof ( qp->av ) ); + qp->av.gid_present = 1; + memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) ); + conn->local_id = random(); + memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) ); + conn->op = op; + conn->private_data_len = private_data_len; + memcpy ( &conn->private_data, private_data, private_data_len ); + + /* Create path */ + conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op ); + if ( ! 
conn->path ) + goto err_create_path; + ib_path_set_ownerdata ( conn->path, conn ); + + /* Add to list of connections */ + list_add ( &conn->list, &ib_cm_conns ); + + DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n", + conn, ibdev, qp->qpn ); + DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n", + conn, ntohl ( dgid->u.dwords[0] ), ntohl ( dgid->u.dwords[1] ), + ntohl ( dgid->u.dwords[2] ), ntohl ( dgid->u.dwords[3] ), + ntohl ( service_id->u.dwords[0] ), + ntohl ( service_id->u.dwords[1] ) ); + + return conn; + + ib_destroy_path ( ibdev, conn->path ); + err_create_path: + free ( conn ); + err_alloc_conn: + return NULL; +} + +/** + * Destroy connection to remote QP + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v conn Connection + */ +void ib_destroy_conn ( struct ib_device *ibdev, + struct ib_queue_pair *qp __unused, + struct ib_connection *conn ) { + + list_del ( &conn->list ); + if ( conn->madx ) + ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx ); + if ( conn->path ) + ib_destroy_path ( ibdev, conn->path ); + free ( conn ); +} diff --git a/gpxe/src/net/infiniband/ib_cmrc.c b/gpxe/src/net/infiniband/ib_cmrc.c new file mode 100644 index 00000000..2d648115 --- /dev/null +++ b/gpxe/src/net/infiniband/ib_cmrc.c @@ -0,0 +1,436 @@ +/* + * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +FILE_LICENCE ( BSD2 ); + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <gpxe/iobuf.h> +#include <gpxe/xfer.h> +#include <gpxe/process.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_cm.h> +#include <gpxe/ib_cmrc.h> + +/** + * @file + * + * Infiniband Communication-managed Reliable Connections + * + */ + +/** CMRC number of send WQEs + * + * This is a policy decision. + */ +#define IB_CMRC_NUM_SEND_WQES 4 + +/** CMRC number of receive WQEs + * + * This is a policy decision. 
+ */ +#define IB_CMRC_NUM_RECV_WQES 2 + +/** CMRC number of completion queue entries + * + * This is a policy decision + */ +#define IB_CMRC_NUM_CQES 8 + +/** An Infiniband Communication-Managed Reliable Connection */ +struct ib_cmrc_connection { + /** Reference count */ + struct refcnt refcnt; + /** Data transfer interface */ + struct xfer_interface xfer; + /** Infiniband device */ + struct ib_device *ibdev; + /** Completion queue */ + struct ib_completion_queue *cq; + /** Queue pair */ + struct ib_queue_pair *qp; + /** Connection */ + struct ib_connection *conn; + /** Destination GID */ + struct ib_gid dgid; + /** Service ID */ + struct ib_gid_half service_id; + /** QP is connected */ + int connected; + /** Shutdown process */ + struct process shutdown; +}; + +/** + * Shut down CMRC connection gracefully + * + * @v process Process + * + * The Infiniband data structures are not reference-counted or + * guarded. It is therefore unsafe to shut them down while we may be + * in the middle of a callback from the Infiniband stack (e.g. in a + * receive completion handler). + * + * This shutdown process will run some time after the call to + * ib_cmrc_close(), after control has returned out of the Infiniband + * core, and will shut down the Infiniband interfaces cleanly. + * + * The shutdown process holds an implicit reference on the CMRC + * connection, ensuring that the structure is not freed before the + * shutdown process has run. 
+ */ +static void ib_cmrc_shutdown ( struct process *process ) { + struct ib_cmrc_connection *cmrc = + container_of ( process, struct ib_cmrc_connection, shutdown ); + + DBGC ( cmrc, "CMRC %p shutting down\n", cmrc ); + + /* Shut down Infiniband interface */ + ib_destroy_conn ( cmrc->ibdev, cmrc->qp, cmrc->conn ); + ib_destroy_qp ( cmrc->ibdev, cmrc->qp ); + ib_destroy_cq ( cmrc->ibdev, cmrc->cq ); + ib_close ( cmrc->ibdev ); + + /* Remove process from run queue */ + process_del ( &cmrc->shutdown ); + + /* Drop the remaining reference */ + ref_put ( &cmrc->refcnt ); +} + +/** + * Close CMRC connection + * + * @v cmrc Communication-Managed Reliable Connection + * @v rc Reason for close + */ +static void ib_cmrc_close ( struct ib_cmrc_connection *cmrc, int rc ) { + + /* Close data transfer interface */ + xfer_nullify ( &cmrc->xfer ); + xfer_close ( &cmrc->xfer, rc ); + + /* Schedule shutdown process */ + process_add ( &cmrc->shutdown ); +} + +/** + * Handle change of CMRC connection status + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v conn Connection + * @v rc_cm Connection status code + * @v private_data Private data, if available + * @v private_data_len Length of private data + */ +static void ib_cmrc_changed ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_connection *conn __unused, int rc_cm, + void *private_data, size_t private_data_len ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + int rc_xfer; + + /* Record connection status */ + if ( rc_cm == 0 ) { + DBGC ( cmrc, "CMRC %p connected\n", cmrc ); + cmrc->connected = 1; + } else { + DBGC ( cmrc, "CMRC %p disconnected: %s\n", + cmrc, strerror ( rc_cm ) ); + cmrc->connected = 0; + } + + /* Pass up any private data */ + DBGC2 ( cmrc, "CMRC %p received private data:\n", cmrc ); + DBGC2_HDA ( cmrc, 0, private_data, private_data_len ); + if ( private_data && + ( rc_xfer = xfer_deliver_raw ( &cmrc->xfer, private_data, + private_data_len ) ) != 0 ) { + 
DBGC ( cmrc, "CMRC %p could not deliver private data: %s\n", + cmrc, strerror ( rc_xfer ) ); + ib_cmrc_close ( cmrc, rc_xfer ); + return; + } + + /* If we are disconnected, close the upper connection */ + if ( rc_cm != 0 ) { + ib_cmrc_close ( cmrc, rc_cm ); + return; + } +} + +/** CMRC connection operations */ +static struct ib_connection_operations ib_cmrc_conn_op = { + .changed = ib_cmrc_changed, +}; + +/** + * Handle CMRC send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_cmrc_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct io_buffer *iobuf, int rc ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + + /* Free the completed I/O buffer */ + free_iob ( iobuf ); + + /* Close the connection on any send errors */ + if ( rc != 0 ) { + DBGC ( cmrc, "CMRC %p send error: %s\n", + cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); + return; + } +} + +/** + * Handle CMRC receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector, or NULL + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_cmrc_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_address_vector *av __unused, + struct io_buffer *iobuf, int rc ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + + /* Close the connection on any receive errors */ + if ( rc != 0 ) { + DBGC ( cmrc, "CMRC %p receive error: %s\n", + cmrc, strerror ( rc ) ); + free_iob ( iobuf ); + ib_cmrc_close ( cmrc, rc ); + return; + } + + DBGC2 ( cmrc, "CMRC %p received:\n", cmrc ); + DBGC2_HDA ( cmrc, 0, iobuf->data, iob_len ( iobuf ) ); + + /* Pass up data */ + if ( ( rc = xfer_deliver_iob ( &cmrc->xfer, iobuf ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not deliver data: %s\n", + cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); + return; + } +} + +/** Infiniband 
CMRC completion operations */ +static struct ib_completion_queue_operations ib_cmrc_completion_ops = { + .complete_send = ib_cmrc_complete_send, + .complete_recv = ib_cmrc_complete_recv, +}; + +/** + * Send data via CMRC + * + * @v xfer Data transfer interface + * @v iobuf Datagram I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int ib_cmrc_xfer_deliver_iob ( struct xfer_interface *xfer, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct ib_cmrc_connection *cmrc = + container_of ( xfer, struct ib_cmrc_connection, xfer ); + int rc; + + /* If no connection has yet been attempted, send this datagram + * as the CM REQ private data. Otherwise, send it via the QP. + */ + if ( ! cmrc->connected ) { + + /* Abort if we have already sent a CM connection request */ + if ( cmrc->conn ) { + DBGC ( cmrc, "CMRC %p attempt to send before " + "connection is complete\n", cmrc ); + rc = -EIO; + goto out; + } + + /* Send via CM connection request */ + cmrc->conn = ib_create_conn ( cmrc->ibdev, cmrc->qp, + &cmrc->dgid, &cmrc->service_id, + iobuf->data, iob_len ( iobuf ), + &ib_cmrc_conn_op ); + if ( ! 
cmrc->conn ) { + DBGC ( cmrc, "CMRC %p could not connect\n", cmrc ); + rc = -ENOMEM; + goto out; + } + + } else { + + /* Send via QP */ + if ( ( rc = ib_post_send ( cmrc->ibdev, cmrc->qp, NULL, + iob_disown ( iobuf ) ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not send: %s\n", + cmrc, strerror ( rc ) ); + goto out; + } + + } + return 0; + + out: + /* Free the I/O buffer if necessary */ + free_iob ( iobuf ); + + /* Close the connection on any errors */ + if ( rc != 0 ) + ib_cmrc_close ( cmrc, rc ); + + return rc; +} + +/** + * Check CMRC flow control window + * + * @v xfer Data transfer interface + * @ret len Length of window + */ +static size_t ib_cmrc_xfer_window ( struct xfer_interface *xfer ) { + struct ib_cmrc_connection *cmrc = + container_of ( xfer, struct ib_cmrc_connection, xfer ); + + /* We indicate a window only when we are successfully + * connected. + */ + return ( cmrc->connected ? IB_MAX_PAYLOAD_SIZE : 0 ); +} + +/** + * Close CMRC data-transfer interface + * + * @v xfer Data transfer interface + * @v rc Reason for close + */ +static void ib_cmrc_xfer_close ( struct xfer_interface *xfer, int rc ) { + struct ib_cmrc_connection *cmrc = + container_of ( xfer, struct ib_cmrc_connection, xfer ); + + DBGC ( cmrc, "CMRC %p closed: %s\n", cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); +} + +/** CMRC data transfer interface operations */ +static struct xfer_interface_operations ib_cmrc_xfer_operations = { + .close = ib_cmrc_xfer_close, + .vredirect = ignore_xfer_vredirect, + .window = ib_cmrc_xfer_window, + .alloc_iob = default_xfer_alloc_iob, + .deliver_iob = ib_cmrc_xfer_deliver_iob, + .deliver_raw = xfer_deliver_as_iob, +}; + +/** + * Open CMRC connection + * + * @v xfer Data transfer interface + * @v ibdev Infiniband device + * @v dgid Destination GID + * @v service_id Service ID + * @ret rc Returns status code + */ +int ib_cmrc_open ( struct xfer_interface *xfer, struct ib_device *ibdev, + struct ib_gid *dgid, struct ib_gid_half *service_id ) { + 
struct ib_cmrc_connection *cmrc; + int rc; + + /* Allocate and initialise structure */ + cmrc = zalloc ( sizeof ( *cmrc ) ); + if ( ! cmrc ) { + rc = -ENOMEM; + goto err_alloc; + } + xfer_init ( &cmrc->xfer, &ib_cmrc_xfer_operations, &cmrc->refcnt ); + cmrc->ibdev = ibdev; + memcpy ( &cmrc->dgid, dgid, sizeof ( cmrc->dgid ) ); + memcpy ( &cmrc->service_id, service_id, sizeof ( cmrc->service_id ) ); + process_init_stopped ( &cmrc->shutdown, ib_cmrc_shutdown, + &cmrc->refcnt ); + + /* Open Infiniband device */ + if ( ( rc = ib_open ( ibdev ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not open device: %s\n", + cmrc, strerror ( rc ) ); + goto err_open; + } + + /* Create completion queue */ + cmrc->cq = ib_create_cq ( ibdev, IB_CMRC_NUM_CQES, + &ib_cmrc_completion_ops ); + if ( ! cmrc->cq ) { + DBGC ( cmrc, "CMRC %p could not create completion queue\n", + cmrc ); + rc = -ENOMEM; + goto err_create_cq; + } + + /* Create queue pair */ + cmrc->qp = ib_create_qp ( ibdev, IB_QPT_RC, IB_CMRC_NUM_SEND_WQES, + cmrc->cq, IB_CMRC_NUM_RECV_WQES, cmrc->cq ); + if ( ! cmrc->qp ) { + DBGC ( cmrc, "CMRC %p could not create queue pair\n", cmrc ); + rc = -ENOMEM; + goto err_create_qp; + } + ib_qp_set_ownerdata ( cmrc->qp, cmrc ); + DBGC ( cmrc, "CMRC %p using QPN %lx\n", cmrc, cmrc->qp->qpn ); + + /* Attach to parent interface, transfer reference (implicitly) + * to our shutdown process, and return. + */ + xfer_plug_plug ( &cmrc->xfer, xfer ); + return 0; + + ib_destroy_qp ( ibdev, cmrc->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, cmrc->cq ); + err_create_cq: + ib_close ( ibdev ); + err_open: + ref_put ( &cmrc->refcnt ); + err_alloc: + return rc; +} diff --git a/gpxe/src/net/infiniband/ib_mcast.c b/gpxe/src/net/infiniband/ib_mcast.c new file mode 100644 index 00000000..5cb395de --- /dev/null +++ b/gpxe/src/net/infiniband/ib_mcast.c @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <gpxe/list.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_mcast.h> + +/** @file + * + * Infiniband multicast groups + * + */ + +/** + * Generate multicast membership MAD + * + * @v ibdev Infiniband device + * @v gid Multicast GID + * @v join Join (rather than leave) group + * @v mad MAD to fill in + */ +static void ib_mcast_mad ( struct ib_device *ibdev, struct ib_gid *gid, + int join, union ib_mad *mad ) { + struct ib_mad_sa *sa = &mad->sa; + + /* Construct multicast membership record request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = + ( join ? 
IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE ); + sa->sa_data.mc_member_record.scope__join_state = 1; + memcpy ( &sa->sa_data.mc_member_record.mgid, gid, + sizeof ( sa->sa_data.mc_member_record.mgid ) ); + memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid, + sizeof ( sa->sa_data.mc_member_record.port_gid ) ); +} + +/** + * Handle multicast membership record join response + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_mcast_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi __unused, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av __unused ) { + struct ib_mc_membership *membership = ib_madx_get_ownerdata ( madx ); + struct ib_queue_pair *qp = membership->qp; + struct ib_gid *gid = &membership->gid; + struct ib_mc_member_record *mc_member_record = + &mad->sa.sa_data.mc_member_record; + int joined; + unsigned long qkey; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -ENOTCONN; + if ( rc != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx join failed: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto out; + } + + /* Extract values from MAD */ + joined = ( mad->hdr.method == IB_MGMT_METHOD_GET_RESP ); + qkey = ntohl ( mc_member_record->qkey ); + DBGC ( ibdev, "IBDEV %p QPN %lx %s %08x:%08x:%08x:%08x qkey %lx\n", + ibdev, qp->qpn, ( joined ? 
"joined" : "left" ), + ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ), + ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ), + qkey ); + + /* Set queue key */ + qp->qkey = qkey; + if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not modify qkey: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto out; + } + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, mi, madx ); + membership->madx = NULL; + + /* Hand off to upper completion handler */ + membership->complete ( ibdev, qp, membership, rc, mad ); +} + +/** Multicast membership management transaction completion operations */ +static struct ib_mad_transaction_operations ib_mcast_op = { + .complete = ib_mcast_complete, +}; + +/** + * Join multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v membership Multicast group membership + * @v gid Multicast GID to join + * @v joined Join completion handler + * @ret rc Return status code + */ +int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_mc_membership *membership, struct ib_gid *gid, + void ( * complete ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_mc_membership *membership, + int rc, union ib_mad *mad ) ) { + union ib_mad mad; + int rc; + + DBGC ( ibdev, "IBDEV %p QPN %lx joining %08x:%08x:%08x:%08x\n", + ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ), + ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ), + ntohl ( gid->u.dwords[3] ) ); + + /* Initialise structure */ + membership->qp = qp; + memcpy ( &membership->gid, gid, sizeof ( membership->gid ) ); + membership->complete = complete; + + /* Attach queue pair to multicast GID */ + if ( ( rc = ib_mcast_attach ( ibdev, qp, gid ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not attach: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto err_mcast_attach; + } + + /* Initiate multicast membership join */ + ib_mcast_mad ( ibdev, gid, 1, &mad ); + 
membership->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL, + &ib_mcast_op ); + if ( ! membership->madx ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not create join " + "transaction\n", ibdev, qp->qpn ); + rc = -ENOMEM; + goto err_create_madx; + } + ib_madx_set_ownerdata ( membership->madx, membership ); + + return 0; + + ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx ); + err_create_madx: + ib_mcast_detach ( ibdev, qp, gid ); + err_mcast_attach: + return rc; +} + +/** + * Leave multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v membership Multicast group membership + */ +void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_mc_membership *membership ) { + struct ib_gid *gid = &membership->gid; + union ib_mad mad; + int rc; + + DBGC ( ibdev, "IBDEV %p QPN %lx leaving %08x:%08x:%08x:%08x\n", + ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ), + ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ), + ntohl ( gid->u.dwords[3] ) ); + + /* Detach from multicast GID */ + ib_mcast_detach ( ibdev, qp, &membership->gid ); + + /* Cancel multicast membership join, if applicable */ + if ( membership->madx ) { + ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx ); + membership->madx = NULL; + } + + /* Send a single group leave MAD */ + ib_mcast_mad ( ibdev, &membership->gid, 0, &mad ); + if ( ( rc = ib_mi_send ( ibdev, ibdev->gsi, &mad, NULL ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not send leave request: " + "%s\n", ibdev, qp->qpn, strerror ( rc ) ); + } +} diff --git a/gpxe/src/net/infiniband/ib_mi.c b/gpxe/src/net/infiniband/ib_mi.c new file mode 100644 index 00000000..7511fd87 --- /dev/null +++ b/gpxe/src/net/infiniband/ib_mi.c @@ -0,0 +1,406 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <byteswap.h> +#include <gpxe/infiniband.h> +#include <gpxe/iobuf.h> +#include <gpxe/ib_mi.h> + +/** + * @file + * + * Infiniband management interfaces + * + */ + +/** Management interface number of send WQEs + * + * This is a policy decision. + */ +#define IB_MI_NUM_SEND_WQES 4 + +/** Management interface number of receive WQEs + * + * This is a policy decision. 
+ */ +#define IB_MI_NUM_RECV_WQES 2 + +/** Management interface number of completion queue entries + * + * This is a policy decision + */ +#define IB_MI_NUM_CQES 8 + +/** TID magic signature */ +#define IB_MI_TID_MAGIC ( ( 'g' << 24 ) | ( 'P' << 16 ) | ( 'X' << 8 ) | 'E' ) + +/** TID to use for next MAD */ +static unsigned int next_tid; + +/** + * Handle received MAD + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + * @ret rc Return status code + */ +static int ib_mi_handle ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_mad_hdr *hdr = &mad->hdr; + struct ib_mad_transaction *madx; + struct ib_mad_agent *agent; + + /* Look for a matching transaction by TID */ + list_for_each_entry ( madx, &mi->madx, list ) { + if ( memcmp ( &hdr->tid, &madx->mad.hdr.tid, + sizeof ( hdr->tid ) ) != 0 ) + continue; + /* Found a matching transaction */ + madx->op->complete ( ibdev, mi, madx, 0, mad, av ); + return 0; + } + + /* If there is no matching transaction, look for a listening agent */ + for_each_table_entry ( agent, IB_MAD_AGENTS ) { + if ( ( ( agent->mgmt_class & IB_MGMT_CLASS_MASK ) != + ( hdr->mgmt_class & IB_MGMT_CLASS_MASK ) ) || + ( agent->class_version != hdr->class_version ) || + ( agent->attr_id != hdr->attr_id ) ) + continue; + /* Found a matching agent */ + agent->handle ( ibdev, mi, mad, av ); + return 0; + } + + /* Otherwise, ignore it */ + DBGC ( mi, "MI %p RX TID %08x%08x ignored\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + return -ENOTSUP; +} + +/** + * Complete receive via management interface + * + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_mi_complete_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_address_vector *av, + struct io_buffer *iobuf, int rc ) { + 
struct ib_mad_interface *mi = ib_qp_get_ownerdata ( qp ); + union ib_mad *mad; + struct ib_mad_hdr *hdr; + + /* Ignore errors */ + if ( rc != 0 ) { + DBGC ( mi, "MI %p RX error: %s\n", mi, strerror ( rc ) ); + goto out; + } + + /* Sanity checks */ + if ( iob_len ( iobuf ) != sizeof ( *mad ) ) { + DBGC ( mi, "MI %p RX bad size (%zd bytes)\n", + mi, iob_len ( iobuf ) ); + DBGC_HDA ( mi, 0, iobuf->data, iob_len ( iobuf ) ); + goto out; + } + mad = iobuf->data; + hdr = &mad->hdr; + if ( hdr->base_version != IB_MGMT_BASE_VERSION ) { + DBGC ( mi, "MI %p RX unsupported base version %x\n", + mi, hdr->base_version ); + DBGC_HDA ( mi, 0, mad, sizeof ( *mad ) ); + goto out; + } + DBGC ( mi, "MI %p RX TID %08x%08x (%02x,%02x,%02x,%04x) status " + "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + hdr->mgmt_class, hdr->class_version, hdr->method, + ntohs ( hdr->attr_id ), ntohs ( hdr->status ) ); + DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) ); + + /* Handle MAD */ + if ( ( rc = ib_mi_handle ( ibdev, mi, mad, av ) ) != 0 ) + goto out; + + out: + free_iob ( iobuf ); +} + +/** Management interface completion operations */ +static struct ib_completion_queue_operations ib_mi_completion_ops = { + .complete_recv = ib_mi_complete_recv, +}; + +/** + * Transmit MAD + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad MAD + * @v av Destination address vector + * @ret rc Return status code + */ +int ib_mi_send ( struct ib_device *ibdev, struct ib_mad_interface *mi, + union ib_mad *mad, struct ib_address_vector *av ) { + struct ib_mad_hdr *hdr = &mad->hdr; + struct io_buffer *iobuf; + int rc; + + /* Set common fields */ + hdr->base_version = IB_MGMT_BASE_VERSION; + if ( ( hdr->tid[0] == 0 ) && ( hdr->tid[1] == 0 ) ) { + hdr->tid[0] = htonl ( IB_MI_TID_MAGIC ); + hdr->tid[1] = htonl ( ++next_tid ); + } + DBGC ( mi, "MI %p TX TID %08x%08x (%02x,%02x,%02x,%04x) status " + "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + hdr->mgmt_class, 
hdr->class_version, hdr->method, + ntohs ( hdr->attr_id ), ntohs ( hdr->status ) ); + DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) ); + + /* Construct directed route portion of response, if necessary */ + if ( hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ) { + struct ib_mad_smp *smp = &mad->smp; + unsigned int hop_pointer; + unsigned int hop_count; + + smp->mad_hdr.status |= htons ( IB_SMP_STATUS_D_INBOUND ); + hop_pointer = smp->mad_hdr.class_specific.smp.hop_pointer; + hop_count = smp->mad_hdr.class_specific.smp.hop_count; + assert ( hop_count == hop_pointer ); + if ( hop_pointer < ( sizeof ( smp->return_path.hops ) / + sizeof ( smp->return_path.hops[0] ) ) ) { + smp->return_path.hops[hop_pointer] = ibdev->port; + } else { + DBGC ( mi, "MI %p TX TID %08x%08x invalid hop pointer " + "%d\n", mi, ntohl ( hdr->tid[0] ), + ntohl ( hdr->tid[1] ), hop_pointer ); + return -EINVAL; + } + } + + /* Construct I/O buffer */ + iobuf = alloc_iob ( sizeof ( *mad ) ); + if ( ! iobuf ) { + DBGC ( mi, "MI %p could not allocate buffer for TID " + "%08x%08x\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + return -ENOMEM; + } + memcpy ( iob_put ( iobuf, sizeof ( *mad ) ), mad, sizeof ( *mad ) ); + + /* Send I/O buffer */ + if ( ( rc = ib_post_send ( ibdev, mi->qp, av, iobuf ) ) != 0 ) { + DBGC ( mi, "MI %p TX TID %08x%08x failed: %s\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + +/** + * Handle management transaction timer expiry + * + * @v timer Retry timer + * @v expired Failure indicator + */ +static void ib_mi_timer_expired ( struct retry_timer *timer, int expired ) { + struct ib_mad_transaction *madx = + container_of ( timer, struct ib_mad_transaction, timer ); + struct ib_mad_interface *mi = madx->mi; + struct ib_device *ibdev = mi->ibdev; + struct ib_mad_hdr *hdr = &madx->mad.hdr; + + /* Abandon transaction if we have tried too many times */ + if ( expired ) { + DBGC ( mi, "MI 
%p abandoning TID %08x%08x\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + madx->op->complete ( ibdev, mi, madx, -ETIMEDOUT, NULL, NULL ); + return; + } + + /* Restart retransmission timer */ + start_timer ( timer ); + + /* Resend MAD */ + ib_mi_send ( ibdev, mi, &madx->mad, &madx->av ); +} + +/** + * Create management transaction + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad MAD to send + * @v av Destination address, or NULL to use SM's GSI + * @v op Management transaction operations + * @ret madx Management transaction, or NULL + */ +struct ib_mad_transaction * +ib_create_madx ( struct ib_device *ibdev, struct ib_mad_interface *mi, + union ib_mad *mad, struct ib_address_vector *av, + struct ib_mad_transaction_operations *op ) { + struct ib_mad_transaction *madx; + + /* Allocate and initialise structure */ + madx = zalloc ( sizeof ( *madx ) ); + if ( ! madx ) + return NULL; + madx->mi = mi; + madx->timer.expired = ib_mi_timer_expired; + madx->op = op; + + /* Determine address vector */ + if ( av ) { + memcpy ( &madx->av, av, sizeof ( madx->av ) ); + } else { + madx->av.lid = ibdev->sm_lid; + madx->av.sl = ibdev->sm_sl; + madx->av.qpn = IB_QPN_GSI; + madx->av.qkey = IB_QKEY_GSI; + } + + /* Copy MAD */ + memcpy ( &madx->mad, mad, sizeof ( madx->mad ) ); + + /* Add to list and start timer to send initial MAD */ + list_add ( &madx->list, &mi->madx ); + start_timer_nodelay ( &madx->timer ); + + return madx; +} + +/** + * Destroy management transaction + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + */ +void ib_destroy_madx ( struct ib_device *ibdev __unused, + struct ib_mad_interface *mi __unused, + struct ib_mad_transaction *madx ) { + + /* Stop timer and remove from list */ + stop_timer ( &madx->timer ); + list_del ( &madx->list ); + + /* Free transaction */ + free ( madx ); +} + +/** + * Create management interface + * + * @v ibdev Infiniband device + * @v type Queue 
pair type + * @ret mi Management agent, or NULL + */ +struct ib_mad_interface * ib_create_mi ( struct ib_device *ibdev, + enum ib_queue_pair_type type ) { + struct ib_mad_interface *mi; + int rc; + + /* Allocate and initialise fields */ + mi = zalloc ( sizeof ( *mi ) ); + if ( ! mi ) + goto err_alloc; + mi->ibdev = ibdev; + INIT_LIST_HEAD ( &mi->madx ); + + /* Create completion queue */ + mi->cq = ib_create_cq ( ibdev, IB_MI_NUM_CQES, &ib_mi_completion_ops ); + if ( ! mi->cq ) { + DBGC ( mi, "MI %p could not allocate completion queue\n", mi ); + goto err_create_cq; + } + + /* Create queue pair */ + mi->qp = ib_create_qp ( ibdev, type, IB_MI_NUM_SEND_WQES, mi->cq, + IB_MI_NUM_RECV_WQES, mi->cq ); + if ( ! mi->qp ) { + DBGC ( mi, "MI %p could not allocate queue pair\n", mi ); + goto err_create_qp; + } + ib_qp_set_ownerdata ( mi->qp, mi ); + DBGC ( mi, "MI %p (%s) running on QPN %#lx\n", + mi, ( ( type == IB_QPT_SMI ) ? "SMI" : "GSI" ), mi->qp->qpn ); + + /* Set queue key */ + mi->qp->qkey = ( ( type == IB_QPT_SMI ) ? 
IB_QKEY_SMI : IB_QKEY_GSI ); + if ( ( rc = ib_modify_qp ( ibdev, mi->qp ) ) != 0 ) { + DBGC ( mi, "MI %p could not set queue key: %s\n", + mi, strerror ( rc ) ); + goto err_modify_qp; + } + + /* Fill receive ring */ + ib_refill_recv ( ibdev, mi->qp ); + return mi; + + err_modify_qp: + ib_destroy_qp ( ibdev, mi->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, mi->cq ); + err_create_cq: + free ( mi ); + err_alloc: + return NULL; +} + +/** + * Destroy management interface + * + * @v mi Management interface + */ +void ib_destroy_mi ( struct ib_device *ibdev, struct ib_mad_interface *mi ) { + struct ib_mad_transaction *madx; + struct ib_mad_transaction *tmp; + + /* Flush any outstanding requests */ + list_for_each_entry_safe ( madx, tmp, &mi->madx, list ) { + DBGC ( mi, "MI %p destroyed while TID %08x%08x in progress\n", + mi, ntohl ( madx->mad.hdr.tid[0] ), + ntohl ( madx->mad.hdr.tid[1] ) ); + madx->op->complete ( ibdev, mi, madx, -ECANCELED, NULL, NULL ); + } + + ib_destroy_qp ( ibdev, mi->qp ); + ib_destroy_cq ( ibdev, mi->cq ); + free ( mi ); +} diff --git a/gpxe/src/net/infiniband/ib_packet.c b/gpxe/src/net/infiniband/ib_packet.c new file mode 100644 index 00000000..08820ef3 --- /dev/null +++ b/gpxe/src/net/infiniband/ib_packet.c @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <gpxe/iobuf.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_packet.h> + +/** + * @file + * + * Infiniband Packet Formats + * + */ + +/** + * Add IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer to contain headers + * @v qp Queue pair + * @v payload_len Payload length + * @v av Address vector + */ +int ib_push ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair *qp, size_t payload_len, + const struct ib_address_vector *av ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + size_t pad_len; + size_t lrh_len; + size_t grh_len; + unsigned int vl; + unsigned int lnh; + + DBGC2 ( ibdev, "IBDEV %p TX %04x:%08lx => %04x:%08lx (key %08lx)\n", + ibdev, ibdev->lid, qp->ext_qpn, av->lid, av->qpn, av->qkey ); + + /* Calculate packet length */ + pad_len = ( (-payload_len) & 0x3 ); + payload_len += pad_len; + payload_len += 4; /* ICRC */ + + /* Reserve space for headers */ + orig_iob_len = iob_len ( iobuf ); + deth = iob_push ( iobuf, sizeof ( *deth ) ); + bth = iob_push ( iobuf, sizeof ( *bth ) ); + grh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + grh = ( av->gid_present ? + iob_push ( iobuf, sizeof ( *grh ) ) : NULL ); + lrh = iob_push ( iobuf, sizeof ( *lrh ) ); + lrh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + + /* Construct LRH */ + vl = ( ( qp->ext_qpn == IB_QPN_SMI ) ? IB_VL_SMP : IB_VL_DEFAULT ); + lrh->vl__lver = ( vl << 4 ); + lnh = ( grh ? 
IB_LNH_GRH : IB_LNH_BTH ); + lrh->sl__lnh = ( ( av->sl << 4 ) | lnh ); + lrh->dlid = htons ( av->lid ); + lrh->length = htons ( lrh_len >> 2 ); + lrh->slid = htons ( ibdev->lid ); + + /* Construct GRH, if required */ + if ( grh ) { + grh->ipver__tclass__flowlabel = + htonl ( IB_GRH_IPVER_IPv6 << 28 ); + grh->paylen = htons ( grh_len ); + grh->nxthdr = IB_GRH_NXTHDR_IBA; + grh->hoplmt = 0; + memcpy ( &grh->sgid, &ibdev->gid, sizeof ( grh->sgid ) ); + memcpy ( &grh->dgid, &av->gid, sizeof ( grh->dgid ) ); + } + + /* Construct BTH */ + bth->opcode = BTH_OPCODE_UD_SEND; + bth->se__m__padcnt__tver = ( pad_len << 4 ); + bth->pkey = htons ( ibdev->pkey ); + bth->dest_qp = htonl ( av->qpn ); + bth->ack__psn = htonl ( ( qp->send.psn++ ) & 0xffffffUL ); + + /* Construct DETH */ + deth->qkey = htonl ( av->qkey ); + deth->src_qp = htonl ( qp->ext_qpn ); + + DBGCP_HDA ( ibdev, 0, iobuf->data, + ( iob_len ( iobuf ) - orig_iob_len ) ); + + return 0; +} + +/** + * Remove IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer containing headers + * @v qp Queue pair to fill in, or NULL + * @v payload_len Payload length to fill in, or NULL + * @v av Address vector to fill in + */ +int ib_pull ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair **qp, size_t *payload_len, + struct ib_address_vector *av ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + unsigned int lnh; + size_t pad_len; + unsigned long qpn; + unsigned int lid; + + /* Clear return values */ + if ( qp ) + *qp = NULL; + if ( payload_len ) + *payload_len = 0; + memset ( av, 0, sizeof ( *av ) ); + + /* Extract LRH */ + if ( iob_len ( iobuf ) < sizeof ( *lrh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for LRH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + lrh = iobuf->data; + iob_pull ( iobuf, 
sizeof ( *lrh ) ); + av->lid = ntohs ( lrh->slid ); + av->sl = ( lrh->sl__lnh >> 4 ); + lnh = ( lrh->sl__lnh & 0x3 ); + lid = ntohs ( lrh->dlid ); + + /* Reject unsupported packets */ + if ( ! ( ( lnh == IB_LNH_BTH ) || ( lnh == IB_LNH_GRH ) ) ) { + DBGC ( ibdev, "IBDEV %p RX unsupported LNH %x\n", + ibdev, lnh ); + return -ENOTSUP; + } + + /* Extract GRH, if present */ + if ( lnh == IB_LNH_GRH ) { + if ( iob_len ( iobuf ) < sizeof ( *grh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) " + "for GRH\n", ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + grh = iobuf->data; + iob_pull ( iobuf, sizeof ( *grh ) ); + av->gid_present = 1; + memcpy ( &av->gid, &grh->sgid, sizeof ( av->gid ) ); + } else { + grh = NULL; + } + + /* Extract BTH */ + if ( iob_len ( iobuf ) < sizeof ( *bth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for BTH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + bth = iobuf->data; + iob_pull ( iobuf, sizeof ( *bth ) ); + if ( bth->opcode != BTH_OPCODE_UD_SEND ) { + DBGC ( ibdev, "IBDEV %p unsupported BTH opcode %x\n", + ibdev, bth->opcode ); + return -ENOTSUP; + } + qpn = ntohl ( bth->dest_qp ); + + /* Extract DETH */ + if ( iob_len ( iobuf ) < sizeof ( *deth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for DETH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + deth = iobuf->data; + iob_pull ( iobuf, sizeof ( *deth ) ); + av->qpn = ntohl ( deth->src_qp ); + av->qkey = ntohl ( deth->qkey ); + + /* Calculate payload length, if applicable */ + if ( payload_len ) { + pad_len = ( ( bth->se__m__padcnt__tver >> 4 ) & 0x3 ); + *payload_len = ( ( ntohs ( lrh->length ) << 2 ) + - ( orig_iob_len - iob_len ( iobuf ) ) + - pad_len - 4 /* ICRC */ ); + } + + /* Determine destination QP, if applicable */ + if ( qp ) { + if ( IB_LID_MULTICAST ( lid ) && grh ) { + if ( ! 
( *qp = ib_find_qp_mgid ( ibdev, &grh->dgid ))){ + DBGC ( ibdev, "IBDEV %p RX for unknown MGID " + "%08x:%08x:%08x:%08x\n", ibdev, + ntohl ( grh->dgid.u.dwords[0] ), + ntohl ( grh->dgid.u.dwords[1] ), + ntohl ( grh->dgid.u.dwords[2] ), + ntohl ( grh->dgid.u.dwords[3] ) ); + return -ENODEV; + } + } else { + if ( ! ( *qp = ib_find_qp_qpn ( ibdev, qpn ) ) ) { + DBGC ( ibdev, "IBDEV %p RX for nonexistent " + "QPN %lx\n", ibdev, qpn ); + return -ENODEV; + } + } + assert ( *qp ); + } + + DBGC2 ( ibdev, "IBDEV %p RX %04x:%08lx <= %04x:%08lx (key %08x)\n", + ibdev, lid, ( IB_LID_MULTICAST( lid ) ? + ( qp ? (*qp)->ext_qpn : -1UL ) : qpn ), + av->lid, av->qpn, ntohl ( deth->qkey ) ); + DBGCP_HDA ( ibdev, 0, + ( iobuf->data - ( orig_iob_len - iob_len ( iobuf ) ) ), + ( orig_iob_len - iob_len ( iobuf ) ) ); + + return 0; +} diff --git a/gpxe/src/net/infiniband/ib_pathrec.c b/gpxe/src/net/infiniband/ib_pathrec.c new file mode 100644 index 00000000..136e628d --- /dev/null +++ b/gpxe/src/net/infiniband/ib_pathrec.c @@ -0,0 +1,296 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_pathrec.h> + +/** @file + * + * Infiniband path lookups + * + */ + +/** + * Handle path transaction completion + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_path_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av __unused ) { + struct ib_path *path = ib_madx_get_ownerdata ( madx ); + struct ib_gid *dgid = &path->av.gid; + struct ib_path_record *pathrec = &mad->sa.sa_data.path_record; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -ENETUNREACH; + if ( rc != 0 ) { + DBGC ( ibdev, "IBDEV %p path lookup for %08x:%08x:%08x:%08x " + "failed: %s\n", ibdev, htonl ( dgid->u.dwords[0] ), + htonl ( dgid->u.dwords[1] ), + htonl ( dgid->u.dwords[2] ), + htonl ( dgid->u.dwords[3] ), strerror ( rc ) ); + goto out; + } + + /* Extract values from MAD */ + path->av.lid = ntohs ( pathrec->dlid ); + path->av.sl = ( pathrec->reserved__sl & 0x0f ); + path->av.rate = ( pathrec->rate_selector__rate & 0x3f ); + DBGC ( ibdev, "IBDEV %p path to %08x:%08x:%08x:%08x is %04x sl %d " + "rate %d\n", ibdev, htonl ( dgid->u.dwords[0] ), + htonl ( dgid->u.dwords[1] ), htonl ( dgid->u.dwords[2] ), + htonl ( dgid->u.dwords[3] ), path->av.lid, path->av.sl, + path->av.rate ); + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, mi, madx ); + path->madx = NULL; + + /* Hand off to upper completion handler */ + path->op->complete ( ibdev, path, rc, &path->av ); +} + +/** Path transaction completion 
operations */ +static struct ib_mad_transaction_operations ib_path_op = { + .complete = ib_path_complete, +}; + +/** + * Create path + * + * @v ibdev Infiniband device + * @v av Address vector to complete + * @v op Path operations + * @ret path Path + */ +struct ib_path * +ib_create_path ( struct ib_device *ibdev, struct ib_address_vector *av, + struct ib_path_operations *op ) { + struct ib_path *path; + union ib_mad mad; + struct ib_mad_sa *sa = &mad.sa; + + /* Allocate and initialise structure */ + path = zalloc ( sizeof ( *path ) ); + if ( ! path ) + goto err_alloc_path; + path->ibdev = ibdev; + memcpy ( &path->av, av, sizeof ( path->av ) ); + path->op = op; + + /* Construct path request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = IB_MGMT_METHOD_GET; + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); + memcpy ( &sa->sa_data.path_record.dgid, &path->av.gid, + sizeof ( sa->sa_data.path_record.dgid ) ); + memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid, + sizeof ( sa->sa_data.path_record.sgid ) ); + + /* Create management transaction */ + path->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL, + &ib_path_op ); + if ( ! path->madx ) + goto err_create_madx; + ib_madx_set_ownerdata ( path->madx, path ); + + return path; + + ib_destroy_madx ( ibdev, ibdev->gsi, path->madx ); + err_create_madx: + free ( path ); + err_alloc_path: + return NULL; +} + +/** + * Destroy path + * + * @v ibdev Infiniband device + * @v path Path + */ +void ib_destroy_path ( struct ib_device *ibdev, struct ib_path *path ) { + + if ( path->madx ) + ib_destroy_madx ( ibdev, ibdev->gsi, path->madx ); + free ( path ); +} + +/** Number of path cache entries + * + * Must be a power of two. 
+ */ +#define IB_NUM_CACHED_PATHS 4 + +/** A cached path */ +struct ib_cached_path { + /** Path */ + struct ib_path *path; +}; + +/** Path cache */ +static struct ib_cached_path ib_path_cache[IB_NUM_CACHED_PATHS]; + +/** Oldest path cache entry index */ +static unsigned int ib_path_cache_idx; + +/** + * Find path cache entry + * + * @v ibdev Infiniband device + * @v dgid Destination GID + * @ret path Path cache entry, or NULL + */ +static struct ib_cached_path * +ib_find_path_cache_entry ( struct ib_device *ibdev, struct ib_gid *dgid ) { + struct ib_cached_path *cached; + unsigned int i; + + for ( i = 0 ; i < IB_NUM_CACHED_PATHS ; i++ ) { + cached = &ib_path_cache[i]; + if ( ! cached->path ) + continue; + if ( cached->path->ibdev != ibdev ) + continue; + if ( memcmp ( &cached->path->av.gid, dgid, + sizeof ( cached->path->av.gid ) ) != 0 ) + continue; + return cached; + } + + return NULL; +} + +/** + * Handle cached path transaction completion + * + * @v ibdev Infiniband device + * @v path Path + * @v rc Status code + * @v av Address vector, or NULL on error + */ +static void ib_cached_path_complete ( struct ib_device *ibdev, + struct ib_path *path, int rc, + struct ib_address_vector *av __unused ) { + struct ib_cached_path *cached = ib_path_get_ownerdata ( path ); + + /* If the transaction failed, erase the cache entry */ + if ( rc != 0 ) { + /* Destroy the old cache entry */ + ib_destroy_path ( ibdev, path ); + memset ( cached, 0, sizeof ( *cached ) ); + return; + } + + /* Do not destroy the completed transaction; we still need to + * refer to the resolved path. + */ +} + +/** Cached path transaction completion operations */ +static struct ib_path_operations ib_cached_path_op = { + .complete = ib_cached_path_complete, +}; + +/** + * Resolve path + * + * @v ibdev Infiniband device + * @v av Address vector to complete + * @ret rc Return status code + * + * This provides a non-transactional way to resolve a path, via a + * cache similar to ARP. 
+ */ +int ib_resolve_path ( struct ib_device *ibdev, struct ib_address_vector *av ) { + struct ib_gid *gid = &av->gid; + struct ib_cached_path *cached; + unsigned int cache_idx; + + /* Sanity check */ + if ( ! av->gid_present ) { + DBGC ( ibdev, "IBDEV %p attempt to look up path " + "without GID\n", ibdev ); + return -EINVAL; + } + + /* Look in cache for a matching entry */ + cached = ib_find_path_cache_entry ( ibdev, gid ); + if ( cached && cached->path->av.lid ) { + /* Populated entry found */ + av->lid = cached->path->av.lid; + av->rate = cached->path->av.rate; + av->sl = cached->path->av.sl; + DBGC2 ( ibdev, "IBDEV %p cache hit for %08x:%08x:%08x:%08x\n", + ibdev, htonl ( gid->u.dwords[0] ), + htonl ( gid->u.dwords[1] ), htonl ( gid->u.dwords[2] ), + htonl ( gid->u.dwords[3] ) ); + return 0; + } + DBGC ( ibdev, "IBDEV %p cache miss for %08x:%08x:%08x:%08x%s\n", + ibdev, htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ), + htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ), + ( cached ? " (in progress)" : "" ) ); + + /* If lookup is already in progress, do nothing */ + if ( cached ) + return -ENOENT; + + /* Locate a new cache entry to use */ + cache_idx = ( (ib_path_cache_idx++) % IB_NUM_CACHED_PATHS ); + cached = &ib_path_cache[cache_idx]; + + /* Destroy the old cache entry */ + if ( cached->path ) + ib_destroy_path ( ibdev, cached->path ); + memset ( cached, 0, sizeof ( *cached ) ); + + /* Create new path */ + cached->path = ib_create_path ( ibdev, av, &ib_cached_path_op ); + if ( ! cached->path ) { + DBGC ( ibdev, "IBDEV %p could not create path\n", + ibdev ); + return -ENOMEM; + } + ib_path_set_ownerdata ( cached->path, cached ); + + /* Not found yet */ + return -ENOENT; +} diff --git a/gpxe/src/net/infiniband/ib_sma.c b/gpxe/src/net/infiniband/ib_sma.c new file mode 100644 index 00000000..ff4cbbf6 --- /dev/null +++ b/gpxe/src/net/infiniband/ib_sma.c @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <byteswap.h> +#include <gpxe/settings.h> +#include <gpxe/infiniband.h> +#include <gpxe/iobuf.h> +#include <gpxe/ib_mi.h> +#include <gpxe/ib_sma.h> + +/** + * @file + * + * Infiniband Subnet Management Agent + * + */ + +/** + * Node information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_node_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_node_info *node_info = &mad->smp.smp_data.node_info; + int rc; + + /* Fill in information */ + memset ( node_info, 0, sizeof ( *node_info ) ); + node_info->base_version = IB_MGMT_BASE_VERSION; + node_info->class_version = IB_SMP_CLASS_VERSION; + node_info->node_type = IB_NODE_TYPE_HCA; + node_info->num_ports = ib_get_hca_info ( ibdev, &node_info->sys_guid ); + memcpy ( &node_info->node_guid, &node_info->sys_guid, + sizeof ( node_info->node_guid ) ); + memcpy ( &node_info->port_guid, &ibdev->gid.u.half[1], + sizeof ( node_info->port_guid ) ); + node_info->partition_cap = htons ( 1 ); + 
node_info->local_port_num = ibdev->port; + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send NodeInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Node description + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_node_desc ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_node_desc *node_desc = &mad->smp.smp_data.node_desc; + struct ib_gid_half guid; + char hostname[ sizeof ( node_desc->node_string ) ]; + int hostname_len; + int rc; + + /* Fill in information */ + memset ( node_desc, 0, sizeof ( *node_desc ) ); + ib_get_hca_info ( ibdev, &guid ); + hostname_len = fetch_string_setting ( NULL, &hostname_setting, + hostname, sizeof ( hostname ) ); + snprintf ( node_desc->node_string, sizeof ( node_desc->node_string ), + "gPXE %s%s%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x (%s)", + hostname, ( ( hostname_len >= 0 ) ? 
" " : "" ), + guid.u.bytes[0], guid.u.bytes[1], guid.u.bytes[2], + guid.u.bytes[3], guid.u.bytes[4], guid.u.bytes[5], + guid.u.bytes[6], guid.u.bytes[7], ibdev->dev->name ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send NodeDesc GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * GUID information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_guid_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_guid_info *guid_info = &mad->smp.smp_data.guid_info; + int rc; + + /* Fill in information */ + memset ( guid_info, 0, sizeof ( *guid_info ) ); + memcpy ( guid_info->guid[0], &ibdev->gid.u.half[1], + sizeof ( guid_info->guid[0] ) ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send GuidInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Set port information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @ret rc Return status code + */ +static int ib_sma_set_port_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad ) { + const struct ib_port_info *port_info = &mad->smp.smp_data.port_info; + unsigned int link_width_enabled; + unsigned int link_speed_enabled; + int rc; + + /* Set parameters */ + memcpy ( &ibdev->gid.u.half[0], port_info->gid_prefix, + sizeof ( ibdev->gid.u.half[0] ) ); + ibdev->lid = ntohs ( port_info->lid ); + ibdev->sm_lid = ntohs ( port_info->mastersm_lid ); + if ( ( link_width_enabled = port_info->link_width_enabled ) ) + ibdev->link_width_enabled = link_width_enabled; + if ( ( link_speed_enabled = + ( 
port_info->link_speed_active__link_speed_enabled & 0xf ) ) ) + ibdev->link_speed_enabled = link_speed_enabled; + ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf ); + DBGC ( mi, "SMA %p set LID %04x SMLID %04x link width %02x speed " + "%02x\n", mi, ibdev->lid, ibdev->sm_lid, + ibdev->link_width_enabled, ibdev->link_speed_enabled ); + + /* Update parameters on device */ + if ( ( rc = ib_set_port_info ( ibdev, mad ) ) != 0 ) { + DBGC ( mi, "SMA %p could not set port information: %s\n", + mi, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Port information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_port_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_port_info *port_info = &mad->smp.smp_data.port_info; + int rc; + + /* Set parameters if applicable */ + if ( mad->hdr.method == IB_MGMT_METHOD_SET ) { + if ( ( rc = ib_sma_set_port_info ( ibdev, mi, mad ) ) != 0 ) { + mad->hdr.status = + htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR ); + /* Fall through to generate GetResponse */ + } + } + + /* Fill in information */ + memset ( port_info, 0, sizeof ( *port_info ) ); + memcpy ( port_info->gid_prefix, &ibdev->gid.u.half[0], + sizeof ( port_info->gid_prefix ) ); + port_info->lid = ntohs ( ibdev->lid ); + port_info->mastersm_lid = ntohs ( ibdev->sm_lid ); + port_info->local_port_num = ibdev->port; + port_info->link_width_enabled = ibdev->link_width_enabled; + port_info->link_width_supported = ibdev->link_width_supported; + port_info->link_width_active = ibdev->link_width_active; + port_info->link_speed_supported__port_state = + ( ( ibdev->link_speed_supported << 4 ) | ibdev->port_state ); + port_info->port_phys_state__link_down_def_state = + ( ( IB_PORT_PHYS_STATE_POLLING << 4 ) | + IB_PORT_PHYS_STATE_POLLING ); + port_info->link_speed_active__link_speed_enabled 
= + ( ( ibdev->link_speed_active << 4 ) | + ibdev->link_speed_enabled ); + port_info->neighbour_mtu__mastersm_sl = + ( ( IB_MTU_2048 << 4 ) | ibdev->sm_sl ); + port_info->vl_cap__init_type = ( IB_VL_0 << 4 ); + port_info->init_type_reply__mtu_cap = IB_MTU_2048; + port_info->operational_vls__enforcement = ( IB_VL_0 << 4 ); + port_info->guid_cap = 1; + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send PortInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Set partition key table + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @ret rc Return status code + */ +static int ib_sma_set_pkey_table ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad ) { + struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table; + int rc; + + /* Set parameters */ + ibdev->pkey = ntohs ( pkey_table->pkey[0] ); + DBGC ( mi, "SMA %p set pkey %04x\n", mi, ibdev->pkey ); + + /* Update parameters on device */ + if ( ( rc = ib_set_pkey_table ( ibdev, mad ) ) != 0 ) { + DBGC ( mi, "SMA %p could not set pkey table: %s\n", + mi, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Partition key table + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_pkey_table ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table; + int rc; + + /* Set parameters, if applicable */ + if ( mad->hdr.method == IB_MGMT_METHOD_SET ) { + if ( ( rc = ib_sma_set_pkey_table ( ibdev, mi, mad ) ) != 0 ) { + mad->hdr.status = + htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR ); + /* Fall through to generate GetResponse */ + } + } + + /* Fill in information */ + mad->hdr.method = 
IB_MGMT_METHOD_GET_RESP; + memset ( pkey_table, 0, sizeof ( *pkey_table ) ); + pkey_table->pkey[0] = htons ( ibdev->pkey ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send PKeyTable GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** Subnet management agent */ +struct ib_mad_agent ib_sma_agent[] __ib_mad_agent = { + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_NODE_INFO ), + .handle = ib_sma_node_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_NODE_DESC ), + .handle = ib_sma_node_desc, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_GUID_INFO ), + .handle = ib_sma_guid_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_PORT_INFO ), + .handle = ib_sma_port_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ), + .handle = ib_sma_pkey_table, + }, +}; + +/** + * Create subnet management agent and interface + * + * @v ibdev Infiniband device + * @v mi Management interface + * @ret rc Return status code + */ +int ib_create_sma ( struct ib_device *ibdev, struct ib_mad_interface *mi ) { + + /* Nothing to do */ + DBGC ( ibdev, "IBDEV %p SMA using SMI %p\n", ibdev, mi ); + + return 0; +} + +/** + * Destroy subnet management agent and interface + * + * @v ibdev Infiniband device + * @v mi Management interface + */ +void ib_destroy_sma ( struct ib_device *ibdev __unused, + struct ib_mad_interface *mi __unused ) { + /* Nothing to do */ +} diff --git a/gpxe/src/net/infiniband/ib_smc.c b/gpxe/src/net/infiniband/ib_smc.c new 
file mode 100644 index 00000000..d308dd9d --- /dev/null +++ b/gpxe/src/net/infiniband/ib_smc.c @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <byteswap.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_smc.h> + +/** + * @file + * + * Infiniband Subnet Management Client + * + */ + +/** + * Get port information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_port_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Construct MAD */ + memset ( mad, 0, sizeof ( *mad ) ); + mad->hdr.base_version = IB_MGMT_BASE_VERSION; + mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->hdr.class_version = 1; + mad->hdr.method = IB_MGMT_METHOD_GET; + mad->hdr.attr_id = htons ( IB_SMP_ATTR_PORT_INFO ); + mad->hdr.attr_mod = htonl ( ibdev->port ); + + if ( ( rc = local_mad ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get port info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + 
* Get GUID information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_guid_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Construct MAD */ + memset ( mad, 0, sizeof ( *mad ) ); + mad->hdr.base_version = IB_MGMT_BASE_VERSION; + mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->hdr.class_version = 1; + mad->hdr.method = IB_MGMT_METHOD_GET; + mad->hdr.attr_id = htons ( IB_SMP_ATTR_GUID_INFO ); + + if ( ( rc = local_mad ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get partition key table + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_pkey_table ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Construct MAD */ + memset ( mad, 0, sizeof ( *mad ) ); + mad->hdr.base_version = IB_MGMT_BASE_VERSION; + mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->hdr.class_version = 1; + mad->hdr.method = IB_MGMT_METHOD_GET; + mad->hdr.attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ); + + if ( ( rc = local_mad ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get MAD parameters + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @ret rc Return status code + */ +int ib_smc_update ( struct ib_device *ibdev, ib_local_mad_t local_mad ) { + union ib_mad mad; + struct ib_port_info *port_info = &mad.smp.smp_data.port_info; + struct ib_guid_info *guid_info = &mad.smp.smp_data.guid_info; + struct ib_pkey_table *pkey_table = &mad.smp.smp_data.pkey_table; + int rc; + + /* 
Port info gives us the link state, the first half of the + * port GID and the SM LID. + */ + if ( ( rc = ib_smc_get_port_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->gid.u.half[0], port_info->gid_prefix, + sizeof ( ibdev->gid.u.half[0] ) ); + ibdev->lid = ntohs ( port_info->lid ); + ibdev->sm_lid = ntohs ( port_info->mastersm_lid ); + ibdev->link_width_enabled = port_info->link_width_enabled; + ibdev->link_width_supported = port_info->link_width_supported; + ibdev->link_width_active = port_info->link_width_active; + ibdev->link_speed_supported = + ( port_info->link_speed_supported__port_state >> 4 ); + ibdev->port_state = + ( port_info->link_speed_supported__port_state & 0xf ); + ibdev->link_speed_active = + ( port_info->link_speed_active__link_speed_enabled >> 4 ); + ibdev->link_speed_enabled = + ( port_info->link_speed_active__link_speed_enabled & 0xf ); + ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf ); + + /* GUID info gives us the second half of the port GID */ + if ( ( rc = ib_smc_get_guid_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->gid.u.half[1], guid_info->guid[0], + sizeof ( ibdev->gid.u.half[1] ) ); + + /* Get partition key */ + if ( ( rc = ib_smc_get_pkey_table ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + ibdev->pkey = ntohs ( pkey_table->pkey[0] ); + + DBGC ( ibdev, "IBDEV %p port GID is %08x:%08x:%08x:%08x\n", ibdev, + htonl ( ibdev->gid.u.dwords[0] ), + htonl ( ibdev->gid.u.dwords[1] ), + htonl ( ibdev->gid.u.dwords[2] ), + htonl ( ibdev->gid.u.dwords[3] ) ); + + return 0; +} diff --git a/gpxe/src/net/infiniband/ib_srp.c b/gpxe/src/net/infiniband/ib_srp.c new file mode 100644 index 00000000..c156d3ae --- /dev/null +++ b/gpxe/src/net/infiniband/ib_srp.c @@ -0,0 +1,406 @@ +/* + * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +FILE_LICENCE ( BSD2 ); + +#include <stdlib.h> +#include <errno.h> +#include <gpxe/srp.h> +#include <gpxe/infiniband.h> +#include <gpxe/ib_cmrc.h> +#include <gpxe/ib_srp.h> + +/** + * @file + * + * SCSI RDMA Protocol over Infiniband + * + */ + +/* Disambiguate the various possible EINVALs */ +#define EINVAL_BYTE_STRING_LEN ( EINVAL | EUNIQ_01 ) +#define EINVAL_BYTE_STRING ( EINVAL | EUNIQ_02 ) +#define EINVAL_INTEGER ( EINVAL | EUNIQ_03 ) +#define EINVAL_RP_TOO_SHORT ( EINVAL | EUNIQ_04 ) + +/** IB SRP parse flags */ +enum ib_srp_parse_flags { + IB_SRP_PARSE_REQUIRED = 0x0000, + IB_SRP_PARSE_OPTIONAL = 0x8000, + IB_SRP_PARSE_FLAG_MASK = 0xf000, +}; + +/** IB SRP root path parameters */ +struct ib_srp_root_path { + /** SCSI LUN */ + struct scsi_lun *lun; + /** SRP port IDs */ + struct srp_port_ids *port_ids; + /** IB SRP parameters */ + struct ib_srp_parameters *ib; +}; + +/** + * Parse IB SRP root path byte-string value + * + * @v rp_comp Root path component string + * @v default_value Default value to use if component string is empty + * @ret value Value + */ +static int ib_srp_parse_byte_string ( const char *rp_comp, uint8_t *bytes, + unsigned int size_flags ) { + size_t size = ( size_flags & ~IB_SRP_PARSE_FLAG_MASK ); + size_t rp_comp_len = strlen ( rp_comp ); + char buf[3]; + char *buf_end; + + /* Allow optional components to be empty */ + if ( ( rp_comp_len == 0 ) && + ( size_flags & IB_SRP_PARSE_OPTIONAL ) ) + return 0; + + /* Check string length */ + if ( rp_comp_len != ( 2 * size ) ) + return -EINVAL_BYTE_STRING_LEN; + + /* Parse byte string */ + for ( ; size ; size--, rp_comp += 2, bytes++ ) { + memcpy ( buf, rp_comp, 2 ); + buf[2] = '\0'; + *bytes = strtoul ( buf, &buf_end, 16 ); + if ( buf_end != &buf[2] ) + return -EINVAL_BYTE_STRING; + } + return 0; +} + +/** + * Parse IB SRP root path integer value + * + * @v rp_comp Root path component string + * @v default_value Default value to use if component string is empty + * @ret value Value + */ 
+static int ib_srp_parse_integer ( const char *rp_comp, int default_value ) { + int value; + char *end; + + value = strtoul ( rp_comp, &end, 16 ); + if ( *end ) + return -EINVAL_INTEGER; + + if ( end == rp_comp ) + return default_value; + + return value; +} + +/** + * Parse IB SRP root path literal component + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_literal ( const char *rp_comp __unused, + struct ib_srp_root_path *rp __unused ) { + /* Ignore */ + return 0; +} + +/** + * Parse IB SRP root path source GID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_sgid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_device *ibdev; + + /* Default to the GID of the last opened Infiniband device */ + if ( ( ibdev = last_opened_ibdev() ) != NULL ) + memcpy ( &rp->ib->sgid, &ibdev->gid, sizeof ( rp->ib->sgid ) ); + + return ib_srp_parse_byte_string ( rp_comp, rp->ib->sgid.u.bytes, + ( sizeof ( rp->ib->sgid ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path initiator identifier extension + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_initiator_id_ext ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_srp_initiator_port_id *port_id = + ib_srp_initiator_port_id ( rp->port_ids ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->id_ext.u.bytes, + ( sizeof ( port_id->id_ext ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path initiator HCA GUID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_initiator_hca_guid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_srp_initiator_port_id *port_id = + ib_srp_initiator_port_id ( rp->port_ids ); + + /* 
Default to the GUID portion of the source GID */ + memcpy ( &port_id->hca_guid, &rp->ib->sgid.u.half[1], + sizeof ( port_id->hca_guid ) ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->hca_guid.u.bytes, + ( sizeof ( port_id->hca_guid ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path destination GID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_dgid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return ib_srp_parse_byte_string ( rp_comp, rp->ib->dgid.u.bytes, + ( sizeof ( rp->ib->dgid ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path partition key + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_pkey ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + int pkey; + + if ( ( pkey = ib_srp_parse_integer ( rp_comp, IB_PKEY_DEFAULT ) ) < 0 ) + return pkey; + rp->ib->pkey = pkey; + return 0; +} + +/** + * Parse IB SRP root path service ID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_service_id ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return ib_srp_parse_byte_string ( rp_comp, rp->ib->service_id.u.bytes, + ( sizeof ( rp->ib->service_id ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path LUN + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_lun ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return scsi_parse_lun ( rp_comp, rp->lun ); +} + +/** + * Parse IB SRP root path target identifier extension + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_target_id_ext ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct 
ib_srp_target_port_id *port_id = + ib_srp_target_port_id ( rp->port_ids ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->id_ext.u.bytes, + ( sizeof ( port_id->id_ext ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path target I/O controller GUID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_target_ioc_guid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_srp_target_port_id *port_id = + ib_srp_target_port_id ( rp->port_ids ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->ioc_guid.u.bytes, + ( sizeof ( port_id->ioc_guid ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** IB SRP root path component parser */ +struct ib_srp_root_path_parser { + /** + * Parse IB SRP root path component + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ + int ( * parse ) ( const char *rp_comp, struct ib_srp_root_path *rp ); +}; + +/** IB SRP root path components */ +static struct ib_srp_root_path_parser ib_srp_rp_parser[] = { + { ib_srp_parse_literal }, + { ib_srp_parse_sgid }, + { ib_srp_parse_initiator_id_ext }, + { ib_srp_parse_initiator_hca_guid }, + { ib_srp_parse_dgid }, + { ib_srp_parse_pkey }, + { ib_srp_parse_service_id }, + { ib_srp_parse_lun }, + { ib_srp_parse_target_id_ext }, + { ib_srp_parse_target_ioc_guid }, +}; + +/** Number of IB SRP root path components */ +#define IB_SRP_NUM_RP_COMPONENTS \ + ( sizeof ( ib_srp_rp_parser ) / sizeof ( ib_srp_rp_parser[0] ) ) + +/** + * Parse IB SRP root path + * + * @v srp SRP device + * @v rp_string Root path + * @ret rc Return status code + */ +static int ib_srp_parse_root_path ( struct srp_device *srp, + const char *rp_string ) { + struct ib_srp_parameters *ib_params = ib_srp_params ( srp ); + struct ib_srp_root_path rp = { + .lun = &srp->lun, + .port_ids = &srp->port_ids, + .ib = ib_params, + }; + char rp_string_copy[ strlen ( rp_string ) + 1 
]; + char *rp_comp[IB_SRP_NUM_RP_COMPONENTS]; + char *rp_string_tmp = rp_string_copy; + unsigned int i = 0; + int rc; + + /* Split root path into component parts */ + strcpy ( rp_string_copy, rp_string ); + while ( 1 ) { + rp_comp[i++] = rp_string_tmp; + if ( i == IB_SRP_NUM_RP_COMPONENTS ) + break; + for ( ; *rp_string_tmp != ':' ; rp_string_tmp++ ) { + if ( ! *rp_string_tmp ) { + DBGC ( srp, "SRP %p root path \"%s\" too " + "short\n", srp, rp_string ); + return -EINVAL_RP_TOO_SHORT; + } + } + *(rp_string_tmp++) = '\0'; + } + + /* Parse root path components */ + for ( i = 0 ; i < IB_SRP_NUM_RP_COMPONENTS ; i++ ) { + if ( ( rc = ib_srp_rp_parser[i].parse ( rp_comp[i], + &rp ) ) != 0 ) { + DBGC ( srp, "SRP %p could not parse \"%s\" in root " + "path \"%s\": %s\n", srp, rp_comp[i], + rp_string, strerror ( rc ) ); + return rc; + } + } + + return 0; +} + +/** + * Connect IB SRP session + * + * @v srp SRP device + * @ret rc Return status code + */ +static int ib_srp_connect ( struct srp_device *srp ) { + struct ib_srp_parameters *ib_params = ib_srp_params ( srp ); + struct ib_device *ibdev; + int rc; + + /* Identify Infiniband device */ + ibdev = find_ibdev ( &ib_params->sgid ); + if ( ! 
ibdev ) { + DBGC ( srp, "SRP %p could not identify Infiniband device\n", + srp ); + return -ENODEV; + } + + /* Configure remaining SRP parameters */ + srp->memory_handle = ibdev->rdma_key; + + /* Open CMRC socket */ + if ( ( rc = ib_cmrc_open ( &srp->socket, ibdev, &ib_params->dgid, + &ib_params->service_id ) ) != 0 ) { + DBGC ( srp, "SRP %p could not open CMRC socket: %s\n", + srp, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** IB SRP transport type */ +struct srp_transport_type ib_srp_transport = { + .priv_len = sizeof ( struct ib_srp_parameters ), + .parse_root_path = ib_srp_parse_root_path, + .connect = ib_srp_connect, +}; diff --git a/gpxe/src/net/iobpad.c b/gpxe/src/net/iobpad.c index 9961edca..cbae221a 100644 --- a/gpxe/src/net/iobpad.c +++ b/gpxe/src/net/iobpad.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + /** * @file * diff --git a/gpxe/src/net/ipv4.c b/gpxe/src/net/ipv4.c index 8668d44b..4c1393f2 100644 --- a/gpxe/src/net/ipv4.c +++ b/gpxe/src/net/ipv4.c @@ -21,6 +21,8 @@ * */ +FILE_LICENCE ( GPL2_OR_LATER ); + /* Unique IP datagram identification number */ static uint16_t next_ident = 0; @@ -38,7 +40,7 @@ static LIST_HEAD ( frag_buffers ); * @v netdev Network device * @v address IPv4 address * @v netmask Subnet mask - * @v gateway Gateway address (or @c INADDR_NONE for no gateway) + * @v gateway Gateway address (if any) * @ret miniroute Routing table entry, or NULL */ static struct ipv4_miniroute * __malloc @@ -48,7 +50,7 @@ add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address, DBG ( "IPv4 add %s", inet_ntoa ( address ) ); DBG ( "/%s ", inet_ntoa ( netmask ) ); - if ( gateway.s_addr != INADDR_NONE ) + if ( gateway.s_addr ) DBG ( "gw %s ", inet_ntoa ( gateway ) ); DBG ( "via %s\n", netdev->name ); @@ -68,7 +70,7 @@ add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address, /* Add to end of list if we have a gateway, otherwise * to start of 
list. */ - if ( gateway.s_addr != INADDR_NONE ) { + if ( gateway.s_addr ) { list_add_tail ( &miniroute->list, &ipv4_miniroutes ); } else { list_add ( &miniroute->list, &ipv4_miniroutes ); @@ -86,7 +88,7 @@ static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) { DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) ); DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) ); - if ( miniroute->gateway.s_addr != INADDR_NONE ) + if ( miniroute->gateway.s_addr ) DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) ); DBG ( "via %s\n", miniroute->netdev->name ); @@ -116,9 +118,11 @@ static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) { /* Find first usable route in routing table */ list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) { + if ( ! ( miniroute->netdev->state & NETDEV_OPEN ) ) + continue; local = ( ( ( dest->s_addr ^ miniroute->address.s_addr ) & miniroute->netmask.s_addr ) == 0 ); - has_gw = ( miniroute->gateway.s_addr != INADDR_NONE ); + has_gw = ( miniroute->gateway.s_addr ); if ( local || has_gw ) { if ( ! local ) *dest = miniroute->gateway; @@ -269,7 +273,7 @@ static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src, if ( dest.s_addr == INADDR_BROADCAST ) { /* Broadcast address */ - memcpy ( ll_dest, ll_protocol->ll_broadcast, + memcpy ( ll_dest, netdev->ll_broadcast, ll_protocol->ll_addr_len ); return 0; } else if ( IN_MULTICAST ( ntohl ( dest.s_addr ) ) ) { @@ -584,7 +588,7 @@ static int ipv4_create_routes ( void ) { struct settings *settings; struct in_addr address = { 0 }; struct in_addr netmask = { 0 }; - struct in_addr gateway = { INADDR_NONE }; + struct in_addr gateway = { 0 }; /* Delete all existing routes */ list_for_each_entry_safe ( miniroute, tmp, &ipv4_miniroutes, list ) @@ -598,20 +602,19 @@ static int ipv4_create_routes ( void ) { fetch_ipv4_setting ( settings, &ip_setting, &address ); if ( ! 
address.s_addr ) continue; - /* Calculate default netmask */ - if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) { - netmask.s_addr = htonl ( IN_CLASSA_NET ); - } else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) { - netmask.s_addr = htonl ( IN_CLASSB_NET ); - } else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) { - netmask.s_addr = htonl ( IN_CLASSC_NET ); - } else { - netmask.s_addr = 0; - } - /* Override with subnet mask, if present */ + /* Get subnet mask */ fetch_ipv4_setting ( settings, &netmask_setting, &netmask ); + /* Calculate default netmask, if necessary */ + if ( ! netmask.s_addr ) { + if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) { + netmask.s_addr = htonl ( IN_CLASSA_NET ); + } else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) { + netmask.s_addr = htonl ( IN_CLASSB_NET ); + } else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) { + netmask.s_addr = htonl ( IN_CLASSC_NET ); + } + } /* Get default gateway, if present */ - gateway.s_addr = INADDR_NONE; fetch_ipv4_setting ( settings, &gateway_setting, &gateway ); /* Configure route */ miniroute = add_ipv4_miniroute ( netdev, address, diff --git a/gpxe/src/net/mii.c b/gpxe/src/net/mii.c new file mode 100644 index 00000000..0de64428 --- /dev/null +++ b/gpxe/src/net/mii.c @@ -0,0 +1,147 @@ +/* + + mii.c: MII interface library + + Ported to gPXE by Daniel Verkamp <daniel@drv.nu> + from Linux drivers/net/mii.c + + Maintained by Jeff Garzik <jgarzik@pobox.com> + Copyright 2001,2002 Jeff Garzik + + Various code came from myson803.c and other files by + Donald Becker. Copyright: + + Written 1998-2002 by Donald Becker. + + This software may be used and distributed according + to the terms of the GNU General Public License (GPL), + incorporated herein by reference. Drivers based on + or derived from this code fall under the GPL and must + retain the authorship, copyright and license notice. + This file is not a complete program and may only be + used when the entire operating system is licensed + under the GPL. 
+ + The author may be reached as becker@scyld.com, or C/O + Scyld Computing Corporation + 410 Severn Ave., Suite 210 + Annapolis MD 21403 + +*/ + +#include <mii.h> + +/** + * mii_link_ok - is link status up/ok + * @mii: the MII interface + * + * Returns 1 if the MII reports link status up/ok, 0 otherwise. + */ +int +mii_link_ok ( struct mii_if_info *mii ) +{ + /* first, a dummy read, needed to latch some MII phys */ + mii->mdio_read ( mii->dev, mii->phy_id, MII_BMSR ); + if ( mii->mdio_read ( mii->dev, mii->phy_id, MII_BMSR ) & BMSR_LSTATUS ) + return 1; + return 0; +} + +/** + * mii_check_link - check MII link status + * @mii: MII interface + * + * If the link status changed (previous != current), call + * netif_carrier_on() if current link status is Up or call + * netif_carrier_off() if current link status is Down. + */ +void +mii_check_link ( struct mii_if_info *mii ) +{ + int cur_link = mii_link_ok ( mii ); + int prev_link = netdev_link_ok ( mii->dev ); + + if ( cur_link && !prev_link ) + netdev_link_up ( mii->dev ); + else if (prev_link && !cur_link) + netdev_link_down ( mii->dev ); +} + + +/** + * mii_check_media - check the MII interface for a duplex change + * @mii: the MII interface + * @ok_to_print: OK to print link up/down messages + * @init_media: OK to save duplex mode in @mii + * + * Returns 1 if the duplex mode changed, 0 if not. + * If the media type is forced, always returns 0. + */ +unsigned int +mii_check_media ( struct mii_if_info *mii, + unsigned int ok_to_print, + unsigned int init_media ) +{ + unsigned int old_carrier, new_carrier; + int advertise, lpa, media, duplex; + int lpa2 = 0; + + /* if forced media, go no further */ + if (mii->force_media) + return 0; /* duplex did not change */ + + /* check current and old link status */ + old_carrier = netdev_link_ok ( mii->dev ) ? 
1 : 0; + new_carrier = (unsigned int) mii_link_ok ( mii ); + + /* if carrier state did not change, this is a "bounce", + * just exit as everything is already set correctly + */ + if ( ( ! init_media ) && ( old_carrier == new_carrier ) ) + return 0; /* duplex did not change */ + + /* no carrier, nothing much to do */ + if ( ! new_carrier ) { + netdev_link_down ( mii->dev ); + if ( ok_to_print ) + DBG ( "%s: link down\n", mii->dev->name); + return 0; /* duplex did not change */ + } + + /* + * we have carrier, see who's on the other end + */ + netdev_link_up ( mii->dev ); + + /* get MII advertise and LPA values */ + if ( ( ! init_media ) && ( mii->advertising ) ) { + advertise = mii->advertising; + } else { + advertise = mii->mdio_read ( mii->dev, mii->phy_id, MII_ADVERTISE ); + mii->advertising = advertise; + } + lpa = mii->mdio_read ( mii->dev, mii->phy_id, MII_LPA ); + if ( mii->supports_gmii ) + lpa2 = mii->mdio_read ( mii->dev, mii->phy_id, MII_STAT1000 ); + + /* figure out media and duplex from advertise and LPA values */ + media = mii_nway_result ( lpa & advertise ); + duplex = ( media & ADVERTISE_FULL ) ? 1 : 0; + if ( lpa2 & LPA_1000FULL ) + duplex = 1; + + if ( ok_to_print ) + DBG ( "%s: link up, %sMbps, %s-duplex, lpa 0x%04X\n", + mii->dev->name, + lpa2 & ( LPA_1000FULL | LPA_1000HALF ) ? "1000" : + media & ( ADVERTISE_100FULL | ADVERTISE_100HALF ) ? "100" : "10", + duplex ? "full" : "half", + lpa); + + if ( ( init_media ) || ( mii->full_duplex != duplex ) ) { + mii->full_duplex = duplex; + return 1; /* duplex changed */ + } + + return 0; /* duplex did not change */ +} diff --git a/gpxe/src/net/netdev_settings.c b/gpxe/src/net/netdev_settings.c index 44aca7d8..d814193b 100644 --- a/gpxe/src/net/netdev_settings.c +++ b/gpxe/src/net/netdev_settings.c @@ -16,10 +16,14 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <string.h> #include <errno.h> +#include <byteswap.h> #include <gpxe/dhcp.h> #include <gpxe/settings.h> +#include <gpxe/device.h> #include <gpxe/netdevice.h> /** @file @@ -34,6 +38,11 @@ struct setting mac_setting __setting = { .description = "MAC address", .type = &setting_type_hex, }; +struct setting busid_setting __setting = { + .name = "busid", + .description = "Bus ID", + .type = &setting_type_hex, +}; /** * Store value of network device setting @@ -54,9 +63,9 @@ static int netdev_store ( struct settings *settings, struct setting *setting, return -EINVAL; memcpy ( netdev->ll_addr, data, len ); return 0; - } else { - return simple_settings_store ( settings, setting, data, len ); } + + return generic_settings_store ( settings, setting, data, len ); } /** @@ -72,19 +81,40 @@ static int netdev_fetch ( struct settings *settings, struct setting *setting, void *data, size_t len ) { struct net_device *netdev = container_of ( settings, struct net_device, settings.settings ); + struct device_description *desc = &netdev->dev->desc; + struct dhcp_netdev_desc dhcp_desc; if ( setting_cmp ( setting, &mac_setting ) == 0 ) { if ( len > netdev->ll_protocol->ll_addr_len ) len = netdev->ll_protocol->ll_addr_len; memcpy ( data, netdev->ll_addr, len ); return netdev->ll_protocol->ll_addr_len; - } else { - return simple_settings_fetch ( settings, setting, data, len ); } + if ( setting_cmp ( setting, &busid_setting ) == 0 ) { + dhcp_desc.type = desc->bus_type; + dhcp_desc.vendor = htons ( desc->vendor ); + dhcp_desc.device = htons ( desc->device ); + if ( len > sizeof ( dhcp_desc ) ) + len = sizeof ( dhcp_desc ); + memcpy ( data, &dhcp_desc, len ); + return sizeof ( dhcp_desc ); + } + + return generic_settings_fetch ( settings, setting, data, len ); +} + +/** + * Clear network device settings + * + * @v settings Settings block + */ +static void netdev_clear ( struct settings *settings ) { + generic_settings_clear ( settings ); } /** 
Network device configuration settings operations */ struct settings_operations netdev_settings_operations = { .store = netdev_store, .fetch = netdev_fetch, + .clear = netdev_clear, }; diff --git a/gpxe/src/net/netdevice.c b/gpxe/src/net/netdevice.c index 9e142d27..ee0d0b72 100644 --- a/gpxe/src/net/netdevice.c +++ b/gpxe/src/net/netdevice.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <stdlib.h> #include <stdio.h> @@ -28,6 +30,7 @@ #include <gpxe/process.h> #include <gpxe/init.h> #include <gpxe/device.h> +#include <gpxe/errortab.h> #include <gpxe/netdevice.h> /** @file @@ -36,17 +39,39 @@ * */ -/** Registered network-layer protocols */ -static struct net_protocol net_protocols[0] - __table_start ( struct net_protocol, net_protocols ); -static struct net_protocol net_protocols_end[0] - __table_end ( struct net_protocol, net_protocols ); - /** List of network devices */ struct list_head net_devices = LIST_HEAD_INIT ( net_devices ); /** List of open network devices, in reverse order of opening */ -struct list_head open_net_devices = LIST_HEAD_INIT ( open_net_devices ); +static struct list_head open_net_devices = LIST_HEAD_INIT ( open_net_devices ); + +/** Default link status code */ +#define EUNKNOWN_LINK_STATUS EINPROGRESS + +/** Human-readable message for the default link status */ +struct errortab netdev_errors[] __errortab = { + { EUNKNOWN_LINK_STATUS, "Unknown" }, +}; + +/** + * Mark network device as having link down + * + * @v netdev Network device + */ +void netdev_link_down ( struct net_device *netdev ) { + + switch ( netdev->link_rc ) { + case 0: + case -EUNKNOWN_LINK_STATUS: + netdev->link_rc = -ENOTCONN; + break; + default: + /* Avoid clobbering a more detailed link status code, + * if one is already set. 
+ */ + break; + } +} /** * Record network device statistic @@ -286,6 +311,7 @@ static void free_netdev ( struct refcnt *refcnt ) { netdev_tx_flush ( netdev ); netdev_rx_flush ( netdev ); + clear_settings ( netdev_settings ( netdev ) ); free ( netdev ); } @@ -305,11 +331,10 @@ struct net_device * alloc_netdev ( size_t priv_size ) { netdev = zalloc ( total_len ); if ( netdev ) { netdev->refcnt.free = free_netdev; + netdev->link_rc = -EUNKNOWN_LINK_STATUS; INIT_LIST_HEAD ( &netdev->tx_queue ); INIT_LIST_HEAD ( &netdev->rx_queue ); - settings_init ( netdev_settings ( netdev ), - &netdev_settings_operations, &netdev->refcnt, - netdev->name, 0 ); + netdev_settings_init ( netdev ); netdev->priv = ( ( ( void * ) netdev ) + sizeof ( *netdev ) ); } return netdev; @@ -332,6 +357,9 @@ int register_netdev ( struct net_device *netdev ) { snprintf ( netdev->name, sizeof ( netdev->name ), "net%d", ifindex++ ); + /* Set initial link-layer address */ + netdev->ll_protocol->init_addr ( netdev->hw_addr, netdev->ll_addr ); + /* Register per-netdev configuration settings */ if ( ( rc = register_settings ( netdev_settings ( netdev ), NULL ) ) != 0 ) { @@ -345,7 +373,7 @@ int register_netdev ( struct net_device *netdev ) { list_add_tail ( &netdev->list, &net_devices ); DBGC ( netdev, "NETDEV %p registered as %s (phys %s hwaddr %s)\n", netdev, netdev->name, netdev->dev->name, - netdev_hwaddr ( netdev ) ); + netdev_addr ( netdev ) ); return 0; } @@ -514,7 +542,7 @@ int net_tx ( struct io_buffer *iobuf, struct net_device *netdev, netdev_poll ( netdev ); /* Add link-layer header */ - if ( ( rc = ll_protocol->push ( iobuf, ll_dest, netdev->ll_addr, + if ( ( rc = ll_protocol->push ( netdev, iobuf, ll_dest, netdev->ll_addr, net_protocol->net_proto ) ) != 0 ) { free_iob ( iobuf ); return rc; @@ -538,12 +566,13 @@ int net_rx ( struct io_buffer *iobuf, struct net_device *netdev, struct net_protocol *net_protocol; /* Hand off to network-layer protocol, if any */ - for ( net_protocol = net_protocols 
; net_protocol < net_protocols_end ; - net_protocol++ ) { - if ( net_protocol->net_proto == net_proto ) { + for_each_table_entry ( net_protocol, NET_PROTOCOLS ) { + if ( net_protocol->net_proto == net_proto ) return net_protocol->rx ( iobuf, netdev, ll_source ); - } } + + DBGC ( netdev, "NETDEV %p unknown network protocol %04x\n", + netdev, ntohs ( net_proto ) ); free_iob ( iobuf ); return 0; } @@ -585,8 +614,8 @@ static void net_step ( struct process *process __unused ) { /* Remove link-layer header */ ll_protocol = netdev->ll_protocol; - if ( ( rc = ll_protocol->pull ( iobuf, &ll_dest, - &ll_source, + if ( ( rc = ll_protocol->pull ( netdev, iobuf, + &ll_dest, &ll_source, &net_proto ) ) != 0 ) { free_iob ( iobuf ); continue; @@ -599,5 +628,6 @@ static void net_step ( struct process *process __unused ) { /** Networking stack process */ struct process net_process __permanent_process = { + .list = LIST_HEAD_INIT ( net_process.list ), .step = net_step, }; diff --git a/gpxe/src/net/nullnet.c b/gpxe/src/net/nullnet.c index 7e199ce3..381f02a6 100644 --- a/gpxe/src/net/nullnet.c +++ b/gpxe/src/net/nullnet.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <errno.h> #include <gpxe/iobuf.h> diff --git a/gpxe/src/net/rarp.c b/gpxe/src/net/rarp.c index bb5e6ad7..1d0dd961 100644 --- a/gpxe/src/net/rarp.c +++ b/gpxe/src/net/rarp.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <byteswap.h> #include <gpxe/netdevice.h> diff --git a/gpxe/src/net/retry.c b/gpxe/src/net/retry.c index cd793a7f..40f656f2 100644 --- a/gpxe/src/net/retry.c +++ b/gpxe/src/net/retry.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stddef.h> #include <gpxe/timer.h> #include <gpxe/list.h> @@ -185,5 +187,6 @@ static void retry_step ( struct process *process __unused ) { /** Retry timer process */ struct process retry_process __permanent_process = { + .list = LIST_HEAD_INIT ( retry_process.list ), .step = retry_step, }; diff --git a/gpxe/src/net/tcp.c b/gpxe/src/net/tcp.c index 6bcd193c..a0619622 100644 --- a/gpxe/src/net/tcp.c +++ b/gpxe/src/net/tcp.c @@ -21,6 +21,8 @@ * */ +FILE_LICENCE ( GPL2_OR_LATER ); + /** A TCP connection */ struct tcp_connection { /** Reference counter */ @@ -142,15 +144,15 @@ tcp_dump_state ( struct tcp_connection *tcp ) { static inline __attribute__ (( always_inline )) void tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) { if ( flags & TCP_RST ) - DBGC ( tcp, " RST" ); + DBGC2 ( tcp, " RST" ); if ( flags & TCP_SYN ) - DBGC ( tcp, " SYN" ); + DBGC2 ( tcp, " SYN" ); if ( flags & TCP_PSH ) - DBGC ( tcp, " PSH" ); + DBGC2 ( tcp, " PSH" ); if ( flags & TCP_FIN ) - DBGC ( tcp, " FIN" ); + DBGC2 ( tcp, " FIN" ); if ( flags & TCP_ACK ) - DBGC ( tcp, " ACK" ); + DBGC2 ( tcp, " ACK" ); } /*************************************************************************** @@ -397,9 +399,10 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) { void *payload; unsigned int flags; size_t len = 0; - size_t seq_len; - size_t app_win; - size_t max_rcv_win; + uint32_t seq_len; + uint32_t app_win; + uint32_t max_rcv_win; + int rc; /* If retransmission timer is already running, do nothing */ if ( timer_running ( &tcp->timer ) ) @@ -436,7 +439,9 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) { /* Allocate I/O buffer */ iobuf = alloc_iob ( len + MAX_HDR_LEN ); if ( ! 
iobuf ) { - DBGC ( tcp, "TCP %p could not allocate data buffer\n", tcp ); + DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x " + "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ), + tcp->rcv_ack ); return -ENOMEM; } iob_reserve ( iobuf, MAX_HDR_LEN ); @@ -471,6 +476,8 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) { tsopt->tsopt.tsval = ntohl ( currticks() ); tsopt->tsopt.tsecr = ntohl ( tcp->ts_recent ); } + if ( ! ( flags & TCP_SYN ) ) + flags |= TCP_PSH; tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); memset ( tcphdr, 0, sizeof ( *tcphdr ) ); tcphdr->src = tcp->local_port; @@ -478,21 +485,28 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) { tcphdr->seq = htonl ( tcp->snd_seq ); tcphdr->ack = htonl ( tcp->rcv_ack ); tcphdr->hlen = ( ( payload - iobuf->data ) << 2 ); - tcphdr->flags = ( flags | TCP_PSH ); + tcphdr->flags = flags; tcphdr->win = htons ( tcp->rcv_win ); tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); /* Dump header */ - DBGC ( tcp, "TCP %p TX %d->%d %08x..%08zx %08x %4zd", - tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), - ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ), - ntohl ( tcphdr->ack ), len ); + DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4zd", + tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), + ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ), + ntohl ( tcphdr->ack ), len ); tcp_dump_flags ( tcp, tcphdr->flags ); - DBGC ( tcp, "\n" ); + DBGC2 ( tcp, "\n" ); /* Transmit packet */ - return tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL, - &tcphdr->csum ); + if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL, + &tcphdr->csum ) ) != 0 ) { + DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n", + tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), + tcp->rcv_ack, strerror ( rc ) ); + return rc; + } + + return 0; } /** @@ -506,8 +520,9 @@ static void tcp_expired ( struct retry_timer *timer, int over 
) { container_of ( timer, struct tcp_connection, timer ); int graceful_close = TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ); - DBGC ( tcp, "TCP %p timer %s in %s\n", tcp, - ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ) ); + DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp, + ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ), + tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack ); assert ( ( tcp->tcp_state == TCP_SYN_SENT ) || ( tcp->tcp_state == TCP_SYN_RCVD ) || @@ -542,11 +557,14 @@ static int tcp_xmit_reset ( struct tcp_connection *tcp, struct tcp_header *in_tcphdr ) { struct io_buffer *iobuf; struct tcp_header *tcphdr; + int rc; /* Allocate space for dataless TX buffer */ iobuf = alloc_iob ( MAX_HDR_LEN ); if ( ! iobuf ) { - DBGC ( tcp, "TCP %p could not allocate data buffer\n", tcp ); + DBGC ( tcp, "TCP %p could not allocate iobuf for RST " + "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ), + ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) ); return -ENOMEM; } iob_reserve ( iobuf, MAX_HDR_LEN ); @@ -564,16 +582,24 @@ static int tcp_xmit_reset ( struct tcp_connection *tcp, tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); /* Dump header */ - DBGC ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d", - tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), - ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ), - ntohl ( tcphdr->ack ), 0 ); + DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d", + tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), + ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ), + ntohl ( tcphdr->ack ), 0 ); tcp_dump_flags ( tcp, tcphdr->flags ); - DBGC ( tcp, "\n" ); + DBGC2 ( tcp, "\n" ); /* Transmit packet */ - return tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest, - NULL, &tcphdr->csum ); + if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest, + NULL, &tcphdr->csum ) ) != 0 ) { + DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: " + "%s\n", tcp, ntohl ( in_tcphdr->ack ), 
+ ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ), + strerror ( rc ) ); + return rc; + } + + return 0; } /*************************************************************************** @@ -645,7 +671,7 @@ static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data, * @v tcp TCP connection * @v seq_len Sequence space length to consume */ -static void tcp_rx_seq ( struct tcp_connection *tcp, size_t seq_len ) { +static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) { tcp->rcv_ack += seq_len; if ( tcp->rcv_win > seq_len ) { tcp->rcv_win -= seq_len; @@ -696,20 +722,42 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq, */ static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, uint32_t win ) { - size_t ack_len = ( ack - tcp->snd_seq ); + uint32_t ack_len = ( ack - tcp->snd_seq ); size_t len; unsigned int acked_flags; - /* Ignore duplicate or out-of-range ACK */ + /* Check for out-of-range or old duplicate ACKs */ if ( ack_len > tcp->snd_sent ) { - DBGC ( tcp, "TCP %p received ACK for [%08x,%08zx), " - "sent only [%08x,%08x)\n", tcp, tcp->snd_seq, + DBGC ( tcp, "TCP %p received ACK for %08x..%08x, " + "sent only %08x..%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + ack_len ), tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ) ); - return -EINVAL; + + if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) { + /* Just ignore what might be old duplicate ACKs */ + return 0; + } else { + /* Send RST if an out-of-range ACK is received + * on a not-yet-established connection, as per + * RFC 793. + */ + return -EINVAL; + } } - /* Acknowledge any flags being sent */ + /* Ignore ACKs that don't actually acknowledge any new data. + * (In particular, do not stop the retransmission timer; this + * avoids creating a sorceror's apprentice syndrome when a + * duplicate ACK is received and we still have data in our + * transmit queue.) 
+ */ + if ( ack_len == 0 ) + return 0; + + /* Stop the retransmission timer */ + stop_timer ( &tcp->timer ); + + /* Determine acknowledged flags and data length */ len = ack_len; acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) & ( TCP_SYN | TCP_FIN ) ); @@ -721,9 +769,6 @@ static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, tcp->snd_sent = 0; tcp->snd_win = win; - /* Stop the retransmission timer */ - stop_timer ( &tcp->timer ); - /* Remove any acknowledged data from transmit queue */ tcp_process_queue ( tcp, len, NULL, 1 ); @@ -750,8 +795,8 @@ static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, */ static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq, struct io_buffer *iobuf ) { - size_t already_rcvd; - size_t len; + uint32_t already_rcvd; + uint32_t len; int rc; /* Ignore duplicate or out-of-order data */ @@ -765,8 +810,11 @@ static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq, len -= already_rcvd; /* Deliver data to application */ - if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) + if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) { + DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n", + tcp, seq, ( seq + len ), strerror ( rc ) ); return rc; + } /* Acknowledge new data */ tcp_rx_seq ( tcp, len ); @@ -824,6 +872,7 @@ static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) { tcp_dump_state ( tcp ); tcp_close ( tcp, -ECONNRESET ); + DBGC ( tcp, "TCP %p connection reset by peer\n", tcp ); return -ECONNRESET; } @@ -894,13 +943,13 @@ static int tcp_rx ( struct io_buffer *iobuf, len = iob_len ( iobuf ); /* Dump header */ - DBGC ( tcp, "TCP %p RX %d<-%d %08x %08x..%08zx %4zd", - tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ), - ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ), - ( ntohl ( tcphdr->seq ) + len + - ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 
1 : 0 ) ), len); + DBGC2 ( tcp, "TCP %p RX %d<-%d %08x %08x..%08zx %4zd", + tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ), + ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ), + ( ntohl ( tcphdr->seq ) + len + + ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 1 : 0 )), len); tcp_dump_flags ( tcp, tcphdr->flags ); - DBGC ( tcp, "\n" ); + DBGC2 ( tcp, "\n" ); /* If no connection was found, send RST */ if ( ! tcp ) { diff --git a/gpxe/src/net/tcp/ftp.c b/gpxe/src/net/tcp/ftp.c index 445e32bb..920e537a 100644 --- a/gpxe/src/net/tcp/ftp.c +++ b/gpxe/src/net/tcp/ftp.c @@ -131,11 +131,33 @@ static const char * ftp_uri_path ( struct ftp_request *ftp ) { return ftp->uri->path; } +/** + * Retrieve FTP user + * + * @v ftp FTP request + * @ret user FTP user + */ +static const char * ftp_user ( struct ftp_request *ftp ) { + static char *ftp_default_user = "anonymous"; + return ftp->uri->user ? ftp->uri->user : ftp_default_user; +} + +/** + * Retrieve FTP password + * + * @v ftp FTP request + * @ret password FTP password + */ +static const char * ftp_password ( struct ftp_request *ftp ) { + static char *ftp_default_password = "etherboot@etherboot.org"; + return ftp->uri->password ? 
ftp->uri->password : ftp_default_password; +} + /** FTP control channel strings */ static struct ftp_control_string ftp_strings[] = { [FTP_CONNECT] = { NULL, NULL }, - [FTP_USER] = { "USER anonymous", NULL }, - [FTP_PASS] = { "PASS etherboot@etherboot.org", NULL }, + [FTP_USER] = { "USER ", ftp_user }, + [FTP_PASS] = { "PASS ", ftp_password }, [FTP_TYPE] = { "TYPE I", NULL }, [FTP_PASV] = { "PASV", NULL }, [FTP_RETR] = { "RETR ", ftp_uri_path }, @@ -335,7 +357,7 @@ static int ftp_control_deliver_raw ( struct xfer_interface *control, /** FTP control channel operations */ static struct xfer_interface_operations ftp_control_operations = { .close = ftp_control_close, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = unlimited_xfer_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = xfer_deliver_as_raw, @@ -402,7 +424,7 @@ static int ftp_data_deliver_iob ( struct xfer_interface *data, /** FTP data channel operations */ static struct xfer_interface_operations ftp_data_operations = { .close = ftp_data_closed, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = unlimited_xfer_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = ftp_data_deliver_iob, diff --git a/gpxe/src/net/tcp/http.c b/gpxe/src/net/tcp/http.c index 93ccfd3b..a365b2a4 100644 --- a/gpxe/src/net/tcp/http.c +++ b/gpxe/src/net/tcp/http.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +FILE_LICENCE ( GPL2_OR_LATER ); + /** * @file * @@ -138,6 +140,8 @@ static void http_done ( struct http_request *http, int rc ) { static int http_response_to_rc ( unsigned int response ) { switch ( response ) { case 200: + case 301: + case 302: return 0; case 404: return -ENOENT; @@ -181,6 +185,28 @@ static int http_rx_response ( struct http_request *http, char *response ) { } /** + * Handle HTTP Location header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + */ +static int http_rx_location ( struct http_request *http, const char *value ) { + int rc; + + /* Redirect to new location */ + DBGC ( http, "HTTP %p redirecting to %s\n", http, value ); + if ( ( rc = xfer_redirect ( &http->xfer, LOCATION_URI_STRING, + value ) ) != 0 ) { + DBGC ( http, "HTTP %p could not redirect: %s\n", + http, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** * Handle HTTP Content-Length header * * @v http HTTP request @@ -223,6 +249,10 @@ struct http_header_handler { /** List of HTTP header handlers */ static struct http_header_handler http_header_handlers[] = { { + .header = "Location", + .rx = http_rx_location, + }, + { .header = "Content-Length", .rx = http_rx_content_length, }, @@ -387,9 +417,7 @@ static int http_socket_deliver_iob ( struct xfer_interface *socket, static void http_step ( struct process *process ) { struct http_request *http = container_of ( process, struct http_request, process ); - const char *path = http->uri->path; const char *host = http->uri->host; - const char *query = http->uri->query; const char *user = http->uri->user; const char *password = ( http->uri->password ? 
http->uri->password : "" ); @@ -399,27 +427,24 @@ static void http_step ( struct process *process ) { char user_pw[ user_pw_len + 1 /* NUL */ ]; char user_pw_base64[ user_pw_base64_len + 1 /* NUL */ ]; int rc; + int request_len = unparse_uri ( NULL, 0, http->uri, + URI_PATH_BIT | URI_QUERY_BIT ); if ( xfer_window ( &http->socket ) ) { + char request[request_len + 1]; + + /* Construct path?query request */ + unparse_uri ( request, sizeof ( request ), http->uri, + URI_PATH_BIT | URI_QUERY_BIT ); /* We want to execute only once */ process_del ( &http->process ); /* Construct authorisation, if applicable */ if ( user ) { - char *buf = user_pw; - ssize_t remaining = sizeof ( user_pw ); - size_t len; - - /* URI-decode the username and password */ - len = uri_decode ( user, buf, remaining ); - buf += len; - remaining -= len; - *(remaining--, buf++) = ':'; - len = uri_decode ( password, buf, remaining ); - buf += len; - remaining -= len; - assert ( remaining >= 0 ); + /* Make "user:password" string from decoded fields */ + snprintf ( user_pw, sizeof ( user_pw ), "%s:%s", + user, password ); /* Base64-encode the "user:password" string */ base64_encode ( user_pw, user_pw_base64 ); @@ -427,14 +452,13 @@ static void http_step ( struct process *process ) { /* Send GET request */ if ( ( rc = xfer_printf ( &http->socket, - "GET %s%s%s HTTP/1.0\r\n" + "GET %s%s HTTP/1.0\r\n" "User-Agent: gPXE/" VERSION "\r\n" "%s%s%s" "Host: %s\r\n" "\r\n", - ( path ? path : "/" ), - ( query ? "?" : "" ), - ( query ? query : "" ), + http->uri->path ? "" : "/", + request, ( user ? "Authorization: Basic " : "" ), ( user ? 
user_pw_base64 : "" ), @@ -464,7 +488,7 @@ static void http_socket_close ( struct xfer_interface *socket, int rc ) { /** HTTP socket operations */ static struct xfer_interface_operations http_socket_operations = { .close = http_socket_close, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = unlimited_xfer_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = http_socket_deliver_iob, diff --git a/gpxe/src/net/tcp/https.c b/gpxe/src/net/tcp/https.c index 15ab32ef..7a2961f2 100644 --- a/gpxe/src/net/tcp/https.c +++ b/gpxe/src/net/tcp/https.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + /** * @file * diff --git a/gpxe/src/net/tcp/iscsi.c b/gpxe/src/net/tcp/iscsi.c index 45519e66..771384b9 100644 --- a/gpxe/src/net/tcp/iscsi.c +++ b/gpxe/src/net/tcp/iscsi.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stddef.h> #include <string.h> #include <stdlib.h> @@ -180,9 +182,10 @@ static void iscsi_close_connection ( struct iscsi_session *iscsi, int rc ) { static void iscsi_scsi_done ( struct iscsi_session *iscsi, int rc ) { assert ( iscsi->tx_state == ISCSI_TX_IDLE ); + assert ( iscsi->command != NULL ); + iscsi->command->rc = rc; iscsi->command = NULL; - iscsi->rc = rc; } /**************************************************************************** @@ -1514,7 +1517,7 @@ static int iscsi_vredirect ( struct xfer_interface *socket, int type, va_end ( tmp ); } - return xfer_vopen ( socket, type, args ); + return xfer_vreopen ( socket, type, args ); } @@ -1548,37 +1551,24 @@ static int iscsi_command ( struct scsi_device *scsi, container_of ( scsi->backend, struct iscsi_session, refcnt ); int rc; + /* Abort immediately if we have a recorded permanent failure */ + if ( iscsi->instant_rc ) + return iscsi->instant_rc; + /* Record SCSI command */ iscsi->command = command; - /* Abort immediately if we have a recorded 
permanent failure */ - if ( iscsi->instant_rc ) { - rc = iscsi->instant_rc; - goto done; - } - /* Issue command or open connection as appropriate */ if ( iscsi->status ) { iscsi_start_command ( iscsi ); } else { - if ( ( rc = iscsi_open_connection ( iscsi ) ) != 0 ) - goto done; + if ( ( rc = iscsi_open_connection ( iscsi ) ) != 0 ) { + iscsi->command = NULL; + return rc; + } } - /* Wait for command to complete */ - iscsi->rc = -EINPROGRESS; - while ( iscsi->rc == -EINPROGRESS ) - step(); - rc = iscsi->rc; - - done: - iscsi->command = NULL; - return rc; -} - -static int iscsi_detached_command ( struct scsi_device *scsi __unused, - struct scsi_command *command __unused ) { - return -ENODEV; + return 0; } /** @@ -1593,7 +1583,7 @@ void iscsi_detach ( struct scsi_device *scsi ) { xfer_nullify ( &iscsi->socket ); iscsi_close_connection ( iscsi, 0 ); process_del ( &iscsi->process ); - scsi->command = iscsi_detached_command; + scsi->command = scsi_detached_command; ref_put ( scsi->backend ); scsi->backend = NULL; } @@ -1616,42 +1606,6 @@ enum iscsi_root_path_component { }; /** - * Parse iSCSI LUN - * - * @v iscsi iSCSI session - * @v lun_string LUN string representation (as per RFC4173) - * @ret rc Return status code - */ -static int iscsi_parse_lun ( struct iscsi_session *iscsi, - const char *lun_string ) { - union { - uint64_t u64; - uint16_t u16[4]; - } lun; - char *p; - int i; - - memset ( &lun, 0, sizeof ( lun ) ); - if ( lun_string ) { - p = ( char * ) lun_string; - - for ( i = 0 ; i < 4 ; i++ ) { - lun.u16[i] = htons ( strtoul ( p, &p, 16 ) ); - if ( *p == '\0' ) - break; - if ( *p != '-' ) - return -EINVAL; - p++; - } - if ( *p ) - return -EINVAL; - } - - iscsi->lun = lun.u64; - return 0; -} - -/** * Parse iSCSI root path * * @v iscsi iSCSI session @@ -1689,7 +1643,7 @@ static int iscsi_parse_root_path ( struct iscsi_session *iscsi, iscsi->target_port = strtoul ( rp_comp[RP_PORT], NULL, 10 ); if ( ! 
iscsi->target_port ) iscsi->target_port = ISCSI_PORT; - if ( ( rc = iscsi_parse_lun ( iscsi, rp_comp[RP_LUN] ) ) != 0 ) { + if ( ( rc = scsi_parse_lun ( rp_comp[RP_LUN], &iscsi->lun ) ) != 0 ) { DBGC ( iscsi, "iSCSI %p invalid LUN \"%s\"\n", iscsi, rp_comp[RP_LUN] ); return rc; @@ -1809,7 +1763,6 @@ int iscsi_attach ( struct scsi_device *scsi, const char *root_path ) { /* Attach parent interface, mortalise self, and return */ scsi->backend = ref_get ( &iscsi->refcnt ); scsi->command = iscsi_command; - scsi->lun = iscsi->lun; ref_put ( &iscsi->refcnt ); return 0; diff --git a/gpxe/src/net/tcpip.c b/gpxe/src/net/tcpip.c index d4542b05..932fd482 100644 --- a/gpxe/src/net/tcpip.c +++ b/gpxe/src/net/tcpip.c @@ -14,17 +14,7 @@ * TCP/IP transport-network layer interface */ -/** Registered network-layer protocols that support TCP/IP */ -static struct tcpip_net_protocol tcpip_net_protocols[0] - __table_start ( struct tcpip_net_protocol, tcpip_net_protocols ); -static struct tcpip_net_protocol tcpip_net_protocols_end[0] - __table_end ( struct tcpip_net_protocol, tcpip_net_protocols ); - -/** Registered transport-layer protocols that support TCP/IP */ -static struct tcpip_protocol tcpip_protocols[0] - __table_start ( struct tcpip_protocol, tcpip_protocols ); -static struct tcpip_protocol tcpip_protocols_end[0] - __table_end ( struct tcpip_protocol, tcpip_protocols ); +FILE_LICENCE ( GPL2_OR_LATER ); /** Process a received TCP/IP packet * @@ -48,7 +38,7 @@ int tcpip_rx ( struct io_buffer *iobuf, uint8_t tcpip_proto, struct tcpip_protocol *tcpip; /* Hand off packet to the appropriate transport-layer protocol */ - for ( tcpip = tcpip_protocols; tcpip < tcpip_protocols_end; tcpip++ ) { + for_each_table_entry ( tcpip, TCPIP_PROTOCOLS ) { if ( tcpip->tcpip_proto == tcpip_proto ) { DBG ( "TCP/IP received %s packet\n", tcpip->name ); return tcpip->rx ( iobuf, st_src, st_dest, pshdr_csum ); @@ -76,8 +66,7 @@ int tcpip_tx ( struct io_buffer *iobuf, struct tcpip_protocol 
*tcpip_protocol, struct tcpip_net_protocol *tcpip_net; /* Hand off packet to the appropriate network-layer protocol */ - for ( tcpip_net = tcpip_net_protocols ; - tcpip_net < tcpip_net_protocols_end ; tcpip_net++ ) { + for_each_table_entry ( tcpip_net, TCPIP_NET_PROTOCOLS ) { if ( tcpip_net->sa_family == st_dest->st_family ) { DBG ( "TCP/IP sending %s packet\n", tcpip_net->name ); return tcpip_net->tx ( iobuf, tcpip_protocol, st_src, diff --git a/gpxe/src/net/tls.c b/gpxe/src/net/tls.c index 73f9ad06..a5b126ed 100644 --- a/gpxe/src/net/tls.c +++ b/gpxe/src/net/tls.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + /** * @file * @@ -1625,7 +1627,7 @@ static int tls_cipherstream_deliver_raw ( struct xfer_interface *xfer, /** TLS ciphertext stream operations */ static struct xfer_interface_operations tls_cipherstream_operations = { .close = tls_cipherstream_close, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = filter_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = xfer_deliver_as_raw, diff --git a/gpxe/src/net/udp.c b/gpxe/src/net/udp.c index c3a1eba3..771655e0 100644 --- a/gpxe/src/net/udp.c +++ b/gpxe/src/net/udp.c @@ -16,6 +16,8 @@ * UDP protocol */ +FILE_LICENCE ( GPL2_OR_LATER ); + /** * A UDP connection * @@ -238,7 +240,7 @@ static int udp_tx ( struct udp_connection *udp, struct io_buffer *iobuf, * @ret udp UDP connection, or NULL */ static struct udp_connection * udp_demux ( struct sockaddr_tcpip *local ) { - static const struct sockaddr_tcpip empty_sockaddr; + static const struct sockaddr_tcpip empty_sockaddr = { .pad = { 0, } }; struct udp_connection *udp; list_for_each_entry ( udp, &udp_conns, list ) { diff --git a/gpxe/src/net/udp/dhcp.c b/gpxe/src/net/udp/dhcp.c index ab843ce1..ce2c8207 100644 --- a/gpxe/src/net/udp/dhcp.c +++ b/gpxe/src/net/udp/dhcp.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <string.h> #include <stdlib.h> #include <stdio.h> @@ -69,6 +71,7 @@ static const uint8_t dhcp_op[] = { /** Raw option data for options common to all DHCP requests */ static uint8_t dhcp_request_options_data[] = { + DHCP_MESSAGE_TYPE, DHCP_BYTE ( 0 ), DHCP_MAX_MESSAGE_SIZE, DHCP_WORD ( ETH_MAX_MTU - 20 /* IP header */ - 8 /* UDP header */ ), DHCP_CLIENT_ARCHITECTURE, DHCP_WORD ( 0 ), @@ -88,10 +91,6 @@ static uint8_t dhcp_request_options_data[] = { DHCP_END }; -/** DHCP feature codes */ -static uint8_t dhcp_features[0] __table_start ( uint8_t, dhcp_features ); -static uint8_t dhcp_features_end[0] __table_end ( uint8_t, dhcp_features ); - /** Version number feature */ FEATURE_VERSION ( VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH ); @@ -111,6 +110,14 @@ struct setting user_class_setting __setting = { .type = &setting_type_string, }; +/** Use cached network settings */ +struct setting use_cached_setting __setting = { + .name = "use-cached", + .description = "Use cached network settings", + .tag = DHCP_EB_USE_CACHED, + .type = &setting_type_uint8, +}; + /** * Name a DHCP packet type * @@ -199,6 +206,36 @@ static struct dhcp_session_state dhcp_state_request; static struct dhcp_session_state dhcp_state_proxy; static struct dhcp_session_state dhcp_state_pxebs; +/** DHCP offer is valid for IP lease */ +#define DHCP_OFFER_IP 1 + +/** DHCP offer is valid for PXE options */ +#define DHCP_OFFER_PXE 2 + +/** A DHCP offer */ +struct dhcp_offer { + /** IP address of server granting offer */ + struct in_addr server; + + /** IP address being offered, or 0.0.0.0 for a pure proxy */ + struct in_addr ip; + + /** DHCP packet containing PXE options; NULL if missing or proxied */ + struct dhcp_packet *pxe; + + /** Valid uses for this offer, a combination of DHCP_OFFER bits */ + uint8_t valid; + + /** Priority of this offer */ + int8_t priority; + + /** Whether to ignore PXE DHCP extensions */ + uint8_t no_pxedhcp; +}; + +/** Maximum number of 
DHCP offers to queue */ +#define DHCP_MAX_OFFERS 6 + /** A DHCP session */ struct dhcp_session { /** Reference counter */ @@ -215,20 +252,6 @@ struct dhcp_session { /** State of the session */ struct dhcp_session_state *state; - /** Offered IP address */ - struct in_addr offer; - /** DHCP server */ - struct in_addr server; - /** DHCP offer priority */ - int priority; - - /** ProxyDHCP protocol extensions should be ignored */ - int no_pxedhcp; - /** ProxyDHCP server */ - struct in_addr proxy_server; - /** ProxyDHCP server priority */ - int proxy_priority; - /** PXE Boot Server type */ uint16_t pxe_type; /** List of PXE Boot Servers to attempt */ @@ -240,6 +263,11 @@ struct dhcp_session { struct retry_timer timer; /** Start time of the current state (in ticks) */ unsigned long start; + + /** DHCP offer just requested */ + struct dhcp_offer *current_offer; + /** List of DHCP offers received */ + struct dhcp_offer offers[DHCP_MAX_OFFERS]; }; /** @@ -250,6 +278,12 @@ struct dhcp_session { static void dhcp_free ( struct refcnt *refcnt ) { struct dhcp_session *dhcp = container_of ( refcnt, struct dhcp_session, refcnt ); + int i; + + for ( i = 0 ; i < DHCP_MAX_OFFERS ; i++ ) { + if ( dhcp->offers[i].pxe ) + dhcppkt_put ( dhcp->offers[i].pxe ); + } netdev_put ( dhcp->netdev ); free ( dhcp ); @@ -294,6 +328,35 @@ static void dhcp_set_state ( struct dhcp_session *dhcp, start_timer_nodelay ( &dhcp->timer ); } +/** + * Determine next DHCP offer to try + * + * @v dhcp DHCP session + * @v type DHCP offer type + * @ret offer Next DHCP offer to try + * + * Offers are ranked by priority, then by completeness (combined + * IP+PXE are tried before @a type alone), then by order of receipt. 
+ */ +static struct dhcp_offer * dhcp_next_offer ( struct dhcp_session *dhcp, + uint8_t type ) { + + struct dhcp_offer *offer; + struct dhcp_offer *best = NULL; + + for ( offer = dhcp->offers ; offer < dhcp->offers + DHCP_MAX_OFFERS ; + offer++ ) { + if ( ( offer->valid & type ) && + ( ( best == NULL ) || + ( offer->priority > best->priority ) || + ( ( offer->priority == best->priority ) && + ( offer->valid & ~best->valid ) ) ) ) + best = offer; + } + + return best; +} + /**************************************************************************** * * DHCP state machine @@ -321,7 +384,7 @@ static int dhcp_discovery_tx ( struct dhcp_session *dhcp, } /** - * Handle received packet during DHCP discovery + * Handle received DHCPOFFER during any state * * @v dhcp DHCP session * @v dhcppkt DHCP packet @@ -329,17 +392,17 @@ static int dhcp_discovery_tx ( struct dhcp_session *dhcp, * @v msgtype DHCP message type * @v server_id DHCP server ID */ -static void dhcp_discovery_rx ( struct dhcp_session *dhcp, - struct dhcp_packet *dhcppkt, - struct sockaddr_in *peer, uint8_t msgtype, - struct in_addr server_id ) { - struct in_addr ip; +static void dhcp_rx_offer ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { char vci[9]; /* "PXEClient" */ int vci_len; int has_pxeclient; - int8_t priority = 0; - uint8_t no_pxedhcp = 0; - unsigned long elapsed; + int pxeopts_len; + int has_pxeopts; + struct dhcp_offer *offer; + int i; DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), @@ -348,47 +411,87 @@ static void dhcp_discovery_rx ( struct dhcp_session *dhcp, DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) ); /* Identify offered IP address */ - ip = dhcppkt->dhcphdr->yiaddr; - if ( ip.s_addr ) - DBGC ( dhcp, " for %s", inet_ntoa ( ip ) ); + if ( dhcppkt->dhcphdr->yiaddr.s_addr ) + DBGC ( dhcp, " for %s", inet_ntoa ( dhcppkt->dhcphdr->yiaddr )); + + /* 
Enqueue an offer to be filled in */ + for ( i = 0 ; i < DHCP_MAX_OFFERS ; i++ ) { + if ( dhcp->offers[i].server.s_addr == server_id.s_addr ) { + DBGC ( dhcp, " dup\n" ); + return; + } + + if ( ! dhcp->offers[i].valid ) + break; + } + if ( i == DHCP_MAX_OFFERS ) { + DBGC ( dhcp, " dropped\n" ); + return; + } + + offer = &dhcp->offers[i]; + offer->server = server_id; + offer->ip = dhcppkt->dhcphdr->yiaddr; /* Identify "PXEClient" vendor class */ vci_len = dhcppkt_fetch ( dhcppkt, DHCP_VENDOR_CLASS_ID, vci, sizeof ( vci ) ); has_pxeclient = ( ( vci_len >= ( int ) sizeof ( vci ) ) && ( strncmp ( "PXEClient", vci, sizeof (vci) ) == 0 )); + + /* Identify presence of PXE-specific options */ + pxeopts_len = dhcppkt_fetch ( dhcppkt, DHCP_PXE_BOOT_MENU, NULL, 0 ); + has_pxeopts = ( pxeopts_len >= 0 ); if ( has_pxeclient ) - DBGC ( dhcp, " pxe" ); + DBGC ( dhcp, "%s", ( has_pxeopts ? " pxe" : " proxy" ) ); + + if ( has_pxeclient && has_pxeopts ) { + /* Save reference to packet for future use */ + if ( offer->pxe ) + dhcppkt_put ( offer->pxe ); + offer->pxe = dhcppkt_get ( dhcppkt ); + } /* Identify priority */ - dhcppkt_fetch ( dhcppkt, DHCP_EB_PRIORITY, &priority, - sizeof ( priority ) ); - if ( priority ) - DBGC ( dhcp, " pri %d", priority ); + dhcppkt_fetch ( dhcppkt, DHCP_EB_PRIORITY, &offer->priority, + sizeof ( offer->priority ) ); + if ( offer->priority ) + DBGC ( dhcp, " pri %d", offer->priority ); /* Identify ignore-PXE flag */ - dhcppkt_fetch ( dhcppkt, DHCP_EB_NO_PXEDHCP, &no_pxedhcp, - sizeof ( no_pxedhcp ) ); - if ( no_pxedhcp ) + dhcppkt_fetch ( dhcppkt, DHCP_EB_NO_PXEDHCP, &offer->no_pxedhcp, + sizeof ( offer->no_pxedhcp ) ); + if ( offer->no_pxedhcp ) DBGC ( dhcp, " nopxe" ); DBGC ( dhcp, "\n" ); - /* Select as DHCP offer, if applicable */ - if ( ip.s_addr && ( peer->sin_port == htons ( BOOTPS_PORT ) ) && - ( ( msgtype == DHCPOFFER ) || ( ! 
msgtype /* BOOTP */ ) ) && - ( priority >= dhcp->priority ) ) { - dhcp->offer = ip; - dhcp->server = server_id; - dhcp->priority = priority; - dhcp->no_pxedhcp = no_pxedhcp; - } + /* Determine roles this offer can fill */ + if ( offer->ip.s_addr && + ( peer->sin_port == htons ( BOOTPS_PORT ) ) && + ( ( msgtype == DHCPOFFER ) || ( ! msgtype /* BOOTP */ ) ) ) + offer->valid |= DHCP_OFFER_IP; - /* Select as ProxyDHCP offer, if applicable */ - if ( has_pxeclient && ( msgtype == DHCPOFFER ) && - ( priority >= dhcp->proxy_priority ) ) { - dhcp->proxy_server = server_id; - dhcp->proxy_priority = priority; - } + if ( has_pxeclient && ( msgtype == DHCPOFFER ) ) + offer->valid |= DHCP_OFFER_PXE; +} + +/** + * Handle received packet during DHCP discovery + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ +static void dhcp_discovery_rx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { + unsigned long elapsed; + struct dhcp_offer *ip_offer; + + dhcp_rx_offer ( dhcp, dhcppkt, peer, msgtype, server_id ); /* We can exit the discovery state when we have a valid * DHCPOFFER, and either: @@ -399,12 +502,14 @@ static void dhcp_discovery_rx ( struct dhcp_session *dhcp, */ /* If we don't yet have a DHCPOFFER, do nothing */ - if ( ! dhcp->offer.s_addr ) + ip_offer = dhcp_next_offer ( dhcp, DHCP_OFFER_IP ); + if ( ! ip_offer ) return; /* If we can't yet transition to DHCPREQUEST, do nothing */ elapsed = ( currticks() - dhcp->start ); - if ( ! ( dhcp->no_pxedhcp || dhcp->proxy_server.s_addr || + if ( ! 
( ip_offer->no_pxedhcp || + dhcp_next_offer ( dhcp, DHCP_OFFER_PXE ) || ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) ) return; @@ -421,7 +526,8 @@ static void dhcp_discovery_expired ( struct dhcp_session *dhcp ) { unsigned long elapsed = ( currticks() - dhcp->start ); /* Give up waiting for ProxyDHCP before we reach the failure point */ - if ( dhcp->offer.s_addr && ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) { + if ( dhcp_next_offer ( dhcp, DHCP_OFFER_IP ) && + ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) { dhcp_set_state ( dhcp, &dhcp_state_request ); return; } @@ -451,21 +557,23 @@ static int dhcp_request_tx ( struct dhcp_session *dhcp, struct dhcp_packet *dhcppkt, struct sockaddr_in *peer ) { int rc; + struct dhcp_offer *offer; + + offer = dhcp->current_offer = dhcp_next_offer ( dhcp, DHCP_OFFER_IP ); DBGC ( dhcp, "DHCP %p DHCPREQUEST to %s:%d", - dhcp, inet_ntoa ( dhcp->server ), BOOTPS_PORT ); - DBGC ( dhcp, " for %s\n", inet_ntoa ( dhcp->offer ) ); + dhcp, inet_ntoa ( offer->server ), BOOTPS_PORT ); + DBGC ( dhcp, " for %s\n", inet_ntoa ( offer->ip ) ); /* Set server ID */ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_SERVER_IDENTIFIER, - &dhcp->server, - sizeof ( dhcp->server ) ) ) != 0 ) + &offer->server, + sizeof ( offer->server ) ) ) != 0 ) return rc; /* Set requested IP address */ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_REQUESTED_ADDRESS, - &dhcp->offer, - sizeof ( dhcp->offer ) ) ) != 0 ) + &offer->ip, sizeof ( offer->ip ) ) ) != 0 ) return rc; /* Set server address */ @@ -491,6 +599,18 @@ static void dhcp_request_rx ( struct dhcp_session *dhcp, struct in_addr ip; struct settings *parent; int rc; + struct dhcp_offer *pxe_offer; + + if ( msgtype == DHCPOFFER ) { + dhcp_rx_offer ( dhcp, dhcppkt, peer, msgtype, server_id ); + if ( dhcp_next_offer ( dhcp, DHCP_OFFER_IP ) != + dhcp->current_offer ) { + /* Restart due to higher-priority offer received */ + DBGC ( dhcp, "DHCP %p re-requesting\n", dhcp ); + dhcp_set_state ( dhcp, &dhcp_state_request ); + } + return; + } DBGC ( dhcp, 
"DHCP %p %s from %s:%d", dhcp, dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), @@ -509,7 +629,7 @@ static void dhcp_request_rx ( struct dhcp_session *dhcp, return; if ( msgtype /* BOOTP */ && ( msgtype != DHCPACK ) ) return; - if ( server_id.s_addr != dhcp->server.s_addr ) + if ( server_id.s_addr != dhcp->current_offer->server.s_addr ) return; /* Record assigned address */ @@ -524,14 +644,31 @@ static void dhcp_request_rx ( struct dhcp_session *dhcp, return; } - /* Start ProxyDHCPREQUEST if applicable */ - if ( dhcp->proxy_server.s_addr && ( ! dhcp->no_pxedhcp ) ) { + /* Locate best source of PXE settings */ + pxe_offer = dhcp_next_offer ( dhcp, DHCP_OFFER_PXE ); + + if ( ( ! pxe_offer ) || /* No PXE available */ + /* IP offer instructs us to ignore PXE */ + dhcp->current_offer->no_pxedhcp || + /* PXE settings already registered with IP offer */ + ( ( dhcp->current_offer == pxe_offer ) && ( pxe_offer->pxe ) ) ) { + + /* Terminate DHCP */ + dhcp_finished ( dhcp, 0 ); + + } else if ( pxe_offer->pxe ) { + /* Register PXE settings and terminate DHCP */ + pxe_offer->pxe->settings.name = PROXYDHCP_SETTINGS_NAME; + if ( ( rc = register_settings ( &pxe_offer->pxe->settings, + NULL ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not register settings: " + "%s\n", dhcp, strerror ( rc ) ); + } + dhcp_finished ( dhcp, rc ); + } else { + /* Start ProxyDHCP */ dhcp_set_state ( dhcp, &dhcp_state_proxy ); - return; } - - /* Terminate DHCP */ - dhcp_finished ( dhcp, 0 ); } /** @@ -566,18 +703,21 @@ static int dhcp_proxy_tx ( struct dhcp_session *dhcp, struct dhcp_packet *dhcppkt, struct sockaddr_in *peer ) { int rc; + struct dhcp_offer *offer; + + offer = dhcp->current_offer = dhcp_next_offer ( dhcp, DHCP_OFFER_PXE ); - DBGC ( dhcp, "DHCP %p ProxyDHCP REQUEST to %s:%d\n", - dhcp, inet_ntoa ( dhcp->proxy_server ), PXE_PORT ); + DBGC ( dhcp, "DHCP %p ProxyDHCP REQUEST to %s:%d\n", dhcp, + inet_ntoa ( offer->server ), PXE_PORT ); /* Set server ID */ if ( ( rc = dhcppkt_store ( 
dhcppkt, DHCP_SERVER_IDENTIFIER, - &dhcp->proxy_server, - sizeof ( dhcp->proxy_server ) ) ) != 0 ) + &offer->server, + sizeof ( offer->server ) ) ) != 0 ) return rc; /* Set server address */ - peer->sin_addr = dhcp->proxy_server; + peer->sin_addr = offer->server; peer->sin_port = htons ( PXE_PORT ); return 0; @@ -598,6 +738,13 @@ static void dhcp_proxy_rx ( struct dhcp_session *dhcp, struct in_addr server_id ) { int rc; + /* Enqueue last-minute DHCPOFFERs for use in case of failure */ + if ( peer->sin_port == htons ( BOOTPS_PORT ) && + msgtype == DHCPOFFER ) { + dhcp_rx_offer ( dhcp, dhcppkt, peer, msgtype, server_id ); + return; + } + DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), ntohs ( peer->sin_port ) ); @@ -608,10 +755,10 @@ static void dhcp_proxy_rx ( struct dhcp_session *dhcp, /* Filter out unacceptable responses */ if ( peer->sin_port != htons ( PXE_PORT ) ) return; - if ( msgtype != DHCPACK ) + if ( msgtype != DHCPACK && msgtype != DHCPOFFER ) return; if ( server_id.s_addr /* Linux PXE server omits server ID */ && - ( server_id.s_addr != dhcp->proxy_server.s_addr ) ) + ( server_id.s_addr != dhcp->current_offer->server.s_addr ) ) return; /* Register settings */ @@ -637,6 +784,28 @@ static void dhcp_proxy_expired ( struct dhcp_session *dhcp ) { /* Give up waiting for ProxyDHCP before we reach the failure point */ if ( elapsed > PROXYDHCP_MAX_TIMEOUT ) { + + /* Mark failed offer as unsuitable for ProxyDHCP */ + dhcp->current_offer->valid &= ~DHCP_OFFER_PXE; + + /* Prefer not to use only half of a PXE+IP offer if we + * have other offers available + */ + dhcp->current_offer->priority = -1; + + /* If we have any other PXE offers we can try, go back + * to DHCPREQUEST (since they might not be proxied + * offers, or might be coupled to a new IP address). + * We should probably DHCPRELEASE our old IP, but the + * standard does not require it. 
+ */ + if ( dhcp_next_offer ( dhcp, DHCP_OFFER_PXE ) ) { + dhcp->local.sin_addr.s_addr = 0; + dhcp_set_state ( dhcp, &dhcp_state_request ); + return; + } + + /* No possibilities left; finish without PXE options */ dhcp_finished ( dhcp, 0 ); return; } @@ -668,9 +837,14 @@ static int dhcp_pxebs_tx ( struct dhcp_session *dhcp, struct dhcp_pxe_boot_menu_item menu_item = { 0, 0 }; int rc; + /* Set server address */ + peer->sin_addr = *(dhcp->pxe_attempt); + peer->sin_port = ( ( peer->sin_addr.s_addr == INADDR_BROADCAST ) ? + htons ( BOOTPS_PORT ) : htons ( PXE_PORT ) ); + DBGC ( dhcp, "DHCP %p PXEBS REQUEST to %s:%d for type %d\n", - dhcp, inet_ntoa ( *(dhcp->pxe_attempt) ), PXE_PORT, - ntohs ( dhcp->pxe_type ) ); + dhcp, inet_ntoa ( peer->sin_addr ), ntohs ( peer->sin_port ), + le16_to_cpu ( dhcp->pxe_type ) ); /* Set boot menu item */ menu_item.type = dhcp->pxe_type; @@ -678,10 +852,6 @@ static int dhcp_pxebs_tx ( struct dhcp_session *dhcp, &menu_item, sizeof ( menu_item ) ) ) != 0 ) return rc; - /* Set server address */ - peer->sin_addr = *(dhcp->pxe_attempt); - peer->sin_port = htons ( PXE_PORT ); - return 0; } @@ -741,7 +911,8 @@ static void dhcp_pxebs_rx ( struct dhcp_session *dhcp, DBGC ( dhcp, "\n" ); /* Filter out unacceptable responses */ - if ( peer->sin_port != htons ( PXE_PORT ) ) + if ( ( peer->sin_port != htons ( BOOTPS_PORT ) ) && + ( peer->sin_port != htons ( PXE_PORT ) ) ) return; if ( msgtype != DHCPACK ) return; @@ -807,6 +978,46 @@ static struct dhcp_session_state dhcp_state_pxebs = { */ /** + * Construct DHCP client hardware address field and broadcast flag + * + * @v netdev Network device + * @v hlen DHCP hardware address length to fill in + * @v flags DHCP flags to fill in + * @ret chaddr DHCP client hardware address + */ +void * dhcp_chaddr ( struct net_device *netdev, uint8_t *hlen, + uint16_t *flags ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + typeof ( ( ( struct dhcphdr * ) NULL )->chaddr ) chaddr; + + /* If the link-layer 
address cannot fit into the chaddr field + * (as is the case for IPoIB) then try using the hardware + * address instead. If we do this, set the broadcast flag, + * since chaddr then does not represent a valid link-layer + * address for the return path. + * + * If even the hardware address is too large, use an empty + * chaddr field and set the broadcast flag. + * + * This goes against RFC4390, but RFC4390 mandates that we use + * a DHCP client identifier that conforms with RFC4361, which + * we cannot do without either persistent (NIC-independent) + * storage, or by eliminating the hardware address completely + * from the DHCP packet, which seems unfriendly to users. + */ + if ( ( *hlen = ll_protocol->ll_addr_len ) <= sizeof ( chaddr ) ) { + return netdev->ll_addr; + } + *flags = htons ( BOOTP_FL_BROADCAST ); + if ( ( *hlen = ll_protocol->hw_addr_len ) <= sizeof ( chaddr ) ) { + return netdev->hw_addr; + } else { + *hlen = 0; + return NULL; + } +} + +/** * Create a DHCP packet * * @v dhcppkt DHCP packet structure to fill in @@ -826,7 +1037,7 @@ int dhcp_create_packet ( struct dhcp_packet *dhcppkt, const void *options, size_t options_len, void *data, size_t max_len ) { struct dhcphdr *dhcphdr = data; - unsigned int hlen; + void *chaddr; int rc; /* Sanity check */ @@ -839,16 +1050,8 @@ int dhcp_create_packet ( struct dhcp_packet *dhcppkt, dhcphdr->magic = htonl ( DHCP_MAGIC_COOKIE ); dhcphdr->htype = ntohs ( netdev->ll_protocol->ll_proto ); dhcphdr->op = dhcp_op[msgtype]; - /* If hardware length exceeds the chaddr field length, don't - * use the chaddr field. This is as per RFC4390. 
- */ - hlen = netdev->ll_protocol->ll_addr_len; - if ( hlen > sizeof ( dhcphdr->chaddr ) ) { - hlen = 0; - dhcphdr->flags = htons ( BOOTP_FL_BROADCAST ); - } - dhcphdr->hlen = hlen; - memcpy ( dhcphdr->chaddr, netdev->ll_addr, hlen ); + chaddr = dhcp_chaddr ( netdev, &dhcphdr->hlen, &dhcphdr->flags ); + memcpy ( dhcphdr->chaddr, chaddr, dhcphdr->hlen ); memcpy ( dhcphdr->options, options, options_len ); /* Initialise DHCP packet structure */ @@ -880,10 +1083,10 @@ int dhcp_create_packet ( struct dhcp_packet *dhcppkt, int dhcp_create_request ( struct dhcp_packet *dhcppkt, struct net_device *netdev, unsigned int msgtype, struct in_addr ciaddr, void *data, size_t max_len ) { - struct device_description *desc = &netdev->dev->desc; struct dhcp_netdev_desc dhcp_desc; struct dhcp_client_id client_id; struct dhcp_client_uuid client_uuid; + uint8_t *dhcp_features; size_t dhcp_features_len; size_t ll_addr_len; ssize_t len; @@ -903,7 +1106,8 @@ int dhcp_create_request ( struct dhcp_packet *dhcppkt, dhcppkt->dhcphdr->ciaddr = ciaddr; /* Add options to identify the feature list */ - dhcp_features_len = ( dhcp_features_end - dhcp_features ); + dhcp_features = table_start ( DHCP_FEATURES ); + dhcp_features_len = table_num_entries ( DHCP_FEATURES ); if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_ENCAP, dhcp_features, dhcp_features_len ) ) != 0 ) { DBG ( "DHCP could not set features list option: %s\n", @@ -912,9 +1116,8 @@ int dhcp_create_request ( struct dhcp_packet *dhcppkt, } /* Add options to identify the network device */ - dhcp_desc.type = desc->bus_type; - dhcp_desc.vendor = htons ( desc->vendor ); - dhcp_desc.device = htons ( desc->device ); + fetch_setting ( &netdev->settings.settings, &busid_setting, &dhcp_desc, + sizeof ( dhcp_desc ) ); if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_BUS_ID, &dhcp_desc, sizeof ( dhcp_desc ) ) ) != 0 ) { DBG ( "DHCP could not set bus ID option: %s\n", @@ -1109,7 +1312,7 @@ static int dhcp_deliver_iob ( struct xfer_interface *xfer, /** DHCP 
data transfer interface operations */ static struct xfer_interface_operations dhcp_xfer_operations = { .close = ignore_xfer_close, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = unlimited_xfer_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = dhcp_deliver_iob, @@ -1184,16 +1387,28 @@ static struct sockaddr dhcp_peer = { * * @v job Job control interface * @v netdev Network device - * @ret rc Return status code + * @ret rc Return status code, or positive if cached * * Starts DHCP on the specified network device. If successful, the * DHCPACK (and ProxyDHCPACK, if applicable) will be registered as * option sources. + * + * On a return of 0, a background job has been started to perform the + * DHCP request. Any nonzero return means the job has not been + * started; a positive return value indicates the success condition of + * having fetched the appropriate data from cached information. */ int start_dhcp ( struct job_interface *job, struct net_device *netdev ) { struct dhcp_session *dhcp; int rc; + /* Check for cached DHCP information */ + get_cached_dhcpack(); + if ( fetch_uintz_setting ( NULL, &use_cached_setting ) ) { + DBG ( "DHCP using cached network settings\n" ); + return 1; + } + /* Allocate and initialise structure */ dhcp = zalloc ( sizeof ( *dhcp ) ); if ( ! dhcp ) @@ -1309,7 +1524,7 @@ int start_pxebs ( struct job_interface *job, struct net_device *netdev, fetch_ipv4_setting ( netdev_settings ( netdev ), &ip_setting, &dhcp->local.sin_addr ); dhcp->local.sin_port = htons ( BOOTPC_PORT ); - dhcp->pxe_type = htons ( pxe_type ); + dhcp->pxe_type = cpu_to_le16 ( pxe_type ); dhcp->timer.expired = dhcp_timer_expired; /* Construct PXE boot server IP address lists */ diff --git a/gpxe/src/net/udp/dns.c b/gpxe/src/net/udp/dns.c index a498aefc..f94094aa 100644 --- a/gpxe/src/net/udp/dns.c +++ b/gpxe/src/net/udp/dns.c @@ -19,6 +19,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <stdlib.h> #include <string.h> @@ -459,7 +461,7 @@ static void dns_xfer_close ( struct xfer_interface *socket, int rc ) { /** DNS socket operations */ static struct xfer_interface_operations dns_socket_operations = { .close = dns_xfer_close, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = unlimited_xfer_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = xfer_deliver_as_raw, diff --git a/gpxe/src/net/udp/slam.c b/gpxe/src/net/udp/slam.c index 6add99bc..396f69b0 100644 --- a/gpxe/src/net/udp/slam.c +++ b/gpxe/src/net/udp/slam.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <stdlib.h> #include <string.h> @@ -614,7 +616,7 @@ static void slam_socket_close ( struct xfer_interface *socket, int rc ) { /** SLAM unicast socket data transfer operations */ static struct xfer_interface_operations slam_socket_operations = { .close = slam_socket_close, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = unlimited_xfer_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = slam_socket_deliver, @@ -640,7 +642,7 @@ static void slam_mc_socket_close ( struct xfer_interface *mc_socket, int rc ){ /** SLAM multicast socket data transfer operations */ static struct xfer_interface_operations slam_mc_socket_operations = { .close = slam_mc_socket_close, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = unlimited_xfer_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = slam_mc_socket_deliver, diff --git a/gpxe/src/net/udp/tftp.c b/gpxe/src/net/udp/tftp.c index 19525f79..3de2fb9b 100644 --- a/gpxe/src/net/udp/tftp.c +++ b/gpxe/src/net/udp/tftp.c @@ -16,6 +16,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +FILE_LICENCE ( GPL2_OR_LATER ); + #include <stdint.h> #include <stdlib.h> #include <stdio.h> @@ -131,6 +133,8 @@ enum { TFTP_FL_RRQ_MULTICAST = 0x0004, /** Perform MTFTP recovery on timeout */ TFTP_FL_MTFTP_RECOVERY = 0x0008, + /** Only get filesize and then abort the transfer */ + TFTP_FL_SIZEONLY = 0x0010, }; /** Maximum number of MTFTP open requests before falling back to TFTP */ @@ -409,6 +413,42 @@ static int tftp_send_ack ( struct tftp_request *tftp ) { } /** + * Transmit ERROR (Abort) + * + * @v tftp TFTP connection + * @v errcode TFTP error code + * @v errmsg Error message string + * @ret rc Return status code + */ +static int tftp_send_error ( struct tftp_request *tftp, int errcode, + const char *errmsg ) { + struct tftp_error *err; + struct io_buffer *iobuf; + struct xfer_metadata meta = { + .dest = ( struct sockaddr * ) &tftp->peer, + }; + size_t msglen; + + DBGC2 ( tftp, "TFTP %p sending ERROR %d: %s\n", tftp, errcode, + errmsg ); + + /* Allocate buffer */ + msglen = sizeof ( *err ) + strlen ( errmsg ) + 1 /* NUL */; + iobuf = xfer_alloc_iob ( &tftp->socket, msglen ); + if ( ! iobuf ) + return -ENOMEM; + + /* Build ERROR */ + err = iob_put ( iobuf, msglen ); + err->opcode = htons ( TFTP_ERROR ); + err->errcode = htons ( errcode ); + strcpy ( err->errmsg, errmsg ); + + /* ERR always goes to the peer recorded from the RRQ response */ + return xfer_deliver_iob_meta ( &tftp->socket, iobuf, &meta ); +} + +/** * Transmit next relevant packet * * @v tftp TFTP connection @@ -416,9 +456,16 @@ static int tftp_send_ack ( struct tftp_request *tftp ) { */ static int tftp_send_packet ( struct tftp_request *tftp ) { - /* Update retransmission timer */ + /* Update retransmission timer. While name resolution takes place the + * window is zero. Avoid unnecessary delay after name resolution + * completes by retrying immediately. 
+ */ stop_timer ( &tftp->timer ); - start_timer ( &tftp->timer ); + if ( xfer_window ( &tftp->socket ) ) { + start_timer ( &tftp->timer ); + } else { + start_timer_nodelay ( &tftp->timer ); + } /* Send RRQ or ACK as appropriate */ if ( ! tftp->peer.st_family ) { @@ -670,6 +717,7 @@ static int tftp_rx_oack ( struct tftp_request *tftp, void *buf, size_t len ) { char *end = buf + len; char *name; char *value; + char *next; int rc = 0; /* Sanity check */ @@ -679,26 +727,41 @@ static int tftp_rx_oack ( struct tftp_request *tftp, void *buf, size_t len ) { rc = -EINVAL; goto done; } - if ( end[-1] != '\0' ) { - DBGC ( tftp, "TFTP %p received OACK missing final NUL\n", - tftp ); - rc = -EINVAL; - goto done; - } /* Process each option in turn */ - name = oack->data; - while ( name < end ) { - value = ( name + strlen ( name ) + 1 ); + for ( name = oack->data ; name < end ; name = next ) { + + /* Parse option name and value + * + * We treat parsing errors as non-fatal, because there + * exists at least one TFTP server (IBM Tivoli PXE + * Server 5.1.0.3) that has been observed to send + * malformed OACKs containing trailing garbage bytes. 
+ */ + value = ( name + strnlen ( name, ( end - name ) ) + 1 ); + if ( value > end ) { + DBGC ( tftp, "TFTP %p received OACK with malformed " + "option name:\n", tftp ); + DBGC_HD ( tftp, oack, len ); + break; + } if ( value == end ) { DBGC ( tftp, "TFTP %p received OACK missing value " "for option \"%s\"\n", tftp, name ); - rc = -EINVAL; - goto done; + DBGC_HD ( tftp, oack, len ); + break; } + next = ( value + strnlen ( value, ( end - value ) ) + 1 ); + if ( next > end ) { + DBGC ( tftp, "TFTP %p received OACK with malformed " + "value for option \"%s\":\n", tftp, name ); + DBGC_HD ( tftp, oack, len ); + break; + } + + /* Process option */ if ( ( rc = tftp_process_option ( tftp, name, value ) ) != 0 ) goto done; - name = ( value + strlen ( value ) + 1 ); } /* Process tsize information, if available */ @@ -707,6 +770,14 @@ static int tftp_rx_oack ( struct tftp_request *tftp, void *buf, size_t len ) { goto done; } + /* Abort request if only trying to determine file size */ + if ( tftp->flags & TFTP_FL_SIZEONLY ) { + rc = 0; + tftp_send_error ( tftp, 0, "TFTP Aborted" ); + tftp_done ( tftp, rc ); + return rc; + } + /* Request next data block */ tftp_send_packet ( tftp ); @@ -729,11 +800,18 @@ static int tftp_rx_data ( struct tftp_request *tftp, struct io_buffer *iobuf ) { struct tftp_data *data = iobuf->data; struct xfer_metadata meta; - int block; + unsigned int block; off_t offset; size_t data_len; int rc; + if ( tftp->flags & TFTP_FL_SIZEONLY ) { + /* If we get here then server doesn't support SIZE option */ + rc = -ENOTSUP; + tftp_send_error ( tftp, 0, "TFTP Aborted" ); + goto done; + } + /* Sanity check */ if ( iob_len ( iobuf ) < sizeof ( *data ) ) { DBGC ( tftp, "TFTP %p received underlength DATA packet " @@ -741,14 +819,17 @@ static int tftp_rx_data ( struct tftp_request *tftp, rc = -EINVAL; goto done; } - if ( data->block == 0 ) { + + /* Calculate block number */ + block = ( ( bitmap_first_gap ( &tftp->bitmap ) + 1 ) & ~0xffff ); + if ( data->block == 0 && 
block == 0 ) { DBGC ( tftp, "TFTP %p received data block 0\n", tftp ); rc = -EINVAL; goto done; } + block += ( ntohs ( data->block ) - 1 ); /* Extract data */ - block = ( ntohs ( data->block ) - 1 ); offset = ( block * tftp->blksize ); iob_pull ( iobuf, sizeof ( *data ) ); data_len = iob_len ( iobuf ); @@ -934,7 +1015,7 @@ static int tftp_socket_deliver_iob ( struct xfer_interface *socket, /** TFTP socket operations */ static struct xfer_interface_operations tftp_socket_operations = { .close = ignore_xfer_close, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = unlimited_xfer_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = tftp_socket_deliver_iob, @@ -961,7 +1042,7 @@ static int tftp_mc_socket_deliver_iob ( struct xfer_interface *mc_socket, /** TFTP multicast socket operations */ static struct xfer_interface_operations tftp_mc_socket_operations = { .close = ignore_xfer_close, - .vredirect = xfer_vopen, + .vredirect = xfer_vreopen, .window = unlimited_xfer_window, .alloc_iob = default_xfer_alloc_iob, .deliver_iob = tftp_mc_socket_deliver_iob, @@ -1093,6 +1174,26 @@ struct uri_opener tftp_uri_opener __uri_opener = { }; /** + * Initiate TFTP-size request + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int tftpsize_open ( struct xfer_interface *xfer, struct uri *uri ) { + return tftp_core_open ( xfer, uri, TFTP_PORT, NULL, + ( TFTP_FL_RRQ_SIZES | + TFTP_FL_SIZEONLY ) ); + +} + +/** TFTP URI opener */ +struct uri_opener tftpsize_uri_opener __uri_opener = { + .scheme = "tftpsize", + .open = tftpsize_open, +}; + +/** * Initiate TFTM download * * @v xfer Data transfer interface |