summary | refs | log | tree | commit | diff
path: root/gpxe/src/drivers/net/ipoib.c
diff options
context:
space:
mode:
Diffstat (limited to 'gpxe/src/drivers/net/ipoib.c')
-rw-r--r-- gpxe/src/drivers/net/ipoib.c 565
1 files changed, 331 insertions, 234 deletions
diff --git a/gpxe/src/drivers/net/ipoib.c b/gpxe/src/drivers/net/ipoib.c
index 16b2a0c8..f0d52044 100644
--- a/gpxe/src/drivers/net/ipoib.c
+++ b/gpxe/src/drivers/net/ipoib.c
@@ -33,9 +33,6 @@
* IP over Infiniband
*/
-/** IPoIB MTU */
-#define IPOIB_MTU 2048
-
/** Number of IPoIB data send work queue entries */
#define IPOIB_DATA_NUM_SEND_WQES 2
@@ -60,8 +57,6 @@ struct ipoib_queue_set {
struct ib_completion_queue *cq;
/** Queue pair */
struct ib_queue_pair *qp;
- /** Receive work queue fill level */
- unsigned int recv_fill;
/** Receive work queue maximum fill level */
unsigned int recv_max_fill;
};
@@ -90,49 +85,146 @@ struct ipoib_device {
int broadcast_attached;
};
+/** TID half used to identify get path record replies */
+#define IPOIB_TID_GET_PATH_REC 0x11111111UL
+
+/** TID half used to identify multicast member record replies */
+#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL
+
+/** IPoIB metadata TID */
+static uint32_t ipoib_meta_tid = 0;
+
+/** Broadcast QPN used in IPoIB MAC addresses
+ *
+ * This is a guaranteed invalid real QPN
+ */
+#define IPOIB_BROADCAST_QPN 0xffffffffUL
+
+/** Broadcast IPoIB address
+ *
+ * The QPN field is held in network byte order, which is why it is
+ * built with htonl(); ipoib_transmit() compares the stored QPN against
+ * htonl ( IPOIB_BROADCAST_QPN ) to recognise broadcast destinations.
+ */
+static struct ipoib_mac ipoib_broadcast = {
+	.qpn = htonl ( IPOIB_BROADCAST_QPN ),
+	.gid.u.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
+			 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
+};
+
+/****************************************************************************
+ *
+ * IPoIB peer cache
+ *
+ ****************************************************************************
+ */
+
/**
- * IPoIB path cache entry
+ * IPoIB peer address
*
* This serves a similar role to the ARP cache for Ethernet. (ARP
* *is* used on IPoIB; we have two caches to maintain.)
*/
-struct ipoib_cached_path {
- /** Destination GID */
- struct ib_gid gid;
- /** Destination LID */
- unsigned int dlid;
+struct ipoib_peer {
+ /** Key */
+ uint8_t key;
+ /** MAC address */
+ struct ipoib_mac mac;
+ /** LID */
+ unsigned int lid;
/** Service level */
unsigned int sl;
/** Rate */
unsigned int rate;
};
-/** Number of IPoIB path cache entries */
-#define IPOIB_NUM_CACHED_PATHS 2
+/** Number of IPoIB peer cache entries
+ *
+ * Must be a power of two.
+ */
+#define IPOIB_NUM_CACHED_PEERS 4
-/** IPoIB path cache */
-static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS];
+/** IPoIB peer address cache */
+static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];
-/** Oldest IPoIB path cache entry index */
-static unsigned int ipoib_path_cache_idx = 0;
+/** Oldest IPoIB peer cache entry index */
+static unsigned int ipoib_peer_cache_idx = 1;
-/** TID half used to identify get path record replies */
-#define IPOIB_TID_GET_PATH_REC 0x11111111UL
+/**
+ * Look up cached peer by key
+ *
+ * @v key Peer cache key
+ * @ret peer Peer cache entry, or NULL
+ */
+static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
+ struct ipoib_peer *peer;
+ unsigned int i;
-/** TID half used to identify multicast member record replies */
-#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL
+ for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
+ peer = &ipoib_peer_cache[i];
+ if ( peer->key == key )
+ return peer;
+ }
-/** IPoIB metadata TID */
-static uint32_t ipoib_meta_tid = 0;
+ if ( key != 0 ) {
+ DBG ( "IPoIB warning: peer cache lost track of key %x while "
+ "still in use\n", key );
+ }
+ return NULL;
+}
-/** IPv4 broadcast GID */
-static const struct ib_gid ipv4_broadcast_gid = {
- { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }
-};
+/**
+ * Look up cached peer by GID
+ *
+ * @v gid Peer GID
+ * @ret peer Peer cache entry, or NULL
+ */
+static struct ipoib_peer *
+ipoib_lookup_peer_by_gid ( const struct ib_gid *gid ) {
+ struct ipoib_peer *peer;
+ unsigned int i;
-/** Maximum time we will wait for the broadcast join to succeed */
-#define IPOIB_JOIN_MAX_DELAY_MS 1000
+ for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
+ peer = &ipoib_peer_cache[i];
+ if ( memcmp ( &peer->mac.gid, gid,
+ sizeof ( peer->mac.gid) ) == 0 ) {
+ return peer;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * Store GID and QPN in peer cache
+ *
+ * Returns the existing entry for the GID if there is one (refreshing
+ * its QPN), otherwise overwrites the oldest slot with a new entry.
+ * A newly created entry has zero LID, service level and rate.
+ *
+ * @v gid		Peer GID
+ * @v qpn		Peer QPN, in host byte order
+ * @ret peer		Peer cache entry (never NULL)
+ */
+static struct ipoib_peer *
+ipoib_cache_peer ( const struct ib_gid *gid, unsigned long qpn ) {
+	struct ipoib_peer *peer;
+	unsigned int key;
+
+	/* Look for existing cache entry */
+	peer = ipoib_lookup_peer_by_gid ( gid );
+	if ( peer ) {
+		/* Refresh the QPN unconditionally rather than inside
+		 * assert(), so that builds with and without
+		 * assertions behave identically and a QPN of zero
+		 * cannot trip an assertion.
+		 */
+		peer->mac.qpn = htonl ( qpn );
+		return peer;
+	}
+
+	/* No entry found: create a new one.  Key zero marks an unused
+	 * slot and peer->key is only eight bits wide, so skip any
+	 * counter value that would be stored as zero.
+	 */
+	do {
+		key = ipoib_peer_cache_idx++;
+	} while ( ( ( uint8_t ) key ) == 0 );
+	peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
+	if ( peer->key )
+		DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
+
+	memset ( peer, 0, sizeof ( *peer ) );
+	peer->key = key;
+	peer->mac.qpn = htonl ( qpn );
+	memcpy ( &peer->mac.gid, gid, sizeof ( peer->mac.gid ) );
+	DBG ( "IPoIB peer %x has GID %08lx:%08lx:%08lx:%08lx and QPN %lx\n",
+	      peer->key, htonl ( gid->u.dwords[0] ),
+	      htonl ( gid->u.dwords[1] ), htonl ( gid->u.dwords[2] ),
+	      htonl ( gid->u.dwords[3] ), qpn );
+	return peer;
+}
/****************************************************************************
*
@@ -141,37 +233,32 @@ static const struct ib_gid ipv4_broadcast_gid = {
****************************************************************************
*/
-/** Broadcast QPN used in IPoIB MAC addresses
- *
- * This is a guaranteed invalid real QPN
- */
-#define IPOIB_BROADCAST_QPN 0xffffffffUL
-
-/** Broadcast IPoIB address */
-static struct ipoib_mac ipoib_broadcast = {
- .qpn = ntohl ( IPOIB_BROADCAST_QPN ),
-};
-
/**
* Add IPoIB link-layer header
*
* @v iobuf I/O buffer
- * @v netdev Network device
- * @v net_protocol Network-layer protocol
* @v ll_dest Link-layer destination address
+ * @v ll_source Source link-layer address
+ * @v net_proto Network-layer protocol, in network-byte order
+ * @ret rc Return status code
*/
-static int ipoib_push ( struct io_buffer *iobuf,
- struct net_device *netdev __unused,
- struct net_protocol *net_protocol,
- const void *ll_dest ) {
+static int ipoib_push ( struct io_buffer *iobuf, const void *ll_dest,
+ const void *ll_source __unused, uint16_t net_proto ) {
struct ipoib_hdr *ipoib_hdr =
iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
+ const struct ipoib_mac *dest_mac = ll_dest;
+ const struct ipoib_mac *src_mac = ll_source;
+ struct ipoib_peer *dest;
+ struct ipoib_peer *src;
+
+ /* Add link-layer addresses to cache */
+ dest = ipoib_cache_peer ( &dest_mac->gid, ntohl ( dest_mac->qpn ) );
+ src = ipoib_cache_peer ( &src_mac->gid, ntohl ( src_mac->qpn ) );
/* Build IPoIB header */
- memcpy ( &ipoib_hdr->pseudo.peer, ll_dest,
- sizeof ( ipoib_hdr->pseudo.peer ) );
- ipoib_hdr->real.proto = net_protocol->net_proto;
- ipoib_hdr->real.reserved = 0;
+ ipoib_hdr->proto = net_proto;
+ ipoib_hdr->u.peer.dest = dest->key;
+ ipoib_hdr->u.peer.src = src->key;
return 0;
}
@@ -180,15 +267,16 @@ static int ipoib_push ( struct io_buffer *iobuf,
* Remove IPoIB link-layer header
*
* @v iobuf I/O buffer
- * @v netdev Network device
- * @v net_proto Network-layer protocol, in network-byte order
- * @v ll_source Source link-layer address
+ * @ret ll_dest Link-layer destination address
+ * @ret ll_source Source link-layer address
+ * @ret net_proto Network-layer protocol, in network-byte order
* @ret rc Return status code
*/
-static int ipoib_pull ( struct io_buffer *iobuf,
- struct net_device *netdev __unused,
- uint16_t *net_proto, const void **ll_source ) {
+static int ipoib_pull ( struct io_buffer *iobuf, const void **ll_dest,
+ const void **ll_source, uint16_t *net_proto ) {
struct ipoib_hdr *ipoib_hdr = iobuf->data;
+ struct ipoib_peer *dest;
+ struct ipoib_peer *source;
/* Sanity check */
if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
@@ -200,9 +288,17 @@ static int ipoib_pull ( struct io_buffer *iobuf,
/* Strip off IPoIB header */
iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
+ /* Identify source and destination addresses, and clear
+ * reserved word in IPoIB header
+ */
+ dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
+ source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
+ ipoib_hdr->u.reserved = 0;
+
/* Fill in required fields */
- *net_proto = ipoib_hdr->real.proto;
- *ll_source = &ipoib_hdr->pseudo.peer;
+ *ll_dest = ( dest ? &dest->mac : &ipoib_broadcast );
+ *ll_source = ( source ? &source->mac : &ipoib_broadcast );
+ *net_proto = ipoib_hdr->proto;
return 0;
}
@@ -225,6 +321,21 @@ const char * ipoib_ntoa ( const void *ll_addr ) {
return buf;
}
+/**
+ * Hash multicast address
+ *
+ * Stub implementation: every argument is ignored and the call always
+ * fails, so nothing is ever written to the link-layer address buffer.
+ *
+ * @v af Address family (ignored)
+ * @v net_addr Network-layer address (ignored)
+ * @v ll_addr Link-layer address to fill in (never written)
+ * @ret rc Return status code (always -ENOTSUP)
+ */
+static int ipoib_mc_hash ( unsigned int af __unused,
+ const void *net_addr __unused,
+ void *ll_addr __unused ) {
+
+ return -ENOTSUP;
+}
+
/** IPoIB protocol */
struct ll_protocol ipoib_protocol __ll_protocol = {
.name = "IPoIB",
@@ -235,6 +346,7 @@ struct ll_protocol ipoib_protocol __ll_protocol = {
.push = ipoib_push,
.pull = ipoib_pull,
.ntoa = ipoib_ntoa,
+ .mc_hash = ipoib_mc_hash,
};
/****************************************************************************
@@ -266,11 +378,17 @@ static void ipoib_destroy_qset ( struct ipoib_device *ipoib,
*
* @v ipoib IPoIB device
* @v qset Queue set
+ * @v num_cqes Number of completion queue entries
+ * @v cq_op Completion queue operations
+ * @v num_send_wqes Number of send work queue entries
+ * @v num_recv_wqes Number of receive work queue entries
+ * @v qkey Queue key
* @ret rc Return status code
*/
static int ipoib_create_qset ( struct ipoib_device *ipoib,
struct ipoib_queue_set *qset,
unsigned int num_cqes,
+ struct ib_completion_queue_operations *cq_op,
unsigned int num_send_wqes,
unsigned int num_recv_wqes,
unsigned long qkey ) {
@@ -285,7 +403,7 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib,
qset->recv_max_fill = num_recv_wqes;
/* Allocate completion queue */
- qset->cq = ib_create_cq ( ibdev, num_cqes );
+ qset->cq = ib_create_cq ( ibdev, num_cqes, cq_op );
if ( ! qset->cq ) {
DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
ipoib );
@@ -312,28 +430,6 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib,
}
/**
- * Find path cache entry by GID
- *
- * @v gid GID
- * @ret entry Path cache entry, or NULL
- */
-static struct ipoib_cached_path *
-ipoib_find_cached_path ( struct ib_gid *gid ) {
- struct ipoib_cached_path *path;
- unsigned int i;
-
- for ( i = 0 ; i < IPOIB_NUM_CACHED_PATHS ; i++ ) {
- path = &ipoib_path_cache[i];
- if ( memcmp ( &path->gid, gid, sizeof ( *gid ) ) == 0 )
- return path;
- }
- DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n",
- htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ),
- htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ) );
- return NULL;
-}
-
-/**
* Transmit path record request
*
* @v ipoib IPoIB device
@@ -344,36 +440,38 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib,
struct ib_gid *gid ) {
struct ib_device *ibdev = ipoib->ibdev;
struct io_buffer *iobuf;
- struct ib_mad_path_record *path_record;
+ struct ib_mad_sa *sa;
struct ib_address_vector av;
int rc;
/* Allocate I/O buffer */
- iobuf = alloc_iob ( sizeof ( *path_record ) );
+ iobuf = alloc_iob ( sizeof ( *sa ) );
if ( ! iobuf )
return -ENOMEM;
- iob_put ( iobuf, sizeof ( *path_record ) );
- path_record = iobuf->data;
- memset ( path_record, 0, sizeof ( *path_record ) );
+ iob_put ( iobuf, sizeof ( *sa ) );
+ sa = iobuf->data;
+ memset ( sa, 0, sizeof ( *sa ) );
/* Construct path record request */
- path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
- path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- path_record->mad_hdr.class_version = 2;
- path_record->mad_hdr.method = IB_MGMT_METHOD_GET;
- path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
- path_record->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC;
- path_record->mad_hdr.tid[1] = ipoib_meta_tid++;
- path_record->sa_hdr.comp_mask[1] =
+ sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ sa->mad_hdr.class_version = 2;
+ sa->mad_hdr.method = IB_MGMT_METHOD_GET;
+ sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
+ sa->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC;
+ sa->mad_hdr.tid[1] = ipoib_meta_tid++;
+ sa->sa_hdr.comp_mask[1] =
htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
- memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) );
- memcpy ( &path_record->sgid, &ibdev->port_gid,
- sizeof ( path_record->sgid ) );
+ memcpy ( &sa->sa_data.path_record.dgid, gid,
+ sizeof ( sa->sa_data.path_record.dgid ) );
+ memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid,
+ sizeof ( sa->sa_data.path_record.sgid ) );
/* Construct address vector */
memset ( &av, 0, sizeof ( av ) );
- av.dlid = ibdev->sm_lid;
- av.dest_qp = IB_SA_QPN;
+ av.lid = ibdev->sm_lid;
+ av.sl = ibdev->sm_sl;
+ av.qpn = IB_SA_QPN;
av.qkey = IB_GLOBAL_QKEY;
/* Post send request */
@@ -400,40 +498,41 @@ static int ipoib_mc_member_record ( struct ipoib_device *ipoib,
struct ib_gid *gid, int join ) {
struct ib_device *ibdev = ipoib->ibdev;
struct io_buffer *iobuf;
- struct ib_mad_mc_member_record *mc_member_record;
+ struct ib_mad_sa *sa;
struct ib_address_vector av;
int rc;
/* Allocate I/O buffer */
- iobuf = alloc_iob ( sizeof ( *mc_member_record ) );
+ iobuf = alloc_iob ( sizeof ( *sa ) );
if ( ! iobuf )
return -ENOMEM;
- iob_put ( iobuf, sizeof ( *mc_member_record ) );
- mc_member_record = iobuf->data;
- memset ( mc_member_record, 0, sizeof ( *mc_member_record ) );
+ iob_put ( iobuf, sizeof ( *sa ) );
+ sa = iobuf->data;
+ memset ( sa, 0, sizeof ( *sa ) );
/* Construct path record request */
- mc_member_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
- mc_member_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- mc_member_record->mad_hdr.class_version = 2;
- mc_member_record->mad_hdr.method =
+ sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ sa->mad_hdr.class_version = 2;
+ sa->mad_hdr.method =
( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
- mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
- mc_member_record->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC;
- mc_member_record->mad_hdr.tid[1] = ipoib_meta_tid++;
- mc_member_record->sa_hdr.comp_mask[1] =
+ sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
+ sa->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC;
+ sa->mad_hdr.tid[1] = ipoib_meta_tid++;
+ sa->sa_hdr.comp_mask[1] =
htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_JOIN_STATE );
- mc_member_record->scope__join_state = 1;
- memcpy ( &mc_member_record->mgid, gid,
- sizeof ( mc_member_record->mgid ) );
- memcpy ( &mc_member_record->port_gid, &ibdev->port_gid,
- sizeof ( mc_member_record->port_gid ) );
+ sa->sa_data.mc_member_record.scope__join_state = 1;
+ memcpy ( &sa->sa_data.mc_member_record.mgid, gid,
+ sizeof ( sa->sa_data.mc_member_record.mgid ) );
+ memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid,
+ sizeof ( sa->sa_data.mc_member_record.port_gid ) );
/* Construct address vector */
memset ( &av, 0, sizeof ( av ) );
- av.dlid = ibdev->sm_lid;
- av.dest_qp = IB_SA_QPN;
+ av.lid = ibdev->sm_lid;
+ av.sl = ibdev->sm_sl;
+ av.qpn = IB_SA_QPN;
av.qkey = IB_GLOBAL_QKEY;
/* Post send request */
@@ -459,49 +558,51 @@ static int ipoib_transmit ( struct net_device *netdev,
struct io_buffer *iobuf ) {
struct ipoib_device *ipoib = netdev->priv;
struct ib_device *ibdev = ipoib->ibdev;
- struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data;
+ struct ipoib_hdr *ipoib_hdr;
+ struct ipoib_peer *dest;
struct ib_address_vector av;
struct ib_gid *gid;
- struct ipoib_cached_path *path;
- int rc;
/* Sanity check */
- if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) {
+ if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
return -EINVAL;
}
- iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) );
+ ipoib_hdr = iobuf->data;
/* Attempting transmission while link is down will put the
* queue pair into an error state, so don't try it.
*/
- if ( ! ibdev->link_up )
+ if ( ! ib_link_ok ( ibdev ) )
return -ENETUNREACH;
+ /* Identify destination address */
+ dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
+ if ( ! dest )
+ return -ENXIO;
+ ipoib_hdr->u.reserved = 0;
+
/* Construct address vector */
memset ( &av, 0, sizeof ( av ) );
- av.qkey = IB_GLOBAL_QKEY;
+ av.qkey = ipoib->data_qkey;
av.gid_present = 1;
- if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) {
- /* Broadcast address */
- av.dest_qp = IB_BROADCAST_QPN;
- av.dlid = ipoib->broadcast_lid;
+ if ( dest->mac.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) {
+ /* Broadcast */
+ av.qpn = IB_BROADCAST_QPN;
+ av.lid = ipoib->broadcast_lid;
gid = &ipoib->broadcast_gid;
} else {
- /* Unicast - look in path cache */
- path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid );
- if ( ! path ) {
- /* No path entry - get path record */
- rc = ipoib_get_path_record ( ipoib,
- &ipoib_pshdr->peer.gid );
- netdev_tx_complete ( netdev, iobuf );
- return rc;
+ /* Unicast */
+ if ( ! dest->lid ) {
+ /* No LID yet - get path record to fetch LID */
+ ipoib_get_path_record ( ipoib, &dest->mac.gid );
+ return -ENOENT;
}
- av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn );
- av.dlid = path->dlid;
- av.rate = path->rate;
- av.sl = path->sl;
- gid = &ipoib_pshdr->peer.gid;
+ av.qpn = ntohl ( dest->mac.qpn );
+ av.lid = dest->lid;
+ av.rate = dest->rate;
+ av.sl = dest->sl;
+ gid = &dest->mac.gid;
}
memcpy ( &av.gid, gid, sizeof ( av.gid ) );
@@ -513,17 +614,15 @@ static int ipoib_transmit ( struct net_device *netdev,
*
* @v ibdev Infiniband device
* @v qp Queue pair
- * @v completion Completion
* @v iobuf I/O buffer
+ * @v rc Completion status code
*/
static void ipoib_data_complete_send ( struct ib_device *ibdev __unused,
struct ib_queue_pair *qp,
- struct ib_completion *completion,
- struct io_buffer *iobuf ) {
+ struct io_buffer *iobuf, int rc ) {
struct net_device *netdev = ib_qp_get_ownerdata ( qp );
- netdev_tx_complete_err ( netdev, iobuf,
- ( completion->syndrome ? -EIO : 0 ) );
+ netdev_tx_complete_err ( netdev, iobuf, rc );
}
/**
@@ -531,67 +630,67 @@ static void ipoib_data_complete_send ( struct ib_device *ibdev __unused,
*
* @v ibdev Infiniband device
* @v qp Queue pair
- * @v completion Completion
+ * @v av Address vector, or NULL
* @v iobuf I/O buffer
+ * @v rc Completion status code
*/
static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused,
struct ib_queue_pair *qp,
- struct ib_completion *completion,
- struct io_buffer *iobuf ) {
+ struct ib_address_vector *av,
+ struct io_buffer *iobuf, int rc ) {
struct net_device *netdev = ib_qp_get_ownerdata ( qp );
struct ipoib_device *ipoib = netdev->priv;
- struct ipoib_pseudo_hdr *ipoib_pshdr;
-
- if ( completion->syndrome ) {
- netdev_rx_err ( netdev, iobuf, -EIO );
- goto done;
- }
+ struct ipoib_hdr *ipoib_hdr;
+ struct ipoib_peer *src;
- iob_put ( iobuf, completion->len );
- if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
- DBGC ( ipoib, "IPoIB %p received data packet too short to "
- "contain GRH\n", ipoib );
- DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
- netdev_rx_err ( netdev, iobuf, -EIO );
- goto done;
+ if ( rc != 0 ) {
+ netdev_rx_err ( netdev, iobuf, rc );
+ return;
}
- iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
- if ( iob_len ( iobuf ) < sizeof ( struct ipoib_real_hdr ) ) {
+ /* Sanity check */
+ if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
DBGC ( ipoib, "IPoIB %p received data packet too short to "
"contain IPoIB header\n", ipoib );
DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
netdev_rx_err ( netdev, iobuf, -EIO );
- goto done;
+ return;
}
+ ipoib_hdr = iobuf->data;
- ipoib_pshdr = iob_push ( iobuf, sizeof ( *ipoib_pshdr ) );
- /* FIXME: fill in a MAC address for the sake of AoE! */
+ /* Parse source address */
+ if ( av->gid_present ) {
+ src = ipoib_cache_peer ( &av->gid, av->qpn );
+ ipoib_hdr->u.peer.src = src->key;
+ }
+ /* Hand off to network layer */
netdev_rx ( netdev, iobuf );
-
- done:
- ipoib->data.recv_fill--;
}
+/** IPoIB data completion operations
+ *
+ * Send and receive completion handlers for the data queue set;
+ * ipoib_open() passes this table to ipoib_create_qset().
+ */
+static struct ib_completion_queue_operations ipoib_data_cq_op = {
+ .complete_send = ipoib_data_complete_send,
+ .complete_recv = ipoib_data_complete_recv,
+};
+
/**
* Handle IPoIB metadata send completion
*
* @v ibdev Infiniband device
* @v qp Queue pair
- * @v completion Completion
* @v iobuf I/O buffer
+ * @v rc Completion status code
*/
static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
struct ib_queue_pair *qp,
- struct ib_completion *completion,
- struct io_buffer *iobuf ) {
+ struct io_buffer *iobuf, int rc ) {
struct net_device *netdev = ib_qp_get_ownerdata ( qp );
struct ipoib_device *ipoib = netdev->priv;
- if ( completion->syndrome ) {
- DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n",
- ipoib, completion->syndrome );
+ if ( rc != 0 ) {
+ DBGC ( ipoib, "IPoIB %p metadata TX completion error: %s\n",
+ ipoib, strerror ( rc ) );
}
free_iob ( iobuf );
}
@@ -602,26 +701,25 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
* @v ipoib IPoIB device
* @v path_record Path record
*/
-static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
- struct ib_mad_path_record *path_record ) {
- struct ipoib_cached_path *path;
+static void ipoib_recv_path_record ( struct ipoib_device *ipoib,
+ struct ib_path_record *path_record ) {
+ struct ipoib_peer *peer;
+
+ /* Locate peer cache entry */
+ peer = ipoib_lookup_peer_by_gid ( &path_record->dgid );
+ if ( ! peer ) {
+ DBGC ( ipoib, "IPoIB %p received unsolicited path record\n",
+ ipoib );
+ return;
+ }
/* Update path cache entry */
- path = &ipoib_path_cache[ipoib_path_cache_idx];
- memcpy ( &path->gid, &path_record->dgid, sizeof ( path->gid ) );
- path->dlid = ntohs ( path_record->dlid );
- path->sl = ( path_record->reserved__sl & 0x0f );
- path->rate = ( path_record->rate_selector__rate & 0x3f );
-
- DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n",
- htonl ( path->gid.u.dwords[0] ), htonl ( path->gid.u.dwords[1] ),
- htonl ( path->gid.u.dwords[2] ), htonl ( path->gid.u.dwords[3] ),
- path->dlid, path->sl, path->rate );
-
- /* Update path cache index */
- ipoib_path_cache_idx++;
- if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS )
- ipoib_path_cache_idx = 0;
+ peer->lid = ntohs ( path_record->dlid );
+ peer->sl = ( path_record->reserved__sl & 0x0f );
+ peer->rate = ( path_record->rate_selector__rate & 0x3f );
+
+ DBG ( "IPoIB peer %x has dlid %x sl %x rate %x\n",
+ peer->key, peer->lid, peer->sl, peer->rate );
}
/**
@@ -631,7 +729,7 @@ static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
* @v mc_member_record Multicast membership record
*/
static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
- struct ib_mad_mc_member_record *mc_member_record ) {
+ struct ib_mc_member_record *mc_member_record ) {
int joined;
int rc;
@@ -657,64 +755,64 @@ static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
*
* @v ibdev Infiniband device
* @v qp Queue pair
- * @v completion Completion
+ * @v av Address vector, or NULL
* @v iobuf I/O buffer
+ * @v rc Completion status code
*/
-static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
- struct ib_queue_pair *qp,
- struct ib_completion *completion,
- struct io_buffer *iobuf ) {
+static void
+ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
+ struct ib_queue_pair *qp,
+ struct ib_address_vector *av __unused,
+ struct io_buffer *iobuf, int rc ) {
struct net_device *netdev = ib_qp_get_ownerdata ( qp );
struct ipoib_device *ipoib = netdev->priv;
- union ib_mad *mad;
+ struct ib_mad_sa *sa;
- if ( completion->syndrome ) {
- DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n",
- ipoib, completion->syndrome );
+ if ( rc != 0 ) {
+ DBGC ( ipoib, "IPoIB %p metadata RX completion error: %s\n",
+ ipoib, strerror ( rc ) );
goto done;
}
- iob_put ( iobuf, completion->len );
- if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
- DBGC ( ipoib, "IPoIB %p received metadata packet too short "
- "to contain GRH\n", ipoib );
- DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
- goto done;
- }
- iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
- if ( iob_len ( iobuf ) < sizeof ( *mad ) ) {
+ if ( iob_len ( iobuf ) < sizeof ( *sa ) ) {
DBGC ( ipoib, "IPoIB %p received metadata packet too short "
"to contain reply\n", ipoib );
DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
goto done;
}
- mad = iobuf->data;
+ sa = iobuf->data;
- if ( mad->mad_hdr.status != 0 ) {
+ if ( sa->mad_hdr.status != 0 ) {
DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n",
- ipoib, ntohs ( mad->mad_hdr.status ) );
+ ipoib, ntohs ( sa->mad_hdr.status ) );
goto done;
}
- switch ( mad->mad_hdr.tid[0] ) {
+ switch ( sa->mad_hdr.tid[0] ) {
case IPOIB_TID_GET_PATH_REC:
- ipoib_recv_path_record ( ipoib, &mad->path_record );
+ ipoib_recv_path_record ( ipoib, &sa->sa_data.path_record );
break;
case IPOIB_TID_MC_MEMBER_REC:
- ipoib_recv_mc_member_record ( ipoib, &mad->mc_member_record );
+ ipoib_recv_mc_member_record ( ipoib,
+ &sa->sa_data.mc_member_record );
break;
default:
DBGC ( ipoib, "IPoIB %p unwanted response:\n",
ipoib );
- DBGC_HD ( ipoib, mad, sizeof ( *mad ) );
+ DBGC_HD ( ipoib, sa, sizeof ( *sa ) );
break;
}
done:
- ipoib->meta.recv_fill--;
free_iob ( iobuf );
}
+/** IPoIB metadata completion operations
+ *
+ * Send and receive completion handlers for the metadata queue set;
+ * ipoib_open() passes this table to ipoib_create_qset().
+ */
+static struct ib_completion_queue_operations ipoib_meta_cq_op = {
+ .complete_send = ipoib_meta_complete_send,
+ .complete_recv = ipoib_meta_complete_recv,
+};
+
/**
* Refill IPoIB receive ring
*
@@ -726,15 +824,14 @@ static void ipoib_refill_recv ( struct ipoib_device *ipoib,
struct io_buffer *iobuf;
int rc;
- while ( qset->recv_fill < qset->recv_max_fill ) {
- iobuf = alloc_iob ( IPOIB_MTU );
+ while ( qset->qp->recv.fill < qset->recv_max_fill ) {
+ iobuf = alloc_iob ( IPOIB_PKT_LEN );
if ( ! iobuf )
break;
if ( ( rc = ib_post_recv ( ibdev, qset->qp, iobuf ) ) != 0 ) {
free_iob ( iobuf );
break;
}
- qset->recv_fill++;
}
}
@@ -747,10 +844,8 @@ static void ipoib_poll ( struct net_device *netdev ) {
struct ipoib_device *ipoib = netdev->priv;
struct ib_device *ibdev = ipoib->ibdev;
- ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
- ipoib_meta_complete_recv );
- ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send,
- ipoib_data_complete_recv );
+ ib_poll_cq ( ibdev, ipoib->meta.cq );
+ ib_poll_cq ( ibdev, ipoib->data.cq );
ipoib_refill_recv ( ipoib, &ipoib->meta );
ipoib_refill_recv ( ipoib, &ipoib->data );
}
@@ -834,6 +929,7 @@ static int ipoib_open ( struct net_device *netdev ) {
/* Allocate metadata queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
IPOIB_META_NUM_CQES,
+ &ipoib_meta_cq_op,
IPOIB_META_NUM_SEND_WQES,
IPOIB_META_NUM_RECV_WQES,
IB_GLOBAL_QKEY ) ) != 0 ) {
@@ -845,6 +941,7 @@ static int ipoib_open ( struct net_device *netdev ) {
/* Allocate data queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
IPOIB_DATA_NUM_CQES,
+ &ipoib_data_cq_op,
IPOIB_DATA_NUM_SEND_WQES,
IPOIB_DATA_NUM_RECV_WQES,
IB_GLOBAL_QKEY ) ) != 0 ) {
@@ -923,15 +1020,15 @@ static void ipoib_set_ib_params ( struct ipoib_device *ipoib ) {
/* Calculate GID portion of MAC address based on port GID */
mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
- memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
+ memcpy ( &mac->gid, &ibdev->gid, sizeof ( mac->gid ) );
/* Calculate broadcast GID based on partition key */
- memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
+ memcpy ( &ipoib->broadcast_gid, &ipoib_broadcast.gid,
sizeof ( ipoib->broadcast_gid ) );
ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
/* Set net device link state to reflect Infiniband link state */
- if ( ibdev->link_up ) {
+ if ( ib_link_ok ( ibdev ) ) {
netdev_link_up ( netdev );
} else {
netdev_link_down ( netdev );