diff options
Diffstat (limited to 'gpxe/src/drivers/net/ipoib.c')
-rw-r--r-- | gpxe/src/drivers/net/ipoib.c | 565 |
1 files changed, 331 insertions, 234 deletions
diff --git a/gpxe/src/drivers/net/ipoib.c b/gpxe/src/drivers/net/ipoib.c index 16b2a0c8..f0d52044 100644 --- a/gpxe/src/drivers/net/ipoib.c +++ b/gpxe/src/drivers/net/ipoib.c @@ -33,9 +33,6 @@ * IP over Infiniband */ -/** IPoIB MTU */ -#define IPOIB_MTU 2048 - /** Number of IPoIB data send work queue entries */ #define IPOIB_DATA_NUM_SEND_WQES 2 @@ -60,8 +57,6 @@ struct ipoib_queue_set { struct ib_completion_queue *cq; /** Queue pair */ struct ib_queue_pair *qp; - /** Receive work queue fill level */ - unsigned int recv_fill; /** Receive work queue maximum fill level */ unsigned int recv_max_fill; }; @@ -90,49 +85,146 @@ struct ipoib_device { int broadcast_attached; }; +/** TID half used to identify get path record replies */ +#define IPOIB_TID_GET_PATH_REC 0x11111111UL + +/** TID half used to identify multicast member record replies */ +#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL + +/** IPoIB metadata TID */ +static uint32_t ipoib_meta_tid = 0; + +/** Broadcast QPN used in IPoIB MAC addresses + * + * This is a guaranteed invalid real QPN + */ +#define IPOIB_BROADCAST_QPN 0xffffffffUL + +/** Broadcast IPoIB address */ +static struct ipoib_mac ipoib_broadcast = { + .qpn = ntohl ( IPOIB_BROADCAST_QPN ), + .gid.u.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff }, +}; + +/**************************************************************************** + * + * IPoIB peer cache + * + **************************************************************************** + */ + /** - * IPoIB path cache entry + * IPoIB peer address * * This serves a similar role to the ARP cache for Ethernet. (ARP * *is* used on IPoIB; we have two caches to maintain.) */ -struct ipoib_cached_path { - /** Destination GID */ - struct ib_gid gid; - /** Destination LID */ - unsigned int dlid; +struct ipoib_peer { + /** Key */ + uint8_t key; + /** MAC address */ + struct ipoib_mac mac; + /** LID */ + unsigned int lid; /** Service level */ unsigned int sl; /** Rate */ unsigned int rate; }; -/** Number of IPoIB path cache entries */ -#define IPOIB_NUM_CACHED_PATHS 2 +/** Number of IPoIB peer cache entries + * + * Must be a power of two. + */ +#define IPOIB_NUM_CACHED_PEERS 4 -/** IPoIB path cache */ -static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS]; +/** IPoIB peer address cache */ +static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS]; -/** Oldest IPoIB path cache entry index */ -static unsigned int ipoib_path_cache_idx = 0; +/** Oldest IPoIB peer cache entry index */ +static unsigned int ipoib_peer_cache_idx = 1; -/** TID half used to identify get path record replies */ -#define IPOIB_TID_GET_PATH_REC 0x11111111UL +/** + * Look up cached peer by key + * + * @v key Peer cache key + * @ret peer Peer cache entry, or NULL + */ +static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) { + struct ipoib_peer *peer; + unsigned int i; -/** TID half used to identify multicast member record replies */ -#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL + for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) { + peer = &ipoib_peer_cache[i]; + if ( peer->key == key ) + return peer; + } -/** IPoIB metadata TID */ -static uint32_t ipoib_meta_tid = 0; + if ( key != 0 ) { + DBG ( "IPoIB warning: peer cache lost track of key %x while " + "still in use\n", key ); + } + return NULL; +} -/** IPv4 broadcast GID */ -static const struct ib_gid ipv4_broadcast_gid = { - { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } -}; +/** + * Look up cached peer by GID + * + * @v gid Peer GID + * @ret peer Peer cache entry, or NULL + */ +static struct ipoib_peer * +ipoib_lookup_peer_by_gid ( const struct ib_gid *gid ) { + struct ipoib_peer *peer; + unsigned int i; -/** Maximum time we will wait for the broadcast join to succeed */ -#define IPOIB_JOIN_MAX_DELAY_MS 1000 + for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) { + peer = &ipoib_peer_cache[i]; + if ( memcmp ( &peer->mac.gid, gid, + sizeof ( peer->mac.gid) ) == 0 ) { + return peer; + } + } + + return NULL; +} + +/** + * Store GID and QPN in peer cache + * + * @v gid Peer GID + * @v qpn Peer QPN + * @ret peer Peer cache entry + */ +static struct ipoib_peer * +ipoib_cache_peer ( const struct ib_gid *gid, unsigned long qpn ) { + struct ipoib_peer *peer; + unsigned int key; + + /* Look for existing cache entry */ + peer = ipoib_lookup_peer_by_gid ( gid ); + if ( peer ) { + assert ( peer->mac.qpn = ntohl ( qpn ) ); + return peer; + } + + /* No entry found: create a new one */ + key = ipoib_peer_cache_idx++; + peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ]; + if ( peer->key ) + DBG ( "IPoIB peer %x evicted from cache\n", peer->key ); + + memset ( peer, 0, sizeof ( *peer ) ); + peer->key = key; + peer->mac.qpn = htonl ( qpn ); + memcpy ( &peer->mac.gid, gid, sizeof ( peer->mac.gid ) ); + DBG ( "IPoIB peer %x has GID %08lx:%08lx:%08lx:%08lx and QPN %lx\n", + peer->key, htonl ( gid->u.dwords[0] ), + htonl ( gid->u.dwords[1] ), htonl ( gid->u.dwords[2] ), + htonl ( gid->u.dwords[3] ), qpn ); + return peer; +} /**************************************************************************** * @@ -141,37 +233,32 @@ static const struct ib_gid ipv4_broadcast_gid = { **************************************************************************** */ -/** Broadcast QPN used in IPoIB MAC addresses - * - * This is a guaranteed invalid real QPN - */ -#define IPOIB_BROADCAST_QPN 0xffffffffUL - -/** Broadcast IPoIB address */ -static struct ipoib_mac ipoib_broadcast = { - .qpn = ntohl ( IPOIB_BROADCAST_QPN ), -}; - /** * Add IPoIB link-layer header * * @v iobuf I/O buffer - * @v netdev Network device - * @v net_protocol Network-layer protocol * @v ll_dest Link-layer destination address + * @v ll_source Source link-layer address + * @v net_proto Network-layer protocol, in network-byte order + * @ret rc Return status code */ -static int ipoib_push ( struct io_buffer *iobuf, - struct net_device *netdev __unused, - struct net_protocol *net_protocol, - const void *ll_dest ) { +static int ipoib_push ( struct io_buffer *iobuf, const void *ll_dest, + const void *ll_source __unused, uint16_t net_proto ) { struct ipoib_hdr *ipoib_hdr = iob_push ( iobuf, sizeof ( *ipoib_hdr ) ); + const struct ipoib_mac *dest_mac = ll_dest; + const struct ipoib_mac *src_mac = ll_source; + struct ipoib_peer *dest; + struct ipoib_peer *src; + + /* Add link-layer addresses to cache */ + dest = ipoib_cache_peer ( &dest_mac->gid, ntohl ( dest_mac->qpn ) ); + src = ipoib_cache_peer ( &src_mac->gid, ntohl ( src_mac->qpn ) ); /* Build IPoIB header */ - memcpy ( &ipoib_hdr->pseudo.peer, ll_dest, - sizeof ( ipoib_hdr->pseudo.peer ) ); - ipoib_hdr->real.proto = net_protocol->net_proto; - ipoib_hdr->real.reserved = 0; + ipoib_hdr->proto = net_proto; + ipoib_hdr->u.peer.dest = dest->key; + ipoib_hdr->u.peer.src = src->key; return 0; } @@ -180,15 +267,16 @@ static int ipoib_push ( struct io_buffer *iobuf, * Remove IPoIB link-layer header * * @v iobuf I/O buffer - * @v netdev Network device - * @v net_proto Network-layer protocol, in network-byte order - * @v ll_source Source link-layer address + * @ret ll_dest Link-layer destination address + * @ret ll_source Source link-layer address + * @ret net_proto Network-layer protocol, in network-byte order * @ret rc Return status code */ -static int ipoib_pull ( struct io_buffer *iobuf, - struct net_device *netdev __unused, - uint16_t *net_proto, const void **ll_source ) { +static int ipoib_pull ( struct io_buffer *iobuf, const void **ll_dest, + const void **ll_source, uint16_t *net_proto ) { struct ipoib_hdr *ipoib_hdr = iobuf->data; + struct ipoib_peer *dest; + struct ipoib_peer *source; /* Sanity check */ if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) { @@ -200,9 +288,17 @@ static int ipoib_pull ( struct io_buffer *iobuf, /* Strip off IPoIB header */ iob_pull ( iobuf, sizeof ( *ipoib_hdr ) ); + /* Identify source and destination addresses, and clear + * reserved word in IPoIB header + */ + dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest ); + source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src ); + ipoib_hdr->u.reserved = 0; + /* Fill in required fields */ - *net_proto = ipoib_hdr->real.proto; - *ll_source = &ipoib_hdr->pseudo.peer; + *ll_dest = ( dest ? &dest->mac : &ipoib_broadcast ); + *ll_source = ( source ? &source->mac : &ipoib_broadcast ); + *net_proto = ipoib_hdr->proto; return 0; } @@ -225,6 +321,21 @@ const char * ipoib_ntoa ( const void *ll_addr ) { return buf; } +/** + * Hash multicast address + * + * @v af Address family + * @v net_addr Network-layer address + * @v ll_addr Link-layer address to fill in + * @ret rc Return status code + */ +static int ipoib_mc_hash ( unsigned int af __unused, + const void *net_addr __unused, + void *ll_addr __unused ) { + + return -ENOTSUP; +} + /** IPoIB protocol */ struct ll_protocol ipoib_protocol __ll_protocol = { .name = "IPoIB", @@ -235,6 +346,7 @@ struct ll_protocol ipoib_protocol __ll_protocol = { .push = ipoib_push, .pull = ipoib_pull, .ntoa = ipoib_ntoa, + .mc_hash = ipoib_mc_hash, }; /**************************************************************************** @@ -266,11 +378,17 @@ static void ipoib_destroy_qset ( struct ipoib_device *ipoib, * * @v ipoib IPoIB device * @v qset Queue set + * @v num_cqes Number of completion queue entries + * @v cq_op Completion queue operations + * @v num_send_wqes Number of send work queue entries + * @v num_recv_wqes Number of receive work queue entries + * @v qkey Queue key * @ret rc Return status code */ static int ipoib_create_qset ( struct ipoib_device *ipoib, struct ipoib_queue_set *qset, unsigned int num_cqes, + struct ib_completion_queue_operations *cq_op, unsigned int num_send_wqes, unsigned int num_recv_wqes, unsigned long qkey ) { @@ -285,7 +403,7 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib, qset->recv_max_fill = num_recv_wqes; /* Allocate completion queue */ - qset->cq = ib_create_cq ( ibdev, num_cqes ); + qset->cq = ib_create_cq ( ibdev, num_cqes, cq_op ); if ( ! qset->cq ) { DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n", ipoib ); @@ -312,28 +430,6 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib, } /** - * Find path cache entry by GID - * - * @v gid GID - * @ret entry Path cache entry, or NULL - */ -static struct ipoib_cached_path * -ipoib_find_cached_path ( struct ib_gid *gid ) { - struct ipoib_cached_path *path; - unsigned int i; - - for ( i = 0 ; i < IPOIB_NUM_CACHED_PATHS ; i++ ) { - path = &ipoib_path_cache[i]; - if ( memcmp ( &path->gid, gid, sizeof ( *gid ) ) == 0 ) - return path; - } - DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n", - htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ), - htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ) ); - return NULL; -} - -/** * Transmit path record request * * @v ipoib IPoIB device @@ -344,36 +440,38 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, struct ib_gid *gid ) { struct ib_device *ibdev = ipoib->ibdev; struct io_buffer *iobuf; - struct ib_mad_path_record *path_record; + struct ib_mad_sa *sa; struct ib_address_vector av; int rc; /* Allocate I/O buffer */ - iobuf = alloc_iob ( sizeof ( *path_record ) ); + iobuf = alloc_iob ( sizeof ( *sa ) ); if ( ! iobuf ) return -ENOMEM; - iob_put ( iobuf, sizeof ( *path_record ) ); - path_record = iobuf->data; - memset ( path_record, 0, sizeof ( *path_record ) ); + iob_put ( iobuf, sizeof ( *sa ) ); + sa = iobuf->data; + memset ( sa, 0, sizeof ( *sa ) ); /* Construct path record request */ - path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION; - path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; - path_record->mad_hdr.class_version = 2; - path_record->mad_hdr.method = IB_MGMT_METHOD_GET; - path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); - path_record->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC; - path_record->mad_hdr.tid[1] = ipoib_meta_tid++; - path_record->sa_hdr.comp_mask[1] = + sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = 2; + sa->mad_hdr.method = IB_MGMT_METHOD_GET; + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + sa->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC; + sa->mad_hdr.tid[1] = ipoib_meta_tid++; + sa->sa_hdr.comp_mask[1] = htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); - memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) ); - memcpy ( &path_record->sgid, &ibdev->port_gid, - sizeof ( path_record->sgid ) ); + memcpy ( &sa->sa_data.path_record.dgid, gid, + sizeof ( sa->sa_data.path_record.dgid ) ); + memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid, + sizeof ( sa->sa_data.path_record.sgid ) ); /* Construct address vector */ memset ( &av, 0, sizeof ( av ) ); - av.dlid = ibdev->sm_lid; - av.dest_qp = IB_SA_QPN; + av.lid = ibdev->sm_lid; + av.sl = ibdev->sm_sl; + av.qpn = IB_SA_QPN; av.qkey = IB_GLOBAL_QKEY; /* Post send request */ @@ -400,40 +498,41 @@ static int ipoib_mc_member_record ( struct ipoib_device *ipoib, struct ib_gid *gid, int join ) { struct ib_device *ibdev = ipoib->ibdev; struct io_buffer *iobuf; - struct ib_mad_mc_member_record *mc_member_record; + struct ib_mad_sa *sa; struct ib_address_vector av; int rc; /* Allocate I/O buffer */ - iobuf = alloc_iob ( sizeof ( *mc_member_record ) ); + iobuf = alloc_iob ( sizeof ( *sa ) ); if ( ! iobuf ) return -ENOMEM; - iob_put ( iobuf, sizeof ( *mc_member_record ) ); - mc_member_record = iobuf->data; - memset ( mc_member_record, 0, sizeof ( *mc_member_record ) ); + iob_put ( iobuf, sizeof ( *sa ) ); + sa = iobuf->data; + memset ( sa, 0, sizeof ( *sa ) ); /* Construct path record request */ - mc_member_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION; - mc_member_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; - mc_member_record->mad_hdr.class_version = 2; - mc_member_record->mad_hdr.method = + sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = 2; + sa->mad_hdr.method = ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); - mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); - mc_member_record->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC; - mc_member_record->mad_hdr.tid[1] = ipoib_meta_tid++; - mc_member_record->sa_hdr.comp_mask[1] = + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); + sa->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC; + sa->mad_hdr.tid[1] = ipoib_meta_tid++; + sa->sa_hdr.comp_mask[1] = htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE ); - mc_member_record->scope__join_state = 1; - memcpy ( &mc_member_record->mgid, gid, - sizeof ( mc_member_record->mgid ) ); - memcpy ( &mc_member_record->port_gid, &ibdev->port_gid, - sizeof ( mc_member_record->port_gid ) ); + sa->sa_data.mc_member_record.scope__join_state = 1; + memcpy ( &sa->sa_data.mc_member_record.mgid, gid, + sizeof ( sa->sa_data.mc_member_record.mgid ) ); + memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid, + sizeof ( sa->sa_data.mc_member_record.port_gid ) ); /* Construct address vector */ memset ( &av, 0, sizeof ( av ) ); - av.dlid = ibdev->sm_lid; - av.dest_qp = IB_SA_QPN; + av.lid = ibdev->sm_lid; + av.sl = ibdev->sm_sl; + av.qpn = IB_SA_QPN; av.qkey = IB_GLOBAL_QKEY; /* Post send request */ @@ -459,49 +558,51 @@ static int ipoib_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; - struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + struct ipoib_hdr *ipoib_hdr; + struct ipoib_peer *dest; struct ib_address_vector av; struct ib_gid *gid; - struct ipoib_cached_path *path; - int rc; /* Sanity check */ - if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { + if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) { DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); return -EINVAL; } - iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); + ipoib_hdr = iobuf->data; /* Attempting transmission while link is down will put the * queue pair into an error state, so don't try it. */ - if ( ! ibdev->link_up ) + if ( ! ib_link_ok ( ibdev ) ) return -ENETUNREACH; + /* Identify destination address */ + dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest ); + if ( ! dest ) + return -ENXIO; + ipoib_hdr->u.reserved = 0; + /* Construct address vector */ memset ( &av, 0, sizeof ( av ) ); - av.qkey = IB_GLOBAL_QKEY; + av.qkey = ipoib->data_qkey; av.gid_present = 1; - if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) { - /* Broadcast address */ - av.dest_qp = IB_BROADCAST_QPN; - av.dlid = ipoib->broadcast_lid; + if ( dest->mac.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) { + /* Broadcast */ + av.qpn = IB_BROADCAST_QPN; + av.lid = ipoib->broadcast_lid; gid = &ipoib->broadcast_gid; } else { - /* Unicast - look in path cache */ - path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid ); - if ( ! path ) { - /* No path entry - get path record */ - rc = ipoib_get_path_record ( ipoib, - &ipoib_pshdr->peer.gid ); - netdev_tx_complete ( netdev, iobuf ); - return rc; + /* Unicast */ + if ( ! dest->lid ) { + /* No LID yet - get path record to fetch LID */ + ipoib_get_path_record ( ipoib, &dest->mac.gid ); + return -ENOENT; } - av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn ); - av.dlid = path->dlid; - av.rate = path->rate; - av.sl = path->sl; - gid = &ipoib_pshdr->peer.gid; + av.qpn = ntohl ( dest->mac.qpn ); + av.lid = dest->lid; + av.rate = dest->rate; + av.sl = dest->sl; + gid = &dest->mac.gid; } memcpy ( &av.gid, gid, sizeof ( av.gid ) ); @@ -513,17 +614,15 @@ static int ipoib_transmit ( struct net_device *netdev, * * @v ibdev Infiniband device * @v qp Queue pair - * @v completion Completion * @v iobuf I/O buffer + * @v rc Completion status code */ static void ipoib_data_complete_send ( struct ib_device *ibdev __unused, struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { + struct io_buffer *iobuf, int rc ) { struct net_device *netdev = ib_qp_get_ownerdata ( qp ); - netdev_tx_complete_err ( netdev, iobuf, - ( completion->syndrome ? -EIO : 0 ) ); + netdev_tx_complete_err ( netdev, iobuf, rc ); } /** @@ -531,67 +630,67 @@ static void ipoib_data_complete_send ( struct ib_device *ibdev __unused, * * @v ibdev Infiniband device * @v qp Queue pair - * @v completion Completion + * @v av Address vector, or NULL * @v iobuf I/O buffer + * @v rc Completion status code */ static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { + struct ib_address_vector *av, + struct io_buffer *iobuf, int rc ) { struct net_device *netdev = ib_qp_get_ownerdata ( qp ); struct ipoib_device *ipoib = netdev->priv; - struct ipoib_pseudo_hdr *ipoib_pshdr; - - if ( completion->syndrome ) { - netdev_rx_err ( netdev, iobuf, -EIO ); - goto done; - } + struct ipoib_hdr *ipoib_hdr; + struct ipoib_peer *src; - iob_put ( iobuf, completion->len ); - if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) { - DBGC ( ipoib, "IPoIB %p received data packet too short to " - "contain GRH\n", ipoib ); - DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); - netdev_rx_err ( netdev, iobuf, -EIO ); - goto done; + if ( rc != 0 ) { + netdev_rx_err ( netdev, iobuf, rc ); + return; } - iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); - if ( iob_len ( iobuf ) < sizeof ( struct ipoib_real_hdr ) ) { + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) { DBGC ( ipoib, "IPoIB %p received data packet too short to " "contain IPoIB header\n", ipoib ); DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); netdev_rx_err ( netdev, iobuf, -EIO ); - goto done; + return; } + ipoib_hdr = iobuf->data; - ipoib_pshdr = iob_push ( iobuf, sizeof ( *ipoib_pshdr ) ); - /* FIXME: fill in a MAC address for the sake of AoE! */ + /* Parse source address */ + if ( av->gid_present ) { + src = ipoib_cache_peer ( &av->gid, av->qpn ); + ipoib_hdr->u.peer.src = src->key; + } + /* Hand off to network layer */ netdev_rx ( netdev, iobuf ); - - done: - ipoib->data.recv_fill--; } +/** IPoIB data completion operations */ +static struct ib_completion_queue_operations ipoib_data_cq_op = { + .complete_send = ipoib_data_complete_send, + .complete_recv = ipoib_data_complete_recv, +}; + /** * Handle IPoIB metadata send completion * * @v ibdev Infiniband device * @v qp Queue pair - * @v completion Completion * @v iobuf I/O buffer + * @v rc Completion status code */ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { + struct io_buffer *iobuf, int rc ) { struct net_device *netdev = ib_qp_get_ownerdata ( qp ); struct ipoib_device *ipoib = netdev->priv; - if ( completion->syndrome ) { - DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n", - ipoib, completion->syndrome ); + if ( rc != 0 ) { + DBGC ( ipoib, "IPoIB %p metadata TX completion error: %s\n", + ipoib, strerror ( rc ) ); } free_iob ( iobuf ); } @@ -602,26 +701,25 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, * @v ipoib IPoIB device * @v path_record Path record */ -static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused, - struct ib_mad_path_record *path_record ) { - struct ipoib_cached_path *path; +static void ipoib_recv_path_record ( struct ipoib_device *ipoib, + struct ib_path_record *path_record ) { + struct ipoib_peer *peer; + + /* Locate peer cache entry */ + peer = ipoib_lookup_peer_by_gid ( &path_record->dgid ); + if ( ! peer ) { + DBGC ( ipoib, "IPoIB %p received unsolicited path record\n", + ipoib ); + return; + } /* Update path cache entry */ - path = &ipoib_path_cache[ipoib_path_cache_idx]; - memcpy ( &path->gid, &path_record->dgid, sizeof ( path->gid ) ); - path->dlid = ntohs ( path_record->dlid ); - path->sl = ( path_record->reserved__sl & 0x0f ); - path->rate = ( path_record->rate_selector__rate & 0x3f ); - - DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n", - htonl ( path->gid.u.dwords[0] ), htonl ( path->gid.u.dwords[1] ), - htonl ( path->gid.u.dwords[2] ), htonl ( path->gid.u.dwords[3] ), - path->dlid, path->sl, path->rate ); - - /* Update path cache index */ - ipoib_path_cache_idx++; - if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS ) - ipoib_path_cache_idx = 0; + peer->lid = ntohs ( path_record->dlid ); + peer->sl = ( path_record->reserved__sl & 0x0f ); + peer->rate = ( path_record->rate_selector__rate & 0x3f ); + + DBG ( "IPoIB peer %x has dlid %x sl %x rate %x\n", + peer->key, peer->lid, peer->sl, peer->rate ); } /** @@ -631,7 +729,7 @@ static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused, * @v mc_member_record Multicast membership record */ static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib, - struct ib_mad_mc_member_record *mc_member_record ) { + struct ib_mc_member_record *mc_member_record ) { int joined; int rc; @@ -657,64 +755,64 @@ static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib, * * @v ibdev Infiniband device * @v qp Queue pair - * @v completion Completion + * @v av Address vector, or NULL * @v iobuf I/O buffer + * @v rc Completion status code */ -static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { +static void +ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_address_vector *av __unused, + struct io_buffer *iobuf, int rc ) { struct net_device *netdev = ib_qp_get_ownerdata ( qp ); struct ipoib_device *ipoib = netdev->priv; - union ib_mad *mad; + struct ib_mad_sa *sa; - if ( completion->syndrome ) { - DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", - ipoib, completion->syndrome ); + if ( rc != 0 ) { + DBGC ( ipoib, "IPoIB %p metadata RX completion error: %s\n", + ipoib, strerror ( rc ) ); goto done; } - iob_put ( iobuf, completion->len ); - if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) { - DBGC ( ipoib, "IPoIB %p received metadata packet too short " - "to contain GRH\n", ipoib ); - DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); - goto done; - } - iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); - if ( iob_len ( iobuf ) < sizeof ( *mad ) ) { + if ( iob_len ( iobuf ) < sizeof ( *sa ) ) { DBGC ( ipoib, "IPoIB %p received metadata packet too short " "to contain reply\n", ipoib ); DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); goto done; } - mad = iobuf->data; + sa = iobuf->data; - if ( mad->mad_hdr.status != 0 ) { + if ( sa->mad_hdr.status != 0 ) { DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n", - ipoib, ntohs ( mad->mad_hdr.status ) ); + ipoib, ntohs ( sa->mad_hdr.status ) ); goto done; } - switch ( mad->mad_hdr.tid[0] ) { + switch ( sa->mad_hdr.tid[0] ) { case IPOIB_TID_GET_PATH_REC: - ipoib_recv_path_record ( ipoib, &mad->path_record ); + ipoib_recv_path_record ( ipoib, &sa->sa_data.path_record ); break; case IPOIB_TID_MC_MEMBER_REC: - ipoib_recv_mc_member_record ( ipoib, &mad->mc_member_record ); + ipoib_recv_mc_member_record ( ipoib, + &sa->sa_data.mc_member_record ); break; default: DBGC ( ipoib, "IPoIB %p unwanted response:\n", ipoib ); - DBGC_HD ( ipoib, mad, sizeof ( *mad ) ); + DBGC_HD ( ipoib, sa, sizeof ( *sa ) ); break; } done: - ipoib->meta.recv_fill--; free_iob ( iobuf ); } +/** IPoIB metadata completion operations */ +static struct ib_completion_queue_operations ipoib_meta_cq_op = { + .complete_send = ipoib_meta_complete_send, + .complete_recv = ipoib_meta_complete_recv, +}; + /** * Refill IPoIB receive ring * @@ -726,15 +824,14 @@ static void ipoib_refill_recv ( struct ipoib_device *ipoib, struct io_buffer *iobuf; int rc; - while ( qset->recv_fill < qset->recv_max_fill ) { - iobuf = alloc_iob ( IPOIB_MTU ); + while ( qset->qp->recv.fill < qset->recv_max_fill ) { + iobuf = alloc_iob ( IPOIB_PKT_LEN ); if ( ! iobuf ) break; if ( ( rc = ib_post_recv ( ibdev, qset->qp, iobuf ) ) != 0 ) { free_iob ( iobuf ); break; } - qset->recv_fill++; } } @@ -747,10 +844,8 @@ static void ipoib_poll ( struct net_device *netdev ) { struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; - ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, - ipoib_meta_complete_recv ); - ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, - ipoib_data_complete_recv ); + ib_poll_cq ( ibdev, ipoib->meta.cq ); + ib_poll_cq ( ibdev, ipoib->data.cq ); ipoib_refill_recv ( ipoib, &ipoib->meta ); ipoib_refill_recv ( ipoib, &ipoib->data ); } @@ -834,6 +929,7 @@ static int ipoib_open ( struct net_device *netdev ) { /* Allocate metadata queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta, IPOIB_META_NUM_CQES, + &ipoib_meta_cq_op, IPOIB_META_NUM_SEND_WQES, IPOIB_META_NUM_RECV_WQES, IB_GLOBAL_QKEY ) ) != 0 ) { @@ -845,6 +941,7 @@ static int ipoib_open ( struct net_device *netdev ) { /* Allocate data queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, IPOIB_DATA_NUM_CQES, + &ipoib_data_cq_op, IPOIB_DATA_NUM_SEND_WQES, IPOIB_DATA_NUM_RECV_WQES, IB_GLOBAL_QKEY ) ) != 0 ) { @@ -923,15 +1020,15 @@ static void ipoib_set_ib_params ( struct ipoib_device *ipoib ) { /* Calculate GID portion of MAC address based on port GID */ mac = ( ( struct ipoib_mac * ) netdev->ll_addr ); - memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); + memcpy ( &mac->gid, &ibdev->gid, sizeof ( mac->gid ) ); /* Calculate broadcast GID based on partition key */ - memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid, + memcpy ( &ipoib->broadcast_gid, &ipoib_broadcast.gid, sizeof ( ipoib->broadcast_gid ) ); ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey ); /* Set net device link state to reflect Infiniband link state */ - if ( ibdev->link_up ) { + if ( ib_link_ok ( ibdev ) ) { netdev_link_up ( netdev ); } else { netdev_link_down ( netdev ); |