diff options
Diffstat (limited to 'gpxe/src/net/infiniband.c')
-rw-r--r-- | gpxe/src/net/infiniband.c | 437 |
1 files changed, 437 insertions, 0 deletions
diff --git a/gpxe/src/net/infiniband.c b/gpxe/src/net/infiniband.c new file mode 100644 index 00000000..39d11285 --- /dev/null +++ b/gpxe/src/net/infiniband.c @@ -0,0 +1,437 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <gpxe/list.h> +#include <gpxe/if_arp.h> +#include <gpxe/netdevice.h> +#include <gpxe/iobuf.h> +#include <gpxe/ipoib.h> +#include <gpxe/infiniband.h> + +/** @file + * + * Infiniband protocol + * + */ + +/** + * Create completion queue + * + * @v ibdev Infiniband device + * @v num_cqes Number of completion queue entries + * @ret cq New completion queue + */ +struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, + unsigned int num_cqes ) { + struct ib_completion_queue *cq; + int rc; + + DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev ); + + /* Allocate and initialise data structure */ + cq = zalloc ( sizeof ( *cq ) ); + if ( ! cq ) + return NULL; + cq->num_cqes = num_cqes; + INIT_LIST_HEAD ( &cq->work_queues ); + + /* Perform device-specific initialisation and get CQN */ + if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise completion " + "queue: %s\n", ibdev, strerror ( rc ) ); + free ( cq ); + return NULL; + } + + DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) " + "with CQN %#lx\n", ibdev, num_cqes, cq, + ib_cq_get_drvdata ( cq ), cq->cqn ); + return cq; +} + +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +void ib_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n", + ibdev, cq->cqn ); + assert ( list_empty ( &cq->work_queues ) ); + ibdev->op->destroy_cq ( ibdev, cq ); + free ( cq ); +} + +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v num_send_wqes Number of send work queue entries + * @v send_cq Send completion queue + * @v num_recv_wqes Number of receive work queue entries + * @v recv_cq Receive completion queue + * @v qkey Queue key + * @ret qp Queue pair + */ +struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, + unsigned int num_send_wqes, + struct ib_completion_queue *send_cq, + unsigned int num_recv_wqes, + struct ib_completion_queue *recv_cq, + unsigned long qkey ) { + struct ib_queue_pair *qp; + size_t total_size; + int rc; + + DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); + + /* Allocate and initialise data structure */ + total_size = ( sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + + ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + qp = zalloc ( total_size ); + if ( ! qp ) + return NULL; + qp->qkey = qkey; + qp->send.qp = qp; + qp->send.is_send = 1; + qp->send.cq = send_cq; + list_add ( &qp->send.list, &send_cq->work_queues ); + qp->send.num_wqes = num_send_wqes; + qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) ); + qp->recv.qp = qp; + qp->recv.cq = recv_cq; + list_add ( &qp->recv.list, &recv_cq->work_queues ); + qp->recv.num_wqes = num_recv_wqes; + qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); + + /* Perform device-specific initialisation and get QPN */ + if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise queue pair: " + "%s\n", ibdev, strerror ( rc ) ); + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); + free ( qp ); + return NULL; + } + + DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", + ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n", + ibdev, qp->qpn, num_send_wqes, qp->send.iobufs, + qp->recv.iobufs ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n", + ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs, + ( ( ( void * ) qp ) + total_size ) ); + return qp; +} + +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +void ib_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n", + ibdev, qp->qpn ); + ibdev->op->destroy_qp ( ibdev, qp ); + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); + free ( qp ); +} + +/** + * Find work queue belonging to completion queue + * + * @v cq Completion queue + * @v qpn Queue pair number + * @v is_send Find send work queue (rather than receive) + * @ret wq Work queue, or NULL if not found + */ +struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ) { + struct ib_work_queue *wq; + + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) ) + return wq; + } + return NULL; +} + +/*************************************************************************** + * + * Management datagram operations + * + *************************************************************************** + */ + +/** + * Get port information + * + * @v ibdev Infiniband device + * @v port_info Port information datagram to fill in + * @ret rc Return status code + */ +static int ib_get_port_info ( struct ib_device *ibdev, + struct ib_mad_port_info *port_info ) { + struct ib_mad_hdr *hdr = &port_info->mad_hdr; + int rc; + + /* Construct MAD */ + memset ( port_info, 0, sizeof ( *port_info ) ); + hdr->base_version = IB_MGMT_BASE_VERSION; + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO ); + hdr->attr_mod = htonl ( ibdev->port ); + + if ( ( rc = ib_mad ( ibdev, hdr, sizeof ( *port_info ) ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get port info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get GUID information + * + * @v ibdev Infiniband device + * @v guid_info GUID information datagram to fill in + * @ret rc Return status code + */ +static int ib_get_guid_info ( struct ib_device *ibdev, + struct ib_mad_guid_info *guid_info ) { + struct ib_mad_hdr *hdr = &guid_info->mad_hdr; + int rc; + + /* Construct MAD */ + memset ( guid_info, 0, sizeof ( *guid_info ) ); + hdr->base_version = IB_MGMT_BASE_VERSION; + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO ); + + if ( ( rc = ib_mad ( ibdev, hdr, sizeof ( *guid_info ) ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get partition key table + * + * @v ibdev Infiniband device + * @v guid_info Partition key table datagram to fill in + * @ret rc Return status code + */ +static int ib_get_pkey_table ( struct ib_device *ibdev, + struct ib_mad_pkey_table *pkey_table ) { + struct ib_mad_hdr *hdr = &pkey_table->mad_hdr; + int rc; + + /* Construct MAD */ + memset ( pkey_table, 0, sizeof ( *pkey_table ) ); + hdr->base_version = IB_MGMT_BASE_VERSION; + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ); + + if ( ( rc = ib_mad ( ibdev, hdr, sizeof ( *pkey_table ) ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Wait for link up + * + * @v ibdev Infiniband device + * @ret rc Return status code + * + * This function shouldn't really exist. Unfortunately, IB links take + * a long time to come up, and we can't get various key parameters + * e.g. our own IPoIB MAC address without information from the subnet + * manager). We should eventually make link-up an asynchronous event. + */ +static int ib_wait_for_link ( struct ib_device *ibdev ) { + struct ib_mad_port_info port_info; + unsigned int retries; + int rc; + + printf ( "Waiting for Infiniband link-up..." ); + for ( retries = 20 ; retries ; retries-- ) { + if ( ( rc = ib_get_port_info ( ibdev, &port_info ) ) != 0 ) + continue; + if ( ( ( port_info.port_state__link_speed_supported ) & 0xf ) + == 4 ) { + printf ( "ok\n" ); + return 0; + } + printf ( "." ); + sleep ( 1 ); + } + printf ( "failed\n" ); + return -ENODEV; +}; + +/** + * Get MAD parameters + * + * @v ibdev Infiniband device + * @ret rc Return status code + */ +static int ib_get_mad_params ( struct ib_device *ibdev ) { + union { + /* This union exists just to save stack space */ + struct ib_mad_port_info port_info; + struct ib_mad_guid_info guid_info; + struct ib_mad_pkey_table pkey_table; + } u; + int rc; + + /* Port info gives us the first half of the port GID and the SM LID */ + if ( ( rc = ib_get_port_info ( ibdev, &u.port_info ) ) != 0 ) + return rc; + memcpy ( &ibdev->port_gid.u.bytes[0], u.port_info.gid_prefix, 8 ); + ibdev->sm_lid = ntohs ( u.port_info.mastersm_lid ); + + /* GUID info gives us the second half of the port GID */ + if ( ( rc = ib_get_guid_info ( ibdev, &u.guid_info ) ) != 0 ) + return rc; + memcpy ( &ibdev->port_gid.u.bytes[8], u.guid_info.gid_local, 8 ); + + /* Get partition key */ + if ( ( rc = ib_get_pkey_table ( ibdev, &u.pkey_table ) ) != 0 ) + return rc; + ibdev->pkey = ntohs ( u.pkey_table.pkey[0][0] ); + + DBGC ( ibdev, "IBDEV %p port GID is %08lx:%08lx:%08lx:%08lx\n", + ibdev, htonl ( ibdev->port_gid.u.dwords[0] ), + htonl ( ibdev->port_gid.u.dwords[1] ), + htonl ( ibdev->port_gid.u.dwords[2] ), + htonl ( ibdev->port_gid.u.dwords[3] ) ); + + return 0; +} + +/*************************************************************************** + * + * Infiniband device creation/destruction + * + *************************************************************************** + */ + +/** + * Allocate Infiniband device + * + * @v priv_size Size of driver private data area + * @ret ibdev Infiniband device, or NULL + */ +struct ib_device * alloc_ibdev ( size_t priv_size ) { + struct ib_device *ibdev; + void *drv_priv; + size_t total_len; + + total_len = ( sizeof ( *ibdev ) + priv_size ); + ibdev = zalloc ( total_len ); + if ( ibdev ) { + drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); + ib_set_drvdata ( ibdev, drv_priv ); + } + return ibdev; +} + +/** + * Register Infiniband device + * + * @v ibdev Infiniband device + * @ret rc Return status code + */ +int register_ibdev ( struct ib_device *ibdev ) { + int rc; + + /* Open link */ + if ( ( rc = ib_open ( ibdev ) ) != 0 ) + goto err_open; + + /* Wait for link */ + if ( ( rc = ib_wait_for_link ( ibdev ) ) != 0 ) + goto err_wait_for_link; + + /* Get MAD parameters */ + if ( ( rc = ib_get_mad_params ( ibdev ) ) != 0 ) + goto err_get_mad_params; + + /* Add IPoIB device */ + if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n", + ibdev, strerror ( rc ) ); + goto err_ipoib_probe; + } + + return 0; + + err_ipoib_probe: + err_get_mad_params: + err_wait_for_link: + ib_close ( ibdev ); + err_open: + return rc; +} + +/** + * Unregister Infiniband device + * + * @v ibdev Infiniband device + */ +void unregister_ibdev ( struct ib_device *ibdev ) { + ipoib_remove ( ibdev ); + ib_close ( ibdev ); +} + +/** + * Free Infiniband device + * + * @v ibdev Infiniband device + */ +void free_ibdev ( struct ib_device *ibdev ) { + free ( ibdev ); +} + |