summary | refs | log | tree | commit | diff
path: root/gpxe/src/net/infiniband.c
diff options
context:
space:
mode:
Diffstat (limited to 'gpxe/src/net/infiniband.c')
-rw-r--r-- gpxe/src/net/infiniband.c 437
1 files changed, 437 insertions, 0 deletions
diff --git a/gpxe/src/net/infiniband.c b/gpxe/src/net/infiniband.c
new file mode 100644
index 00000000..39d11285
--- /dev/null
+++ b/gpxe/src/net/infiniband.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <assert.h>
+#include <gpxe/list.h>
+#include <gpxe/if_arp.h>
+#include <gpxe/netdevice.h>
+#include <gpxe/iobuf.h>
+#include <gpxe/ipoib.h>
+#include <gpxe/infiniband.h>
+
+/** @file
+ *
+ * Infiniband protocol
+ *
+ */
+
+/**
+ * Create completion queue
+ *
+ * @v ibdev		Infiniband device
+ * @v num_cqes		Number of completion queue entries
+ * @ret cq		New completion queue, or NULL on failure
+ *
+ * The queue structure is zero-allocated, given an empty list of work
+ * queues, and then passed to the device's create_cq() operation.  If
+ * that operation fails, the structure is freed again and NULL is
+ * returned.  A queue created here is released by ib_destroy_cq().
+ */
+struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev,
+					    unsigned int num_cqes ) {
+	struct ib_completion_queue *cq;
+	int rc;
+
+	DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
+
+	/* Allocate a zeroed completion queue structure */
+	cq = zalloc ( sizeof ( *cq ) );
+	if ( cq == NULL )
+		return NULL;
+
+	/* Fill in the generic fields before the driver sees the queue */
+	INIT_LIST_HEAD ( &cq->work_queues );
+	cq->num_cqes = num_cqes;
+
+	/* Device-specific initialisation; this is where the CQN is set */
+	rc = ibdev->op->create_cq ( ibdev, cq );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not initialise completion "
+		       "queue: %s\n", ibdev, strerror ( rc ) );
+		free ( cq );
+		return NULL;
+	}
+
+	DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
+	       "with CQN %#lx\n", ibdev, num_cqes, cq,
+	       ib_cq_get_drvdata ( cq ), cq->cqn );
+	return cq;
+}
+
+/**
+ * Destroy completion queue
+ *
+ * @v ibdev		Infiniband device
+ * @v cq		Completion queue
+ *
+ * The completion queue must no longer have any work queues attached;
+ * this is checked with an assertion.  The device's destroy_cq()
+ * operation runs first, after which the structure itself is freed.
+ */
+void ib_destroy_cq ( struct ib_device *ibdev,
+		     struct ib_completion_queue *cq ) {
+
+	DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
+	       ibdev, cq->cqn );
+
+	/* Every work queue using this CQ must already have been removed */
+	assert ( list_empty ( &cq->work_queues ) );
+
+	/* Device-specific teardown, then release the generic structure */
+	ibdev->op->destroy_cq ( ibdev, cq );
+	free ( cq );
+}
+
+/**
+ * Create queue pair
+ *
+ * @v ibdev		Infiniband device
+ * @v num_send_wqes	Number of send work queue entries
+ * @v send_cq		Send completion queue
+ * @v num_recv_wqes	Number of receive work queue entries
+ * @v recv_cq		Receive completion queue
+ * @v qkey		Queue key
+ * @ret qp		Queue pair, or NULL on failure
+ *
+ * A single zeroed allocation holds the queue pair structure followed
+ * by the send iobufs array and then the receive iobufs array.  The
+ * send and receive work queues are linked onto their completion
+ * queues before the device's create_qp() operation is called, and
+ * are unlinked again if that operation fails.
+ */
+struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
+				      unsigned int num_send_wqes,
+				      struct ib_completion_queue *send_cq,
+				      unsigned int num_recv_wqes,
+				      struct ib_completion_queue *recv_cq,
+				      unsigned long qkey ) {
+	struct ib_queue_pair *qp;
+	size_t send_iobufs_len;
+	size_t recv_iobufs_len;
+	size_t total_size;
+	void *send_iobufs;
+	void *recv_iobufs;
+	int rc;
+
+	DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
+
+	/* Work out the layout of the combined allocation */
+	send_iobufs_len = ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) );
+	recv_iobufs_len = ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) );
+	total_size = ( sizeof ( *qp ) + send_iobufs_len + recv_iobufs_len );
+
+	/* Allocate the queue pair together with both iobufs arrays */
+	qp = zalloc ( total_size );
+	if ( qp == NULL )
+		return NULL;
+	send_iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
+	recv_iobufs = ( send_iobufs + send_iobufs_len );
+
+	qp->qkey = qkey;
+
+	/* Send work queue */
+	qp->send.qp = qp;
+	qp->send.is_send = 1;
+	qp->send.cq = send_cq;
+	qp->send.num_wqes = num_send_wqes;
+	qp->send.iobufs = send_iobufs;
+	list_add ( &qp->send.list, &send_cq->work_queues );
+
+	/* Receive work queue (is_send stays zero from zalloc) */
+	qp->recv.qp = qp;
+	qp->recv.cq = recv_cq;
+	qp->recv.num_wqes = num_recv_wqes;
+	qp->recv.iobufs = recv_iobufs;
+	list_add ( &qp->recv.list, &recv_cq->work_queues );
+
+	/* Device-specific initialisation; this is where the QPN is set */
+	rc = ibdev->op->create_qp ( ibdev, qp );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
+		       "%s\n", ibdev, strerror ( rc ) );
+		/* Undo the completion queue linkage made above */
+		list_del ( &qp->send.list );
+		list_del ( &qp->recv.list );
+		free ( qp );
+		return NULL;
+	}
+
+	DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
+	       ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
+	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
+	       ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
+	       qp->recv.iobufs );
+	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
+	       ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
+	       ( ( ( void * ) qp ) + total_size ) );
+	return qp;
+}
+
+/**
+ * Destroy queue pair
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ *
+ * The device's destroy_qp() operation runs while both work queues
+ * are still linked to their completion queues; only afterwards are
+ * they unlinked and the queue pair freed.
+ */
+void ib_destroy_qp ( struct ib_device *ibdev,
+		     struct ib_queue_pair *qp ) {
+
+	DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n",
+	       ibdev, qp->qpn );
+
+	/* Device-specific teardown */
+	ibdev->op->destroy_qp ( ibdev, qp );
+
+	/* Detach both work queues from their completion queues */
+	list_del ( &qp->send.list );
+	list_del ( &qp->recv.list );
+
+	/* Frees the queue pair and the iobufs arrays allocated with it */
+	free ( qp );
+}
+
+/**
+ * Find work queue belonging to completion queue
+ *
+ * @v cq		Completion queue
+ * @v qpn		Queue pair number
+ * @v is_send		Find send work queue (rather than receive)
+ * @ret wq		Work queue, or NULL if not found
+ *
+ * Walks the completion queue's list of work queues and returns the
+ * first one whose queue pair number and is_send flag both equal the
+ * requested values.
+ */
+struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
+				    unsigned long qpn, int is_send ) {
+	struct ib_work_queue *candidate;
+
+	list_for_each_entry ( candidate, &cq->work_queues, list ) {
+		/* Wrong queue pair */
+		if ( candidate->qp->qpn != qpn )
+			continue;
+		/* Right queue pair, wrong direction */
+		if ( candidate->is_send != is_send )
+			continue;
+		return candidate;
+	}
+
+	/* No matching work queue on this completion queue */
+	return NULL;
+}
+
+/***************************************************************************
+ *
+ * Management datagram operations
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Get port information
+ *
+ * @v ibdev		Infiniband device
+ * @v port_info		Port information datagram to fill in
+ * @ret rc		Return status code
+ *
+ * Clears the datagram, builds a "get" request for the port info
+ * attribute with the device's port number as attribute modifier, and
+ * issues it via ib_mad().  Any non-zero status from ib_mad() is
+ * logged and returned unchanged.
+ */
+static int ib_get_port_info ( struct ib_device *ibdev,
+			      struct ib_mad_port_info *port_info ) {
+	struct ib_mad_hdr *mad_hdr;
+	int rc;
+
+	/* Start from an all-zero datagram */
+	memset ( port_info, 0, sizeof ( *port_info ) );
+
+	/* Fill in the MAD header */
+	mad_hdr = &port_info->mad_hdr;
+	mad_hdr->base_version = IB_MGMT_BASE_VERSION;
+	mad_hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	mad_hdr->class_version = 1;
+	mad_hdr->method = IB_MGMT_METHOD_GET;
+	mad_hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO );
+	mad_hdr->attr_mod = htonl ( ibdev->port );
+
+	/* Issue the MAD */
+	rc = ib_mad ( ibdev, mad_hdr, sizeof ( *port_info ) );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get port info: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Get GUID information
+ *
+ * @v ibdev		Infiniband device
+ * @v guid_info		GUID information datagram to fill in
+ * @ret rc		Return status code
+ *
+ * Clears the datagram, builds a "get" request for the GUID info
+ * attribute (attribute modifier left at zero), and issues it via
+ * ib_mad().  Any non-zero status from ib_mad() is logged and returned
+ * unchanged.
+ */
+static int ib_get_guid_info ( struct ib_device *ibdev,
+			      struct ib_mad_guid_info *guid_info ) {
+	struct ib_mad_hdr *mad_hdr;
+	int rc;
+
+	/* Start from an all-zero datagram */
+	memset ( guid_info, 0, sizeof ( *guid_info ) );
+
+	/* Fill in the MAD header */
+	mad_hdr = &guid_info->mad_hdr;
+	mad_hdr->base_version = IB_MGMT_BASE_VERSION;
+	mad_hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	mad_hdr->class_version = 1;
+	mad_hdr->method = IB_MGMT_METHOD_GET;
+	mad_hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO );
+
+	/* Issue the MAD */
+	rc = ib_mad ( ibdev, mad_hdr, sizeof ( *guid_info ) );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Get partition key table
+ *
+ * @v ibdev		Infiniband device
+ * @v pkey_table	Partition key table datagram to fill in
+ * @ret rc		Return status code
+ *
+ * Clears the datagram, builds a "get" request for the partition key
+ * table attribute (attribute modifier left at zero), and issues it
+ * via ib_mad().  Any non-zero status from ib_mad() is logged and
+ * returned unchanged.
+ */
+static int ib_get_pkey_table ( struct ib_device *ibdev,
+			       struct ib_mad_pkey_table *pkey_table ) {
+	struct ib_mad_hdr *mad_hdr;
+	int rc;
+
+	/* Start from an all-zero datagram */
+	memset ( pkey_table, 0, sizeof ( *pkey_table ) );
+
+	/* Fill in the MAD header */
+	mad_hdr = &pkey_table->mad_hdr;
+	mad_hdr->base_version = IB_MGMT_BASE_VERSION;
+	mad_hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	mad_hdr->class_version = 1;
+	mad_hdr->method = IB_MGMT_METHOD_GET;
+	mad_hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE );
+
+	/* Issue the MAD */
+	rc = ib_mad ( ibdev, mad_hdr, sizeof ( *pkey_table ) );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Wait for link up
+ *
+ * @v ibdev		Infiniband device
+ * @ret rc		Return status code
+ *
+ * Polls the port information once per second, for a bounded number
+ * of attempts, until the port state field reports the value treated
+ * here as "link up".  Returns 0 as soon as that happens, or -ENODEV
+ * if every attempt is used up.
+ *
+ * A failed port information query counts as an attempt and is
+ * followed by the same one-second delay as a "not yet up" result, so
+ * that transient query failures cannot exhaust all attempts without
+ * actually waiting.
+ *
+ * This function shouldn't really exist.  Unfortunately, IB links take
+ * a long time to come up, and we can't get various key parameters
+ * (e.g. our own IPoIB MAC address) without information from the
+ * subnet manager.  We should eventually make link-up an asynchronous
+ * event.
+ */
+static int ib_wait_for_link ( struct ib_device *ibdev ) {
+	/* Number of one-second polling attempts before giving up */
+	const unsigned int max_retries = 20;
+	/* The port state occupies the low four bits of the shared field */
+	const unsigned int port_state_mask = 0xf;
+	/* Port state value accepted as link-up (presumably the IBA
+	 * "Active" state; confirm against the PortInfo definition)
+	 */
+	const unsigned int port_state_up = 4;
+	struct ib_mad_port_info port_info;
+	unsigned int port_state;
+	unsigned int retries;
+
+	printf ( "Waiting for Infiniband link-up..." );
+	for ( retries = max_retries ; retries ; retries-- ) {
+		if ( ib_get_port_info ( ibdev, &port_info ) == 0 ) {
+			port_state =
+			    ( port_info.port_state__link_speed_supported &
+			      port_state_mask );
+			if ( port_state == port_state_up ) {
+				printf ( "ok\n" );
+				return 0;
+			}
+		}
+
+		/* Link not up yet, or the query failed: show progress
+		 * and pause before the next attempt
+		 */
+		printf ( "." );
+		sleep ( 1 );
+	}
+	printf ( "failed\n" );
+	return -ENODEV;
+}
+
+/**
+ * Get MAD parameters
+ *
+ * @v ibdev		Infiniband device
+ * @ret rc		Return status code
+ *
+ * Issues three management queries in turn (port info, GUID info and
+ * partition key table) and stores the results in the device: the two
+ * halves of the port GID, the subnet manager LID and the partition
+ * key.  The first failing query aborts the sequence and its status
+ * code is returned; fields filled in by earlier queries are left as
+ * they are.
+ */
+static int ib_get_mad_params ( struct ib_device *ibdev ) {
+	union {
+		/* Only one datagram is live at a time, so the three
+		 * share storage to keep stack usage down
+		 */
+		struct ib_mad_port_info port_info;
+		struct ib_mad_guid_info guid_info;
+		struct ib_mad_pkey_table pkey_table;
+	} mad;
+	int rc;
+
+	/* Port info: SM LID and first eight bytes of the port GID */
+	rc = ib_get_port_info ( ibdev, &mad.port_info );
+	if ( rc != 0 )
+		return rc;
+	ibdev->sm_lid = ntohs ( mad.port_info.mastersm_lid );
+	memcpy ( &ibdev->port_gid.u.bytes[0], mad.port_info.gid_prefix, 8 );
+
+	/* GUID info: last eight bytes of the port GID */
+	rc = ib_get_guid_info ( ibdev, &mad.guid_info );
+	if ( rc != 0 )
+		return rc;
+	memcpy ( &ibdev->port_gid.u.bytes[8], mad.guid_info.gid_local, 8 );
+
+	/* Partition key table: use the first entry as our partition key */
+	rc = ib_get_pkey_table ( ibdev, &mad.pkey_table );
+	if ( rc != 0 )
+		return rc;
+	ibdev->pkey = ntohs ( mad.pkey_table.pkey[0][0] );
+
+	DBGC ( ibdev, "IBDEV %p port GID is %08lx:%08lx:%08lx:%08lx\n",
+	       ibdev, htonl ( ibdev->port_gid.u.dwords[0] ),
+	       htonl ( ibdev->port_gid.u.dwords[1] ),
+	       htonl ( ibdev->port_gid.u.dwords[2] ),
+	       htonl ( ibdev->port_gid.u.dwords[3] ) );
+
+	return 0;
+}
+
+/***************************************************************************
+ *
+ * Infiniband device creation/destruction
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Allocate Infiniband device
+ *
+ * @v priv_size		Size of driver private data area
+ * @ret ibdev		Infiniband device, or NULL
+ *
+ * The device structure and the driver private data area are obtained
+ * as one zeroed allocation, with the private area placed directly
+ * after the structure and recorded as the device's driver data.
+ */
+struct ib_device * alloc_ibdev ( size_t priv_size ) {
+	struct ib_device *ibdev;
+
+	/* Device structure plus trailing private area */
+	ibdev = zalloc ( sizeof ( *ibdev ) + priv_size );
+	if ( ibdev == NULL )
+		return NULL;
+
+	/* Private data begins immediately after the device structure */
+	ib_set_drvdata ( ibdev, ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ) );
+	return ibdev;
+}
+
+/**
+ * Register Infiniband device
+ *
+ * @v ibdev		Infiniband device
+ * @ret rc		Return status code
+ *
+ * Opens the device, waits for the link to come up, fetches the MAD
+ * parameters and finally adds the IPoIB device.  If any step after
+ * the open fails, the device is closed again before the failing
+ * step's status code is returned.
+ */
+int register_ibdev ( struct ib_device *ibdev ) {
+	int rc;
+
+	/* Open link; nothing to undo if this fails */
+	rc = ib_open ( ibdev );
+	if ( rc != 0 )
+		return rc;
+
+	/* Wait for link */
+	rc = ib_wait_for_link ( ibdev );
+	if ( rc != 0 )
+		goto err_close;
+
+	/* Get MAD parameters */
+	rc = ib_get_mad_params ( ibdev );
+	if ( rc != 0 )
+		goto err_close;
+
+	/* Add IPoIB device */
+	rc = ipoib_probe ( ibdev );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
+		       ibdev, strerror ( rc ) );
+		goto err_close;
+	}
+
+	return 0;
+
+ err_close:
+	/* Every failure after a successful open needs only a close */
+	ib_close ( ibdev );
+	return rc;
+}
+
+/**
+ * Unregister Infiniband device
+ *
+ * @v ibdev		Infiniband device
+ *
+ * Reverses register_ibdev(): the IPoIB device is removed first and
+ * the Infiniband device is then closed.
+ */
+void unregister_ibdev ( struct ib_device *ibdev ) {
+
+	/* Remove IPoIB device */
+	ipoib_remove ( ibdev );
+
+	/* Close link */
+	ib_close ( ibdev );
+}
+
+/**
+ * Free Infiniband device
+ *
+ * @v ibdev		Infiniband device
+ *
+ * Releases the allocation made by alloc_ibdev(), which also holds the
+ * driver private data area.
+ */
+void free_ibdev ( struct ib_device *ibdev ) {
+
+	/* One allocation covers the device and its private data */
+	free ( ibdev );
+}
+