| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-12 16:11:45 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-12 16:11:45 -0700 |
| commit | 48efe453e6b29561f78a1df55c7f58375259cb8c | |
| tree | 53d6ac1f2010b102c15b264b13fc4c98ba634d48 | |
| parent | ac4de9543aca59f2b763746647577302fbedd57e | |
| parent | 2999ee7fda3f670effbfa746164c525f9d1be4b8 | |
| download | linux-48efe453e6b29561f78a1df55c7f58375259cb8c.tar.gz | |
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
Pull SCSI target updates from Nicholas Bellinger:
"Lots of activity again this round for I/O performance optimizations
(per-cpu IDA pre-allocation for vhost + iscsi/target), and the
addition of new fabric-independent features to target-core
(COMPARE_AND_WRITE + EXTENDED_COPY).
The main highlights include:
- Support for iscsi-target login multiplexing across individual
network portals
- Generic Per-cpu IDA logic (kent + akpm + clameter)
- Conversion of vhost to use per-cpu IDA pre-allocation for
descriptors, SGLs and userspace page pointer list
- Conversion of iscsi-target + iser-target to use per-cpu IDA
pre-allocation for descriptors
- Add support for generic COMPARE_AND_WRITE (AtomicTestAndSet)
emulation for virtual backend drivers
- Add support for generic EXTENDED_COPY (CopyOffload) emulation for
virtual backend drivers
- Add support for fast memory registration mode to iser-target (Vu)
The patches to add COMPARE_AND_WRITE and EXTENDED_COPY support are of
particular significance: they make us the first and only open source
target to support the full set of VAAI primitives.
Currently Linux clients lack the upstream support to actually
utilize these primitives. However, with server-side support now in
place for folks like MKP + ZAB working on the client, this logic,
once reserved for the highest end of storage arrays, can now be run
in VMs on their laptops"
* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending: (50 commits)
target/iscsi: Bump versions to v4.1.0
target: Update copyright ownership/year information to 2013
iscsi-target: Bump default TCP listen backlog to 256
target: Fix >= v3.9+ regression in PR APTPL + ALUA metadata write-out
iscsi-target: Bump default CmdSN Depth to 64
iscsi-target: Remove unnecessary wait_for_completion in iscsi_get_thread_set
iscsi-target: Add thread_set->ts_activate_sem + use common deallocate
iscsi-target: Fix race with thread_pre_handler flush_signals + ISCSI_THREAD_SET_DIE
target: remove unused including <linux/version.h>
iser-target: introduce fast memory registration mode (FRWR)
iser-target: generalize rdma memory registration and cleanup
iser-target: move rdma wr processing to a shared function
target: Enable global EXTENDED_COPY setup/release
target: Add Third Party Copy (3PC) bit in INQUIRY response
target: Enable EXTENDED_COPY setup in spc_parse_cdb
target: Add support for EXTENDED_COPY copy offload emulation
target: Avoid non-existent tg_pt_gp_mem in target_alua_state_check
target: Add global device list for EXTENDED_COPY
target: Make helpers non static for EXTENDED_COPY command setup
target: Make spc_parse_naa_6h_vendor_specific non static
...
61 files changed, 3487 insertions(+), 776 deletions(-)
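The per-cpu IDA conversions above replace the per-I/O kmem_cache allocations removed in the diff below (isert_cmd_cache, lio_cmd_cache) with command tags pre-allocated at session establishment, so the fast path becomes a cheap per-cpu tag grab instead of an allocator call. A rough sketch of the usage pattern — the struct names are hypothetical, and the percpu_ida signatures shown match the API as proposed for this cycle but may differ from the merged tree:

```c
#include <linux/percpu_ida.h>
#include <linux/slab.h>

/* Hypothetical fabric driver state, for illustration only. */
struct example_cmd {
	/* per-command descriptor, SGLs, etc. live here */
	int tag;
};

struct example_session {
	struct percpu_ida tag_pool;
	struct example_cmd *cmds;	/* tag_num descriptors, allocated once */
};

static int example_session_init(struct example_session *sess, unsigned int tag_num)
{
	sess->cmds = kcalloc(tag_num, sizeof(*sess->cmds), GFP_KERNEL);
	if (!sess->cmds)
		return -ENOMEM;
	/* set up per-cpu freelists covering tags [0, tag_num) */
	return percpu_ida_init(&sess->tag_pool, tag_num);
}

/* Fast path: take a pre-allocated descriptor instead of kmem_cache_zalloc() */
static struct example_cmd *example_cmd_get(struct example_session *sess)
{
	int tag = percpu_ida_alloc(&sess->tag_pool, GFP_KERNEL);

	if (tag < 0)
		return NULL;
	sess->cmds[tag].tag = tag;
	return &sess->cmds[tag];
}

static void example_cmd_put(struct example_session *sess, struct example_cmd *cmd)
{
	percpu_ida_free(&sess->tag_pool, cmd->tag);
}
```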
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3f62041222f2..3591855cc5b5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1,7 +1,7 @@ /******************************************************************************* * This file contains iSCSI extentions for RDMA (iSER) Verbs * - * (c) Copyright 2013 RisingTide Systems LLC. + * (c) Copyright 2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -39,7 +39,17 @@ static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); static struct workqueue_struct *isert_rx_wq; static struct workqueue_struct *isert_comp_wq; -static struct kmem_cache *isert_cmd_cache; + +static void +isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); +static int +isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); +static void +isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); +static int +isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); static void isert_qp_event_callback(struct ib_event *e, void *context) @@ -80,14 +90,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) { struct isert_device *device = isert_conn->conn_device; struct ib_qp_init_attr attr; - struct ib_device_attr devattr; int ret, index, min_index = 0; - memset(&devattr, 0, sizeof(struct ib_device_attr)); - ret = isert_query_device(cma_id->device, &devattr); - if (ret) - return ret; - mutex_lock(&device_list_mutex); for (index = 0; index < device->cqs_used; index++) if (device->cq_active_qps[index] < @@ -108,7 +112,7 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) * FIXME: Use devattr.max_sge - 2 for max_send_sge as * work-around for RDMA_READ.. 
*/ - attr.cap.max_send_sge = devattr.max_sge - 2; + attr.cap.max_send_sge = device->dev_attr.max_sge - 2; isert_conn->max_sge = attr.cap.max_send_sge; attr.cap.max_recv_sge = 1; @@ -210,14 +214,31 @@ isert_create_device_ib_res(struct isert_device *device) { struct ib_device *ib_dev = device->ib_device; struct isert_cq_desc *cq_desc; + struct ib_device_attr *dev_attr; int ret = 0, i, j; + dev_attr = &device->dev_attr; + ret = isert_query_device(ib_dev, dev_attr); + if (ret) + return ret; + + /* asign function handlers */ + if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + device->use_frwr = 1; + device->reg_rdma_mem = isert_reg_rdma_frwr; + device->unreg_rdma_mem = isert_unreg_rdma_frwr; + } else { + device->use_frwr = 0; + device->reg_rdma_mem = isert_map_rdma; + device->unreg_rdma_mem = isert_unmap_cmd; + } + device->cqs_used = min_t(int, num_online_cpus(), device->ib_device->num_comp_vectors); device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used); - pr_debug("Using %d CQs, device %s supports %d vectors\n", + pr_debug("Using %d CQs, device %s supports %d vectors support FRWR %d\n", device->cqs_used, device->ib_device->name, - device->ib_device->num_comp_vectors); + device->ib_device->num_comp_vectors, device->use_frwr); device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) * device->cqs_used, GFP_KERNEL); if (!device->cq_desc) { @@ -363,6 +384,85 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id) return device; } +static void +isert_conn_free_frwr_pool(struct isert_conn *isert_conn) +{ + struct fast_reg_descriptor *fr_desc, *tmp; + int i = 0; + + if (list_empty(&isert_conn->conn_frwr_pool)) + return; + + pr_debug("Freeing conn %p frwr pool", isert_conn); + + list_for_each_entry_safe(fr_desc, tmp, + &isert_conn->conn_frwr_pool, list) { + list_del(&fr_desc->list); + ib_free_fast_reg_page_list(fr_desc->data_frpl); + ib_dereg_mr(fr_desc->data_mr); + kfree(fr_desc); + ++i; + } + + if (i < isert_conn->conn_frwr_pool_size) + pr_warn("Pool still has %d regions registered\n", + isert_conn->conn_frwr_pool_size - i); +} + +static int +isert_conn_create_frwr_pool(struct isert_conn *isert_conn) +{ + struct fast_reg_descriptor *fr_desc; + struct isert_device *device = isert_conn->conn_device; + int i, ret; + + INIT_LIST_HEAD(&isert_conn->conn_frwr_pool); + isert_conn->conn_frwr_pool_size = 0; + for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) { + fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL); + if (!fr_desc) { + pr_err("Failed to allocate fast_reg descriptor\n"); + ret = -ENOMEM; + goto err; + } + + fr_desc->data_frpl = + ib_alloc_fast_reg_page_list(device->ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc->data_frpl)) { + pr_err("Failed to allocate fr_pg_list err=%ld\n", + PTR_ERR(fr_desc->data_frpl)); + ret = PTR_ERR(fr_desc->data_frpl); + goto err; + } + + fr_desc->data_mr = ib_alloc_fast_reg_mr(device->dev_pd, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc->data_mr)) { + pr_err("Failed to allocate frmr err=%ld\n", + PTR_ERR(fr_desc->data_mr)); + ret = PTR_ERR(fr_desc->data_mr); + ib_free_fast_reg_page_list(fr_desc->data_frpl); + goto err; + } + pr_debug("Create fr_desc %p page_list %p\n", + fr_desc, fr_desc->data_frpl->page_list); + + fr_desc->valid = true; + list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); + isert_conn->conn_frwr_pool_size++; + } + + pr_debug("Creating conn %p frwr pool size=%d", + isert_conn, isert_conn->conn_frwr_pool_size); + + return 0; + +err: + isert_conn_free_frwr_pool(isert_conn); + return ret; +} + static int 
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { @@ -389,6 +489,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); + spin_lock_init(&isert_conn->conn_lock); cma_id->context = isert_conn; isert_conn->conn_cm_id = cma_id; @@ -446,6 +547,14 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->conn_pd = device->dev_pd; isert_conn->conn_mr = device->dev_mr; + if (device->use_frwr) { + ret = isert_conn_create_frwr_pool(isert_conn); + if (ret) { + pr_err("Conn: %p failed to create frwr_pool\n", isert_conn); + goto out_frwr; + } + } + ret = isert_conn_setup_qp(isert_conn, cma_id); if (ret) goto out_conn_dev; @@ -459,6 +568,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) return 0; out_conn_dev: + if (device->use_frwr) + isert_conn_free_frwr_pool(isert_conn); +out_frwr: isert_device_try_release(device); out_rsp_dma_map: ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma, @@ -482,6 +594,9 @@ isert_connect_release(struct isert_conn *isert_conn) pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); + if (device->use_frwr) + isert_conn_free_frwr_pool(isert_conn); + if (isert_conn->conn_qp) { cq_index = ((struct isert_cq_desc *) isert_conn->conn_qp->recv_cq->cq_context)->cq_index; @@ -869,46 +984,37 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen, size, rx_buflen, MAX_KEY_VALUE_PAIRS); memcpy(login->req_buf, &rx_desc->data[0], size); - complete(&isert_conn->conn_login_comp); -} - -static void -isert_release_cmd(struct iscsi_cmd *cmd) -{ - struct isert_cmd *isert_cmd = container_of(cmd, struct isert_cmd, - iscsi_cmd); - - pr_debug("Entering isert_release_cmd %p >>>>>>>>>>>>>>>.\n", isert_cmd); - - kfree(cmd->buf_ptr); - kfree(cmd->tmr_req); - - kmem_cache_free(isert_cmd_cache, isert_cmd); + if (login->first_request) { + complete(&isert_conn->conn_login_comp); + return; + } + schedule_delayed_work(&conn->login_work, 0); } static struct iscsi_cmd -*isert_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp) +*isert_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp) { struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct isert_cmd *isert_cmd; + struct iscsi_cmd *cmd; - isert_cmd = kmem_cache_zalloc(isert_cmd_cache, gfp); - if (!isert_cmd) { - pr_err("Unable to allocate isert_cmd\n"); + cmd = iscsit_allocate_cmd(conn, gfp); + if (!cmd) { + pr_err("Unable to allocate iscsi_cmd + isert_cmd\n"); return NULL; } + isert_cmd = iscsit_priv_cmd(cmd); isert_cmd->conn = isert_conn; - isert_cmd->iscsi_cmd.release_cmd = &isert_release_cmd; + isert_cmd->iscsi_cmd = cmd; - return &isert_cmd->iscsi_cmd; + return cmd; } static int isert_handle_scsi_cmd(struct isert_conn *isert_conn, - struct isert_cmd *isert_cmd, struct iser_rx_desc *rx_desc, - unsigned char *buf) + struct isert_cmd *isert_cmd, struct iscsi_cmd *cmd, + struct iser_rx_desc *rx_desc, unsigned char *buf) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf; struct scatterlist *sg; @@ -1015,9 +1121,9 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, static int isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct iser_rx_desc *rx_desc, unsigned char *buf) + struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc, + 
unsigned char *buf) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_nopout *hdr = (struct iscsi_nopout *)buf; int rc; @@ -1034,9 +1140,9 @@ isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, static int isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct iser_rx_desc *rx_desc, struct iscsi_text *hdr) + struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc, + struct iscsi_text *hdr) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct iscsi_conn *conn = isert_conn->conn; u32 payload_length = ntoh24(hdr->dlength); int rc; @@ -1081,26 +1187,26 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, switch (opcode) { case ISCSI_OP_SCSI_CMD: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; - isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); + isert_cmd = iscsit_priv_cmd(cmd); isert_cmd->read_stag = read_stag; isert_cmd->read_va = read_va; isert_cmd->write_stag = write_stag; isert_cmd->write_va = write_va; - ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, + ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd, rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_NOOP_OUT: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; - isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); - ret = isert_handle_nop_out(isert_conn, isert_cmd, + isert_cmd = iscsit_priv_cmd(cmd); + ret = isert_handle_nop_out(isert_conn, isert_cmd, cmd, rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_SCSI_DATA_OUT: @@ -1108,7 +1214,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_SCSI_TMFUNC: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; @@ -1116,7 +1222,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_LOGOUT: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; @@ -1127,12 +1233,12 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, HZ); break; case ISCSI_OP_TEXT: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; - isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); - ret = isert_handle_text_cmd(isert_conn, isert_cmd, + isert_cmd = iscsit_priv_cmd(cmd); + ret = isert_handle_text_cmd(isert_conn, isert_cmd, cmd, rx_desc, (struct iscsi_text *)hdr); break; default: @@ -1243,26 +1349,65 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; - pr_debug("isert_unmap_cmd >>>>>>>>>>>>>>>>>>>>>>>\n"); + pr_debug("isert_unmap_cmd: %p\n", isert_cmd); + if (wr->sge) { + pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd); + ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE); + wr->sge = NULL; + } + + if (wr->send_wr) { + pr_debug("isert_unmap_cmd: %p free send_wr\n", isert_cmd); + kfree(wr->send_wr); + wr->send_wr = NULL; + } + + if (wr->ib_sge) { + pr_debug("isert_unmap_cmd: %p free ib_sge\n", isert_cmd); + kfree(wr->ib_sge); + wr->ib_sge = NULL; + } +} + +static void +isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) +{ + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + LIST_HEAD(unmap_list); + + pr_debug("unreg_frwr_cmd: %p\n", isert_cmd); + + if (wr->fr_desc) { + pr_debug("unreg_frwr_cmd: %p free fr_desc %p\n", + isert_cmd, wr->fr_desc); + spin_lock_bh(&isert_conn->conn_lock); + list_add_tail(&wr->fr_desc->list, &isert_conn->conn_frwr_pool); + spin_unlock_bh(&isert_conn->conn_lock); + wr->fr_desc = NULL; + } if (wr->sge) { - ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE); + pr_debug("unreg_frwr_cmd: %p unmap_sg op\n", isert_cmd); + ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); wr->sge = NULL; } - kfree(wr->send_wr); + wr->ib_sge = NULL; wr->send_wr = NULL; - - kfree(isert_cmd->ib_sge); - isert_cmd->ib_sge = NULL; } static void isert_put_cmd(struct isert_cmd *isert_cmd) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct isert_conn *isert_conn = isert_cmd->conn; struct iscsi_conn *conn = isert_conn->conn; + struct isert_device *device = isert_conn->conn_device; pr_debug("Entering isert_put_cmd: %p\n", isert_cmd); @@ -1276,7 +1421,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) if (cmd->data_direction == DMA_TO_DEVICE) iscsit_stop_dataout_timer(cmd); - isert_unmap_cmd(isert_cmd, isert_conn); + device->unreg_rdma_mem(isert_cmd, isert_conn); transport_generic_free_cmd(&cmd->se_cmd, 0); break; case ISCSI_OP_SCSI_TMFUNC: @@ -1311,7 +1456,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) * Fall-through */ default: - isert_release_cmd(cmd); + iscsit_release_cmd(cmd); break; } } @@ -1347,27 +1492,16 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd) { struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct se_cmd *se_cmd = &cmd->se_cmd; - struct ib_device *ib_dev = isert_cmd->conn->conn_cm_id->device; + struct isert_conn *isert_conn = isert_cmd->conn; + struct isert_device *device = isert_conn->conn_device; iscsit_stop_dataout_timer(cmd); + device->unreg_rdma_mem(isert_cmd, isert_conn); + cmd->write_data_done = wr->cur_rdma_length; - if (wr->sge) { - pr_debug("isert_do_rdma_read_comp: Unmapping wr->sge from t_data_sg\n"); - ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE); - wr->sge = NULL; - } - - if (isert_cmd->ib_sge) { - pr_debug("isert_do_rdma_read_comp: Freeing isert_cmd->ib_sge\n"); - kfree(isert_cmd->ib_sge); - isert_cmd->ib_sge = NULL; - } - - cmd->write_data_done = se_cmd->data_length; - - pr_debug("isert_do_rdma_read_comp, calling target_execute_cmd\n"); + pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); spin_lock_bh(&cmd->istate_lock); cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; @@ -1383,7 +1517,7 @@ isert_do_control_comp(struct work_struct *work) struct isert_cmd, comp_work); struct isert_conn *isert_conn = isert_cmd->conn; struct ib_device *ib_dev = 
isert_conn->conn_cm_id->device; - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; switch (cmd->i_state) { case ISTATE_SEND_TASKMGTRSP: @@ -1429,7 +1563,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn, struct ib_device *ib_dev) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || cmd->i_state == ISTATE_SEND_LOGOUTRSP || @@ -1621,8 +1755,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) static int isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; struct iscsi_scsi_rsp *hdr = (struct iscsi_scsi_rsp *) @@ -1671,8 +1804,7 @@ static int isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, bool nopout_response) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; @@ -1691,8 +1823,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, static int isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; @@ -1710,8 +1841,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; @@ -1729,8 +1859,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; @@ -1762,8 +1891,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; struct iscsi_text_rsp *hdr = @@ -1805,7 +1933,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, struct ib_sge *ib_sge, struct ib_send_wr *send_wr, u32 data_left, u32 offset) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct scatterlist *sg_start, *tmp_sg; struct ib_device *ib_dev = 
isert_conn->conn_cm_id->device; u32 sg_off, page_off; @@ -1832,8 +1960,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, ib_sg_dma_len(ib_dev, tmp_sg) - page_off); ib_sge->lkey = isert_conn->conn_mr->lkey; - pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u\n", - ib_sge->addr, ib_sge->length); + pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n", + ib_sge->addr, ib_sge->length, ib_sge->lkey); page_off = 0; data_left -= ib_sge->length; ib_sge++; @@ -1847,200 +1975,373 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, } static int -isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr) { struct se_cmd *se_cmd = &cmd->se_cmd; - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); - struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; - struct ib_send_wr *wr_failed, *send_wr; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct ib_send_wr *send_wr; struct ib_sge *ib_sge; - struct scatterlist *sg; - u32 offset = 0, data_len, data_left, rdma_write_max; - int rc, ret = 0, count, sg_nents, i, ib_sge_cnt; - - pr_debug("RDMA_WRITE: data_length: %u\n", se_cmd->data_length); + struct scatterlist *sg_start; + u32 sg_off = 0, sg_nents; + u32 offset = 0, data_len, data_left, rdma_write_max, va_offset = 0; + int ret = 0, count, i, ib_sge_cnt; + + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + data_left = se_cmd->data_length; + iscsit_increment_maxcmdsn(cmd, conn->sess); + cmd->stat_sn = conn->stat_sn++; + } else { + sg_off = cmd->write_data_done / PAGE_SIZE; + data_left = se_cmd->data_length - cmd->write_data_done; + offset = cmd->write_data_done; + isert_cmd->tx_desc.isert_cmd = isert_cmd; + } - sg = &se_cmd->t_data_sg[0]; - sg_nents = se_cmd->t_data_nents; + sg_start = &cmd->se_cmd.t_data_sg[sg_off]; + sg_nents = se_cmd->t_data_nents - sg_off; - count = ib_dma_map_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE); + count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE); if (unlikely(!count)) { - pr_err("Unable to map put_datain SGs\n"); + pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); return -EINVAL; } - wr->sge = sg; + wr->sge = sg_start; wr->num_sge = sg_nents; - pr_debug("Mapped IB count: %u sg: %p sg_nents: %u for RDMA_WRITE\n", - count, sg, sg_nents); + wr->cur_rdma_length = data_left; + pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", + isert_cmd, count, sg_start, sg_nents, data_left); ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL); if (!ib_sge) { - pr_warn("Unable to allocate datain ib_sge\n"); + pr_warn("Unable to allocate ib_sge\n"); ret = -ENOMEM; goto unmap_sg; } - isert_cmd->ib_sge = ib_sge; - - pr_debug("Allocated ib_sge: %p from t_data_ents: %d for RDMA_WRITE\n", - ib_sge, se_cmd->t_data_nents); + wr->ib_sge = ib_sge; wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge); wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, GFP_KERNEL); if (!wr->send_wr) { - pr_err("Unable to allocate wr->send_wr\n"); + pr_debug("Unable to allocate wr->send_wr\n"); ret = -ENOMEM; goto unmap_sg; } - pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n", - wr->send_wr, wr->send_wr_num); - - iscsit_increment_maxcmdsn(cmd, conn->sess); - cmd->stat_sn = conn->stat_sn++; wr->isert_cmd = isert_cmd; rdma_write_max = isert_conn->max_sge * PAGE_SIZE; - data_left = se_cmd->data_length; for (i = 0; i < wr->send_wr_num; i++) { send_wr = &isert_cmd->rdma_wr.send_wr[i]; data_len = min(data_left, rdma_write_max); - send_wr->opcode = IB_WR_RDMA_WRITE; send_wr->send_flags = 0; - send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset; - send_wr->wr.rdma.rkey = isert_cmd->read_stag; + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + send_wr->opcode = IB_WR_RDMA_WRITE; + send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset; + send_wr->wr.rdma.rkey = isert_cmd->read_stag; + if (i + 1 == wr->send_wr_num) + send_wr->next = &isert_cmd->tx_desc.send_wr; + else + send_wr->next = &wr->send_wr[i + 1]; + } else { + send_wr->opcode = IB_WR_RDMA_READ; + send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset; + send_wr->wr.rdma.rkey = isert_cmd->write_stag; + if (i + 1 == wr->send_wr_num) + send_wr->send_flags = IB_SEND_SIGNALED; + else + send_wr->next = &wr->send_wr[i + 1]; + } ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, send_wr, data_len, offset); ib_sge += ib_sge_cnt; - if (i + 1 == wr->send_wr_num) - send_wr->next = &isert_cmd->tx_desc.send_wr; - else - send_wr->next = &wr->send_wr[i + 1]; - offset += data_len; + va_offset += data_len; data_left -= data_len; } - /* - * Build isert_conn->tx_desc for iSCSI response PDU and attach - */ - isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *) - &isert_cmd->tx_desc.iscsi_header); - isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); - atomic_inc(&isert_conn->post_send_buf_count); + return 0; +unmap_sg: + ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE); + return ret; +} - rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); - if (rc) { - pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); - atomic_dec(&isert_conn->post_send_buf_count); +static int +isert_map_fr_pagelist(struct ib_device *ib_dev, + struct scatterlist *sg_start, int sg_nents, u64 *fr_pl) +{ + u64 start_addr, end_addr, page, chunk_start = 0; + struct scatterlist *tmp_sg; + int i = 0, new_chunk, last_ent, n_pages; + + n_pages = 0; + new_chunk = 1; + last_ent = sg_nents - 1; + for_each_sg(sg_start, tmp_sg, sg_nents, i) { + start_addr = ib_sg_dma_address(ib_dev, tmp_sg); + if (new_chunk) + chunk_start = start_addr; + end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg); + + pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n", + i, (unsigned long long)tmp_sg->dma_address, + tmp_sg->length); + + if ((end_addr & ~PAGE_MASK) && i < last_ent) { + new_chunk = 0; + continue; + } + new_chunk = 1; + + page = chunk_start & PAGE_MASK; + do { + fr_pl[n_pages++] = page; + pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n", + n_pages - 1, page); + page += PAGE_SIZE; + } while (page < end_addr); } - pr_debug("Posted RDMA_WRITE + Response for iSER Data READ\n"); - return 1; -unmap_sg: - ib_dma_unmap_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE); + return n_pages; +} + +static int +isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, + struct isert_cmd *isert_cmd, struct isert_conn *isert_conn, + struct ib_sge *ib_sge, u32 offset, unsigned int data_len) +{ + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct scatterlist *sg_start; + u32 sg_off, page_off; + struct ib_send_wr fr_wr, inv_wr; + struct ib_send_wr *bad_wr, *wr = NULL; + u8 key; + int ret, sg_nents, pagelist_len; + + sg_off = offset / PAGE_SIZE; + sg_start = &cmd->se_cmd.t_data_sg[sg_off]; + sg_nents = min_t(unsigned int, cmd->se_cmd.t_data_nents - sg_off, + ISCSI_ISER_SG_TABLESIZE); + page_off = offset % PAGE_SIZE; + + pr_debug("Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n", + isert_cmd, fr_desc, sg_nents, sg_off, offset); + + pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents, + &fr_desc->data_frpl->page_list[0]); + + if (!fr_desc->valid) { + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey; + wr = &inv_wr; + /* Bump the key */ + key = (u8)(fr_desc->data_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(fr_desc->data_mr, ++key); + } + + /* Prepare FASTREG WR */ + memset(&fr_wr, 0, sizeof(fr_wr)); + fr_wr.opcode = IB_WR_FAST_REG_MR; + fr_wr.wr.fast_reg.iova_start = + fr_desc->data_frpl->page_list[0] + page_off; + fr_wr.wr.fast_reg.page_list = fr_desc->data_frpl; + fr_wr.wr.fast_reg.page_list_len = pagelist_len; + fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fr_wr.wr.fast_reg.length = data_len; + fr_wr.wr.fast_reg.rkey = fr_desc->data_mr->rkey; + fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; + + if (!wr) + wr = &fr_wr; + else + wr->next = &fr_wr; + + ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr); + if (ret) { + pr_err("fast registration failed, ret:%d\n", ret); + return ret; + } + fr_desc->valid = false; + + ib_sge->lkey = fr_desc->data_mr->lkey; + ib_sge->addr = fr_desc->data_frpl->page_list[0] + page_off; + ib_sge->length = data_len; + + pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n", + ib_sge->addr, ib_sge->length, ib_sge->lkey); + return ret; } static int -isert_get_dataout(struct 
iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) +isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr) { struct se_cmd *se_cmd = &cmd->se_cmd; - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); - struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; - struct ib_send_wr *wr_failed, *send_wr; - struct ib_sge *ib_sge; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct ib_send_wr *send_wr; + struct ib_sge *ib_sge; struct scatterlist *sg_start; - u32 sg_off, sg_nents, page_off, va_offset = 0; + struct fast_reg_descriptor *fr_desc; + u32 sg_off = 0, sg_nents; u32 offset = 0, data_len, data_left, rdma_write_max; - int rc, ret = 0, count, i, ib_sge_cnt; + int ret = 0, count; + unsigned long flags; - pr_debug("RDMA_READ: data_length: %u write_data_done: %u\n", - se_cmd->data_length, cmd->write_data_done); + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + data_left = se_cmd->data_length; + iscsit_increment_maxcmdsn(cmd, conn->sess); + cmd->stat_sn = conn->stat_sn++; + } else { + sg_off = cmd->write_data_done / PAGE_SIZE; + data_left = se_cmd->data_length - cmd->write_data_done; + offset = cmd->write_data_done; + isert_cmd->tx_desc.isert_cmd = isert_cmd; + } - sg_off = cmd->write_data_done / PAGE_SIZE; sg_start = &cmd->se_cmd.t_data_sg[sg_off]; - page_off = cmd->write_data_done % PAGE_SIZE; - - pr_debug("RDMA_READ: sg_off: %d, sg_start: %p page_off: %d\n", - sg_off, sg_start, page_off); - - data_left = se_cmd->data_length - cmd->write_data_done; sg_nents = se_cmd->t_data_nents - sg_off; - pr_debug("RDMA_READ: data_left: %d, sg_nents: %d\n", - data_left, sg_nents); - - count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, DMA_FROM_DEVICE); + count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE); if (unlikely(!count)) { - pr_err("Unable to map get_dataout SGs\n"); + pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); return -EINVAL; } wr->sge = sg_start; wr->num_sge = sg_nents; - pr_debug("Mapped IB count: %u sg_start: %p sg_nents: %u for RDMA_READ\n", - count, sg_start, sg_nents); + pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", + isert_cmd, count, sg_start, sg_nents, data_left); - ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL); - if (!ib_sge) { - pr_warn("Unable to allocate dataout ib_sge\n"); - ret = -ENOMEM; - goto unmap_sg; + memset(&wr->s_ib_sge, 0, sizeof(*ib_sge)); + ib_sge = &wr->s_ib_sge; + wr->ib_sge = ib_sge; + + wr->send_wr_num = 1; + memset(&wr->s_send_wr, 0, sizeof(*send_wr)); + wr->send_wr = &wr->s_send_wr; + + wr->isert_cmd = isert_cmd; + rdma_write_max = ISCSI_ISER_SG_TABLESIZE * PAGE_SIZE; + + send_wr = &isert_cmd->rdma_wr.s_send_wr; + send_wr->sg_list = ib_sge; + send_wr->num_sge = 1; + send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc; + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + send_wr->opcode = IB_WR_RDMA_WRITE; + send_wr->wr.rdma.remote_addr = isert_cmd->read_va; + send_wr->wr.rdma.rkey = isert_cmd->read_stag; + send_wr->send_flags = 0; + send_wr->next = &isert_cmd->tx_desc.send_wr; + } else { + send_wr->opcode = IB_WR_RDMA_READ; + send_wr->wr.rdma.remote_addr = isert_cmd->write_va; + send_wr->wr.rdma.rkey = isert_cmd->write_stag; + send_wr->send_flags = IB_SEND_SIGNALED; } - isert_cmd->ib_sge = ib_sge; - pr_debug("Using ib_sge: %p from sg_ents: %d for RDMA_READ\n", - ib_sge, sg_nents); + data_len = min(data_left, rdma_write_max); + wr->cur_rdma_length = data_len; - wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge); - wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, - GFP_KERNEL); - if (!wr->send_wr) { - pr_debug("Unable to allocate wr->send_wr\n"); - ret = -ENOMEM; + spin_lock_irqsave(&isert_conn->conn_lock, flags); + fr_desc = list_first_entry(&isert_conn->conn_frwr_pool, + struct fast_reg_descriptor, list); + list_del(&fr_desc->list); + spin_unlock_irqrestore(&isert_conn->conn_lock, flags); + wr->fr_desc = fr_desc; + + ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, + ib_sge, offset, data_len); + if (ret) { + list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); goto unmap_sg; } - pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n", - wr->send_wr, wr->send_wr_num); - isert_cmd->tx_desc.isert_cmd = isert_cmd; + return 0; - wr->iser_ib_op = ISER_IB_RDMA_READ; - wr->isert_cmd = isert_cmd; - rdma_write_max = isert_conn->max_sge * PAGE_SIZE; - offset = cmd->write_data_done; +unmap_sg: + ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE); + return ret; +} - for (i = 0; i < wr->send_wr_num; i++) { - send_wr = &isert_cmd->rdma_wr.send_wr[i]; - data_len = min(data_left, rdma_write_max); +static int +isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +{ + struct se_cmd *se_cmd = &cmd->se_cmd; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_conn *isert_conn = (struct isert_conn *)conn->context; + struct isert_device *device = isert_conn->conn_device; + struct ib_send_wr *wr_failed; + int rc; - send_wr->opcode = IB_WR_RDMA_READ; - send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset; - send_wr->wr.rdma.rkey = isert_cmd->write_stag; + pr_debug("Cmd: %p RDMA_WRITE data_length: %u\n", + isert_cmd, se_cmd->data_length); + wr->iser_ib_op = ISER_IB_RDMA_WRITE; + rc = device->reg_rdma_mem(conn, cmd, wr); + if (rc) { + pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); + return rc; + } - ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, - send_wr, data_len, offset); - ib_sge += ib_sge_cnt; + /* + * Build isert_conn->tx_desc for iSCSI response PDU and attach + */ + isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); + iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *) + &isert_cmd->tx_desc.iscsi_header); + isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); + isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); - if (i + 1 == wr->send_wr_num) - send_wr->send_flags = IB_SEND_SIGNALED; - else - send_wr->next = &wr->send_wr[i + 1]; + atomic_inc(&isert_conn->post_send_buf_count); - offset += data_len; - va_offset += data_len; - data_left -= data_len; + rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); + if (rc) { + pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); + atomic_dec(&isert_conn->post_send_buf_count); + } + pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n", + isert_cmd); + + return 1; +} + +static int +isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) +{ + struct se_cmd *se_cmd = &cmd->se_cmd; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_conn *isert_conn = (struct isert_conn *)conn->context; + struct isert_device *device = isert_conn->conn_device; + struct ib_send_wr *wr_failed; + int rc; + + pr_debug("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n", + isert_cmd, se_cmd->data_length, cmd->write_data_done); + wr->iser_ib_op = ISER_IB_RDMA_READ; + rc = device->reg_rdma_mem(conn, cmd, wr); + if (rc) { + pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); + return rc; } atomic_inc(&isert_conn->post_send_buf_count); @@ -2050,12 +2351,10 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); atomic_dec(&isert_conn->post_send_buf_count); } - pr_debug("Posted RDMA_READ memory for ISER Data WRITE\n"); - return 0; + pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", + isert_cmd); -unmap_sg: - ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, DMA_FROM_DEVICE); - return ret; + return 0; } static int @@ -2224,6 +2523,14 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) int ret; pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn); + /* + * For login requests after the first PDU, isert_rx_login_req() will + * kick schedule_delayed_work(&conn->login_work) as the 
packet is + * received, which turns this callback from iscsi_target_do_login_rx() + * into a NOP. + */ + if (!login->first_request) + return 0; ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp); if (ret) @@ -2393,12 +2700,12 @@ static void isert_free_conn(struct iscsi_conn *conn) static struct iscsit_transport iser_target_transport = { .name = "IB/iSER", .transport_type = ISCSI_INFINIBAND, + .priv_size = sizeof(struct isert_cmd), .owner = THIS_MODULE, .iscsit_setup_np = isert_setup_np, .iscsit_accept_np = isert_accept_np, .iscsit_free_np = isert_free_np, .iscsit_free_conn = isert_free_conn, - .iscsit_alloc_cmd = isert_alloc_cmd, .iscsit_get_login_rx = isert_get_login_rx, .iscsit_put_login_tx = isert_put_login_tx, .iscsit_immediate_queue = isert_immediate_queue, @@ -2425,21 +2732,10 @@ static int __init isert_init(void) goto destroy_rx_wq; } - isert_cmd_cache = kmem_cache_create("isert_cmd_cache", - sizeof(struct isert_cmd), __alignof__(struct isert_cmd), - 0, NULL); - if (!isert_cmd_cache) { - pr_err("Unable to create isert_cmd_cache\n"); - ret = -ENOMEM; - goto destroy_tx_cq; - } - iscsit_register_transport(&iser_target_transport); pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n"); return 0; -destroy_tx_cq: - destroy_workqueue(isert_comp_wq); destroy_rx_wq: destroy_workqueue(isert_rx_wq); return ret; @@ -2447,7 +2743,6 @@ destroy_rx_wq: static void __exit isert_exit(void) { - kmem_cache_destroy(isert_cmd_cache); destroy_workqueue(isert_comp_wq); destroy_workqueue(isert_rx_wq); iscsit_unregister_transport(&iser_target_transport); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 191117b5b508..631f2090f0b8 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -5,6 +5,7 @@ #include <rdma/rdma_cm.h> #define ISERT_RDMA_LISTEN_BACKLOG 10 +#define ISCSI_ISER_SG_TABLESIZE 256 enum isert_desc_type { ISCSI_TX_CONTROL, @@ -45,15 +46,26 @@ struct iser_tx_desc { struct ib_send_wr send_wr; } __packed; +struct fast_reg_descriptor { + struct list_head list; + struct ib_mr *data_mr; + struct ib_fast_reg_page_list *data_frpl; + bool valid; +}; + struct isert_rdma_wr { struct list_head wr_list; struct isert_cmd *isert_cmd; enum iser_ib_op_code iser_ib_op; struct ib_sge *ib_sge; + struct ib_sge s_ib_sge; int num_sge; struct scatterlist *sge; int send_wr_num; struct ib_send_wr *send_wr; + struct ib_send_wr s_send_wr; + u32 cur_rdma_length; + struct fast_reg_descriptor *fr_desc; }; struct isert_cmd { @@ -67,8 +79,7 @@ struct isert_cmd { u32 write_va_off; u32 rdma_wr_num; struct isert_conn *conn; - struct iscsi_cmd iscsi_cmd; - struct ib_sge *ib_sge; + struct iscsi_cmd *iscsi_cmd; struct iser_tx_desc tx_desc; struct isert_rdma_wr rdma_wr; struct work_struct comp_work; @@ -106,6 +117,10 @@ struct isert_conn { wait_queue_head_t conn_wait; wait_queue_head_t conn_wait_comp_err; struct kref conn_kref; + struct list_head conn_frwr_pool; + int conn_frwr_pool_size; + /* lock to protect frwr_pool */ + spinlock_t conn_lock; }; #define ISERT_MAX_CQ 64 @@ -118,6 +133,7 @@ struct isert_cq_desc { }; struct isert_device { + int use_frwr; int cqs_used; int refcount; int cq_active_qps[ISERT_MAX_CQ]; @@ -128,6 +144,12 @@ struct isert_device { struct ib_cq *dev_tx_cq[ISERT_MAX_CQ]; struct isert_cq_desc *cq_desc; struct list_head dev_node; + struct ib_device_attr dev_attr; + int (*reg_rdma_mem)(struct iscsi_conn *conn, + struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); + void (*unreg_rdma_mem)(struct 
isert_cmd *isert_cmd, + struct isert_conn *isert_conn); }; struct isert_np { diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index ff12d4677cc4..596480022b0a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -10,7 +10,7 @@ * * Forward port and refactoring to modern qla2xxx and target/configfs * - * Copyright (C) 2010-2011 Nicholas A. Bellinger <nab@kernel.org> + * Copyright (C) 2010-2013 Nicholas A. Bellinger <nab@kernel.org> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index a6da313e253b..f85b9e5c1f05 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -2,12 +2,9 @@ * This file contains tcm implementation using v4 configfs fabric infrastructure * for QLogic target mode HBAs * - * ?? Copyright 2010-2011 RisingTide Systems LLC. + * (c) Copyright 2010-2013 Datera, Inc. * - * Licensed to the Linux Foundation under the General Public License (GPL) - * version 2. - * - * Author: Nicholas A. Bellinger <nab@risingtidesystems.com> + * Author: Nicholas A. Bellinger <nab@daterainc.com> * * tcm_qla2xxx_parse_wwn() and tcm_qla2xxx_format_wwn() contains code from * the TCM_FC / Open-FCoE.org fabric module. @@ -360,6 +357,14 @@ static int tcm_qla2xxx_check_prod_write_protect(struct se_portal_group *se_tpg) return QLA_TPG_ATTRIB(tpg)->prod_mode_write_protect; } +static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg) +{ + struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, + struct tcm_qla2xxx_tpg, se_tpg); + + return QLA_TPG_ATTRIB(tpg)->demo_mode_login_only; +} + static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl( struct se_portal_group *se_tpg) { @@ -489,38 +494,13 @@ static u32 tcm_qla2xxx_sess_get_index(struct se_session *se_sess) return 0; } -/* - * The LIO target core uses DMA_TO_DEVICE to mean that data is going - * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean - * that data is coming from the target (eg handling a READ). However, - * this is just the opposite of what we have to tell the DMA mapping - * layer -- eg when handling a READ, the HBA will have to DMA the data - * out of memory so it can send it to the initiator, which means we - * need to use DMA_TO_DEVICE when we map the data. 
- */ -static enum dma_data_direction tcm_qla2xxx_mapping_dir(struct se_cmd *se_cmd) -{ - if (se_cmd->se_cmd_flags & SCF_BIDI) - return DMA_BIDIRECTIONAL; - - switch (se_cmd->data_direction) { - case DMA_TO_DEVICE: - return DMA_FROM_DEVICE; - case DMA_FROM_DEVICE: - return DMA_TO_DEVICE; - case DMA_NONE: - default: - return DMA_NONE; - } -} - static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); cmd->bufflen = se_cmd->data_length; - cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd); + cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); cmd->sg_cnt = se_cmd->t_data_nents; cmd->sg = se_cmd->t_data_sg; @@ -656,7 +636,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) struct qla_tgt_cmd, se_cmd); cmd->bufflen = se_cmd->data_length; - cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd); + cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); cmd->sg_cnt = se_cmd->t_data_nents; @@ -680,7 +660,7 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg = NULL; cmd->sg_cnt = 0; cmd->offset = 0; - cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd); + cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); if (se_cmd->data_direction == DMA_FROM_DEVICE) { @@ -939,11 +919,19 @@ DEF_QLA_TPG_ATTR_BOOL(prod_mode_write_protect); DEF_QLA_TPG_ATTRIB(prod_mode_write_protect); QLA_TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR); +/* + * Define tcm_qla2xxx_tpg_attrib_s_demo_mode_login_only + */ +DEF_QLA_TPG_ATTR_BOOL(demo_mode_login_only); +DEF_QLA_TPG_ATTRIB(demo_mode_login_only); +QLA_TPG_ATTR(demo_mode_login_only, S_IRUGO | S_IWUSR); + static struct configfs_attribute *tcm_qla2xxx_tpg_attrib_attrs[] = { &tcm_qla2xxx_tpg_attrib_generate_node_acls.attr, &tcm_qla2xxx_tpg_attrib_cache_dynamic_acls.attr, &tcm_qla2xxx_tpg_attrib_demo_mode_write_protect.attr, &tcm_qla2xxx_tpg_attrib_prod_mode_write_protect.attr, + &tcm_qla2xxx_tpg_attrib_demo_mode_login_only.attr, NULL, }; @@ -1042,6 +1030,7 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg( QLA_TPG_ATTRIB(tpg)->generate_node_acls = 1; QLA_TPG_ATTRIB(tpg)->demo_mode_write_protect = 1; QLA_TPG_ATTRIB(tpg)->cache_dynamic_acls = 1; + QLA_TPG_ATTRIB(tpg)->demo_mode_login_only = 1; ret = core_tpg_register(&tcm_qla2xxx_fabric_configfs->tf_ops, wwn, &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); @@ -1736,7 +1725,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = { tcm_qla2xxx_check_demo_write_protect, .tpg_check_prod_mode_write_protect = tcm_qla2xxx_check_prod_write_protect, - .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_true, + .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, @@ -1784,7 +1773,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .tpg_check_demo_mode_cache = tcm_qla2xxx_check_true, .tpg_check_demo_mode_write_protect = tcm_qla2xxx_check_true, .tpg_check_prod_mode_write_protect = tcm_qla2xxx_check_false, - .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_true, + .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, 
.tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 9ba075fe9781..329327528a55 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -29,6 +29,7 @@ struct tcm_qla2xxx_tpg_attrib { int cache_dynamic_acls; int demo_mode_write_protect; int prod_mode_write_protect; + int demo_mode_login_only; }; struct tcm_qla2xxx_tpg { diff --git a/drivers/target/Makefile b/drivers/target/Makefile index 9fdcb561422f..85b012d2f89b 100644 --- a/drivers/target/Makefile +++ b/drivers/target/Makefile @@ -13,7 +13,8 @@ target_core_mod-y := target_core_configfs.o \ target_core_spc.o \ target_core_ua.o \ target_core_rd.o \ - target_core_stat.o + target_core_stat.o \ + target_core_xcopy.o obj-$(CONFIG_TARGET_CORE) += target_core_mod.o diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 3a179302b904..35b61f7d6c63 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains main functions related to the iSCSI Target Core Driver. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -63,7 +61,6 @@ spinlock_t sess_idr_lock; struct iscsit_global *iscsit_global; -struct kmem_cache *lio_cmd_cache; struct kmem_cache *lio_qr_cache; struct kmem_cache *lio_dr_cache; struct kmem_cache *lio_ooo_cache; @@ -220,11 +217,6 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) spin_unlock_bh(&np->np_thread_lock); return -1; } - if (np->np_login_tpg) { - pr_err("np->np_login_tpg() is not NULL!\n"); - spin_unlock_bh(&np->np_thread_lock); - return -1; - } spin_unlock_bh(&np->np_thread_lock); /* * Determine if the portal group is accepting storage traffic. @@ -239,26 +231,38 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) /* * Here we serialize access across the TIQN+TPG Tuple. 
*/ - ret = mutex_lock_interruptible(&tpg->np_login_lock); + ret = down_interruptible(&tpg->np_login_sem); if ((ret != 0) || signal_pending(current)) return -1; - spin_lock_bh(&np->np_thread_lock); - np->np_login_tpg = tpg; - spin_unlock_bh(&np->np_thread_lock); + spin_lock_bh(&tpg->tpg_state_lock); + if (tpg->tpg_state != TPG_STATE_ACTIVE) { + spin_unlock_bh(&tpg->tpg_state_lock); + up(&tpg->np_login_sem); + return -1; + } + spin_unlock_bh(&tpg->tpg_state_lock); return 0; } -int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) +void iscsit_login_kref_put(struct kref *kref) +{ + struct iscsi_tpg_np *tpg_np = container_of(kref, + struct iscsi_tpg_np, tpg_np_kref); + + complete(&tpg_np->tpg_np_comp); +} + +int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg, + struct iscsi_tpg_np *tpg_np) { struct iscsi_tiqn *tiqn = tpg->tpg_tiqn; - spin_lock_bh(&np->np_thread_lock); - np->np_login_tpg = NULL; - spin_unlock_bh(&np->np_thread_lock); + up(&tpg->np_login_sem); - mutex_unlock(&tpg->np_login_lock); + if (tpg_np) + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); if (tiqn) iscsit_put_tiqn_for_login(tiqn); @@ -410,20 +414,10 @@ struct iscsi_np *iscsit_add_np( int iscsit_reset_np_thread( struct iscsi_np *np, struct iscsi_tpg_np *tpg_np, - struct iscsi_portal_group *tpg) + struct iscsi_portal_group *tpg, + bool shutdown) { spin_lock_bh(&np->np_thread_lock); - if (tpg && tpg_np) { - /* - * The reset operation need only be performed when the - * passed struct iscsi_portal_group has a login in progress - * to one of the network portals. - */ - if (tpg_np->tpg_np->np_login_tpg != tpg) { - spin_unlock_bh(&np->np_thread_lock); - return 0; - } - } if (np->np_thread_state == ISCSI_NP_THREAD_INACTIVE) { spin_unlock_bh(&np->np_thread_lock); return 0; @@ -438,6 +432,12 @@ int iscsit_reset_np_thread( } spin_unlock_bh(&np->np_thread_lock); + if (tpg_np && shutdown) { + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); + + wait_for_completion(&tpg_np->tpg_np_comp); + } + return 0; } @@ -497,7 +497,6 @@ static struct iscsit_transport iscsi_target_transport = { .iscsit_setup_np = iscsit_setup_np, .iscsit_accept_np = iscsit_accept_np, .iscsit_free_np = iscsit_free_np, - .iscsit_alloc_cmd = iscsit_alloc_cmd, .iscsit_get_login_rx = iscsit_get_login_rx, .iscsit_put_login_tx = iscsit_put_login_tx, .iscsit_get_dataout = iscsit_build_r2ts_for_cmd, @@ -538,22 +537,13 @@ static int __init iscsi_target_init_module(void) goto ts_out1; } - lio_cmd_cache = kmem_cache_create("lio_cmd_cache", - sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd), - 0, NULL); - if (!lio_cmd_cache) { - pr_err("Unable to kmem_cache_create() for" - " lio_cmd_cache\n"); - goto ts_out2; - } - lio_qr_cache = kmem_cache_create("lio_qr_cache", sizeof(struct iscsi_queue_req), __alignof__(struct iscsi_queue_req), 0, NULL); if (!lio_qr_cache) { pr_err("nable to kmem_cache_create() for" " lio_qr_cache\n"); - goto cmd_out; + goto ts_out2; } lio_dr_cache = kmem_cache_create("lio_dr_cache", @@ -597,8 +587,6 @@ dr_out: kmem_cache_destroy(lio_dr_cache); qr_out: kmem_cache_destroy(lio_qr_cache); -cmd_out: - kmem_cache_destroy(lio_cmd_cache); ts_out2: iscsi_deallocate_thread_sets(); ts_out1: @@ -616,7 +604,6 @@ static void __exit iscsi_target_cleanup_module(void) iscsi_thread_set_free(); iscsit_release_discovery_tpg(); iscsit_unregister_transport(&iscsi_target_transport); - kmem_cache_destroy(lio_cmd_cache); kmem_cache_destroy(lio_qr_cache); kmem_cache_destroy(lio_dr_cache); 
kmem_cache_destroy(lio_ooo_cache); @@ -3447,12 +3434,10 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) bool inaddr_any = iscsit_check_inaddr_any(np); len = sprintf(buf, "TargetAddress=" - "%s%s%s:%hu,%hu", - (np->np_sockaddr.ss_family == AF_INET6) ? - "[" : "", (inaddr_any == false) ? + "%s:%hu,%hu", + (inaddr_any == false) ? np->np_ip : conn->local_ip, - (np->np_sockaddr.ss_family == AF_INET6) ? - "]" : "", (inaddr_any == false) ? + (inaddr_any == false) ? np->np_port : conn->local_port, tpg->tpgt); len += 1; diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index 2c437cb8ca00..e936d56fb523 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -7,13 +7,15 @@ extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *); extern struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *); extern void iscsit_del_tiqn(struct iscsi_tiqn *); extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *); -extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *); +extern void iscsit_login_kref_put(struct kref *); +extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *, + struct iscsi_tpg_np *); extern bool iscsit_check_np_match(struct __kernel_sockaddr_storage *, struct iscsi_np *, int); extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, char *, int); extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, - struct iscsi_portal_group *); + struct iscsi_portal_group *, bool); extern int iscsit_del_np(struct iscsi_np *); extern int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8, unsigned char *); extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); @@ -37,7 +39,6 @@ extern struct target_fabric_configfs *lio_target_fabric_configfs; extern struct kmem_cache *lio_dr_cache; extern struct kmem_cache *lio_ooo_cache; -extern struct kmem_cache *lio_cmd_cache; extern struct kmem_cache *lio_qr_cache; extern struct kmem_cache *lio_r2t_cache; diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index cee17543278c..7505fddca15f 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file houses the main functions for the iSCSI CHAP support * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index bbfd28893164..fd145259361d 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -2,9 +2,7 @@ * This file contains the configfs implementation for iSCSI Target mode * from the LIO-Target Project. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> * @@ -265,9 +263,9 @@ static struct se_tpg_np *lio_target_call_addnptotpg( *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ - ret = strict_strtoul(port_str, 0, &port); + ret = kstrtoul(port_str, 0, &port); if (ret < 0) { - pr_err("strict_strtoul() failed for port_str: %d\n", ret); + pr_err("kstrtoul() failed for port_str: %d\n", ret); return ERR_PTR(ret); } sock_in6 = (struct sockaddr_in6 *)&sockaddr; @@ -290,9 +288,9 @@ static struct se_tpg_np *lio_target_call_addnptotpg( *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ - ret = strict_strtoul(port_str, 0, &port); + ret = kstrtoul(port_str, 0, &port); if (ret < 0) { - pr_err("strict_strtoul() failed for port_str: %d\n", ret); + pr_err("kstrtoul() failed for port_str: %d\n", ret); return ERR_PTR(ret); } sock_in = (struct sockaddr_in *)&sockaddr; @@ -1481,7 +1479,7 @@ static ssize_t lio_target_wwn_show_attr_lio_version( struct target_fabric_configfs *tf, char *page) { - return sprintf(page, "RisingTide Systems Linux-iSCSI Target "ISCSIT_VERSION"\n"); + return sprintf(page, "Datera Inc. iSCSI Target "ISCSIT_VERSION"\n"); } TF_WWN_ATTR_RO(lio_target, lio_version); @@ -1925,7 +1923,7 @@ static void lio_release_cmd(struct se_cmd *se_cmd) struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); pr_debug("Entering lio_release_cmd for se_cmd: %p\n", se_cmd); - cmd->release_cmd(cmd); + iscsit_release_cmd(cmd); } /* End functions for target_core_fabric_ops */ diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 4f77a78edef9..9a5721b8ff96 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -9,7 +9,7 @@ #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> -#define ISCSIT_VERSION "v4.1.0-rc2" +#define ISCSIT_VERSION "v4.1.0" #define ISCSI_MAX_DATASN_MISSING_COUNT 16 #define ISCSI_TX_THREAD_TCP_TIMEOUT 2 #define ISCSI_RX_THREAD_TCP_TIMEOUT 2 @@ -17,6 +17,9 @@ #define SECONDS_FOR_ASYNC_TEXT 10 #define SECONDS_FOR_LOGOUT_COMP 15 #define WHITE_SPACE " \t\v\f\n\r" +#define ISCSIT_MIN_TAGS 16 +#define ISCSIT_EXTRA_TAGS 8 +#define ISCSIT_TCP_BACKLOG 256 /* struct iscsi_node_attrib sanity values */ #define NA_DATAOUT_TIMEOUT 3 @@ -47,7 +50,7 @@ #define TA_NETIF_TIMEOUT_MAX 15 #define TA_NETIF_TIMEOUT_MIN 2 #define TA_GENERATE_NODE_ACLS 0 -#define TA_DEFAULT_CMDSN_DEPTH 16 +#define TA_DEFAULT_CMDSN_DEPTH 64 #define TA_DEFAULT_CMDSN_DEPTH_MAX 512 #define TA_DEFAULT_CMDSN_DEPTH_MIN 1 #define TA_CACHE_DYNAMIC_ACLS 0 @@ -489,7 +492,6 @@ struct iscsi_cmd { u32 first_data_sg_off; u32 kmapped_nents; sense_reason_t sense_reason; - void (*release_cmd)(struct iscsi_cmd *); } ____cacheline_aligned; struct iscsi_tmr_req { @@ -554,9 +556,19 @@ struct iscsi_conn { struct completion rx_half_close_comp; /* socket used by this connection */ struct socket *sock; + void (*orig_data_ready)(struct sock *, int); + void (*orig_state_change)(struct sock *); +#define LOGIN_FLAGS_READ_ACTIVE 1 +#define LOGIN_FLAGS_CLOSED 2 +#define LOGIN_FLAGS_READY 4 + unsigned long login_flags; + struct delayed_work login_work; + struct delayed_work login_cleanup_work; + struct iscsi_login *login; struct timer_list nopin_timer; struct timer_list nopin_response_timer; struct timer_list transport_timer; + struct task_struct *login_kworker; /* Spinlock used for add/deleting cmd's from conn_cmd_list */ spinlock_t cmd_lock; spinlock_t conn_usage_lock; @@ -584,6 +596,7 @@ struct 
iscsi_conn { void *context; struct iscsi_login_thread_s *login_thread; struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np; /* Pointer to parent session */ struct iscsi_session *sess; /* Pointer to thread_set in use for this conn's threads */ @@ -682,6 +695,7 @@ struct iscsi_login { u8 version_max; u8 login_complete; u8 login_failed; + bool zero_tsih; char isid[6]; u32 cmd_sn; itt_t init_task_tag; @@ -694,6 +708,7 @@ struct iscsi_login { char *req_buf; char *rsp_buf; struct iscsi_conn *conn; + struct iscsi_np *np; } ____cacheline_aligned; struct iscsi_node_attrib { @@ -773,7 +788,6 @@ struct iscsi_np { struct __kernel_sockaddr_storage np_sockaddr; struct task_struct *np_thread; struct timer_list np_login_timer; - struct iscsi_portal_group *np_login_tpg; void *np_context; struct iscsit_transport *np_transport; struct list_head np_list; @@ -788,6 +802,8 @@ struct iscsi_tpg_np { struct list_head tpg_np_parent_list; struct se_tpg_np se_tpg_np; spinlock_t tpg_np_parent_lock; + struct completion tpg_np_comp; + struct kref tpg_np_kref; }; struct iscsi_portal_group { @@ -809,7 +825,7 @@ struct iscsi_portal_group { spinlock_t tpg_state_lock; struct se_portal_group tpg_se_tpg; struct mutex tpg_access_lock; - struct mutex np_login_lock; + struct semaphore np_login_sem; struct iscsi_tpg_attrib tpg_attrib; struct iscsi_node_auth tpg_demo_auth; /* Pointer to default list of iSCSI parameters for TPG */ diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c index 848fee768948..e93d5a7a3f81 100644 --- a/drivers/target/iscsi/iscsi_target_datain_values.c +++ b/drivers/target/iscsi/iscsi_target_datain_values.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Target DataIN value generation functions. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c index 1b74033510a0..6c7a5104a4cd 100644 --- a/drivers/target/iscsi/iscsi_target_device.c +++ b/drivers/target/iscsi/iscsi_target_device.c @@ -2,9 +2,7 @@ * This file contains the iSCSI Virtual Device and Disk Transport * agnostic related functions. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 08bd87833321..41052e512d92 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -2,9 +2,7 @@ * This file contains error recovery level zero functions used by * the iSCSI Target driver. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 586c268679a4..e048d6439f4a 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains error recovery level one used by the iSCSI Target driver. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index 45a5afd5ea13..33be1fb1df32 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -2,9 +2,7 @@ * This file contains error recovery level two functions used by * the iSCSI Target driver. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index bc788c52b6cc..1794c753954a 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the login functions used by the iSCSI Target driver. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> * @@ -50,6 +48,7 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsi_conn *conn) pr_err("Unable to allocate memory for struct iscsi_login.\n"); return NULL; } + conn->login = login; login->conn = conn; login->first_request = 1; @@ -428,7 +427,7 @@ static int iscsi_login_zero_tsih_s2( ISCSI_LOGIN_STATUS_NO_RESOURCES); return -1; } - rc = strict_strtoul(param->value, 0, &mrdsl); + rc = kstrtoul(param->value, 0, &mrdsl); if (rc < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); @@ -684,7 +683,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn) iscsit_start_nopin_timer(conn); } -static int iscsi_post_login_handler( +int iscsi_post_login_handler( struct iscsi_np *np, struct iscsi_conn *conn, u8 zero_tsih) @@ -872,7 +871,7 @@ int iscsit_setup_np( struct __kernel_sockaddr_storage *sockaddr) { struct socket *sock = NULL; - int backlog = 5, ret, opt = 0, len; + int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len; switch (np->np_network_transport) { case ISCSI_TCP: @@ -1007,16 +1006,24 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in6, &err, 1); if (!rc) { - snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c", - &sock_in6.sin6_addr.in6_u); + if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) + snprintf(conn->login_ip, sizeof(conn->login_ip), "[%pI6c]", + &sock_in6.sin6_addr.in6_u); + else + snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI4", + &sock_in6.sin6_addr.s6_addr32[3]); conn->login_port = ntohs(sock_in6.sin6_port); } rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in6, &err, 0); if (!rc) { - snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI6c", - &sock_in6.sin6_addr.in6_u); + if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) + snprintf(conn->local_ip, sizeof(conn->local_ip), "[%pI6c]", + &sock_in6.sin6_addr.in6_u); + else + snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI4", + &sock_in6.sin6_addr.s6_addr32[3]); conn->local_port = ntohs(sock_in6.sin6_port); } } else { @@ -1116,6 +1123,77 @@ iscsit_conn_set_transport(struct iscsi_conn *conn, struct iscsit_transport *t) return 0; } +void iscsi_target_login_sess_out(struct iscsi_conn *conn, + struct iscsi_np *np, bool zero_tsih, bool new_sess) +{ + if (new_sess == false) + goto old_sess_out; + + pr_err("iSCSI Login negotiation failed.\n"); + iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + if (!zero_tsih || !conn->sess) + goto old_sess_out; + if (conn->sess->se_sess) + transport_free_session(conn->sess->se_sess); + if (conn->sess->session_index != 0) { + spin_lock_bh(&sess_idr_lock); + idr_remove(&sess_idr, conn->sess->session_index); + spin_unlock_bh(&sess_idr_lock); + } + kfree(conn->sess->sess_ops); + kfree(conn->sess); + +old_sess_out: + iscsi_stop_login_thread_timer(np); + /* + * If login negotiation fails check if the Time2Retain timer + * needs to be restarted. 
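In the iscsit_accept_np() hunk above, the getname() results are now rendered so that IPv4-mapped IPv6 peers (::ffff:a.b.c.d) print as plain IPv4 and only genuine IPv6 literals get brackets, matching the SendTargets TargetAddress change earlier in the patch. A sketch of the same decision, assuming a dual-stack AF_INET6 listener in a kernel context where the %pI6c/%pI4 printk extensions are available (format_login_ip is a made-up helper):

#include <linux/in6.h>
#include <linux/kernel.h>
#include <net/ipv6.h>

/* Render an address returned by getname() on a dual-stack listener. */
static void format_login_ip(char *buf, size_t len,
			    const struct sockaddr_in6 *sin6)
{
	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		/* ::ffff:a.b.c.d -> print the embedded IPv4 address */
		snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]);
	else
		/* bracket literal IPv6 addresses, RFC 3986 style */
		snprintf(buf, len, "[%pI6c]", &sin6->sin6_addr);
}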
+ */ + if (!zero_tsih && conn->sess) { + spin_lock_bh(&conn->sess->conn_lock); + if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { + struct se_portal_group *se_tpg = + &ISCSI_TPG_C(conn)->tpg_se_tpg; + + atomic_set(&conn->sess->session_continuation, 0); + spin_unlock_bh(&conn->sess->conn_lock); + spin_lock_bh(&se_tpg->session_lock); + iscsit_start_time2retain_handler(conn->sess); + spin_unlock_bh(&se_tpg->session_lock); + } else + spin_unlock_bh(&conn->sess->conn_lock); + iscsit_dec_session_usage_count(conn->sess); + } + + if (!IS_ERR(conn->conn_rx_hash.tfm)) + crypto_free_hash(conn->conn_rx_hash.tfm); + if (!IS_ERR(conn->conn_tx_hash.tfm)) + crypto_free_hash(conn->conn_tx_hash.tfm); + + if (conn->conn_cpumask) + free_cpumask_var(conn->conn_cpumask); + + kfree(conn->conn_ops); + + if (conn->param_list) { + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + } + iscsi_target_nego_release(conn); + + if (conn->sock) { + sock_release(conn->sock); + conn->sock = NULL; + } + + if (conn->conn_transport->iscsit_free_conn) + conn->conn_transport->iscsit_free_conn(conn); + + iscsit_put_transport(conn->conn_transport); + kfree(conn); +} + static int __iscsi_target_login_thread(struct iscsi_np *np) { u8 *buffer, zero_tsih = 0; @@ -1124,6 +1202,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) struct iscsi_login *login; struct iscsi_portal_group *tpg = NULL; struct iscsi_login_req *pdu; + struct iscsi_tpg_np *tpg_np; + bool new_sess = false; flush_signals(current); @@ -1264,6 +1344,7 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) tpg = conn->tpg; goto new_sess_out; } + login->zero_tsih = zero_tsih; tpg = conn->tpg; if (!tpg) { @@ -1279,7 +1360,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto old_sess_out; } - if (iscsi_target_start_negotiation(login, conn) < 0) + ret = iscsi_target_start_negotiation(login, conn); + if (ret < 0) goto new_sess_out; if (!conn->sess) { @@ -1292,84 +1374,32 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) if (signal_pending(current)) goto new_sess_out; - ret = iscsi_post_login_handler(np, conn, zero_tsih); + if (ret == 1) { + tpg_np = conn->tpg_np; - if (ret < 0) - goto new_sess_out; + ret = iscsi_post_login_handler(np, conn, zero_tsih); + if (ret < 0) + goto new_sess_out; + + iscsit_deaccess_np(np, tpg, tpg_np); + } - iscsit_deaccess_np(np, tpg); tpg = NULL; + tpg_np = NULL; /* Get another socket */ return 1; new_sess_out: - pr_err("iSCSI Login negotiation failed.\n"); - iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, - ISCSI_LOGIN_STATUS_INIT_ERR); - if (!zero_tsih || !conn->sess) - goto old_sess_out; - if (conn->sess->se_sess) - transport_free_session(conn->sess->se_sess); - if (conn->sess->session_index != 0) { - spin_lock_bh(&sess_idr_lock); - idr_remove(&sess_idr, conn->sess->session_index); - spin_unlock_bh(&sess_idr_lock); - } - kfree(conn->sess->sess_ops); - kfree(conn->sess); + new_sess = true; old_sess_out: - iscsi_stop_login_thread_timer(np); - /* - * If login negotiation fails check if the Time2Retain timer - * needs to be restarted. 
- */ - if (!zero_tsih && conn->sess) { - spin_lock_bh(&conn->sess->conn_lock); - if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { - struct se_portal_group *se_tpg = - &ISCSI_TPG_C(conn)->tpg_se_tpg; - - atomic_set(&conn->sess->session_continuation, 0); - spin_unlock_bh(&conn->sess->conn_lock); - spin_lock_bh(&se_tpg->session_lock); - iscsit_start_time2retain_handler(conn->sess); - spin_unlock_bh(&se_tpg->session_lock); - } else - spin_unlock_bh(&conn->sess->conn_lock); - iscsit_dec_session_usage_count(conn->sess); - } - - if (!IS_ERR(conn->conn_rx_hash.tfm)) - crypto_free_hash(conn->conn_rx_hash.tfm); - if (!IS_ERR(conn->conn_tx_hash.tfm)) - crypto_free_hash(conn->conn_tx_hash.tfm); - - if (conn->conn_cpumask) - free_cpumask_var(conn->conn_cpumask); - - kfree(conn->conn_ops); - - if (conn->param_list) { - iscsi_release_param_list(conn->param_list); - conn->param_list = NULL; - } - iscsi_target_nego_release(conn); - - if (conn->sock) { - sock_release(conn->sock); - conn->sock = NULL; - } - - if (conn->conn_transport->iscsit_free_conn) - conn->conn_transport->iscsit_free_conn(conn); - - iscsit_put_transport(conn->conn_transport); - - kfree(conn); + tpg_np = conn->tpg_np; + iscsi_target_login_sess_out(conn, np, zero_tsih, new_sess); + new_sess = false; if (tpg) { - iscsit_deaccess_np(np, tpg); + iscsit_deaccess_np(np, tpg, tpg_np); tpg = NULL; + tpg_np = NULL; } out: diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 63efd2878451..29d098324b7f 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -12,6 +12,9 @@ extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *); extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); extern void iscsit_free_conn(struct iscsi_np *, struct iscsi_conn *); +extern int iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); +extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, + bool, bool); extern int iscsi_target_login_thread(void *); extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index c4675b4ceb49..14d1aed5af1d 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains main functions related to iSCSI Parameter negotiation. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> * @@ -377,15 +375,284 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log return 0; } -static int iscsi_target_do_login_io(struct iscsi_conn *conn, struct iscsi_login *login) +static void iscsi_target_sk_data_ready(struct sock *sk, int count) { - if (iscsi_target_do_tx_login_io(conn, login) < 0) - return -1; + struct iscsi_conn *conn = sk->sk_user_data; + bool rc; - if (conn->conn_transport->iscsit_get_login_rx(conn, login) < 0) - return -1; + pr_debug("Entering iscsi_target_sk_data_ready: conn: %p\n", conn); - return 0; + write_lock_bh(&sk->sk_callback_lock); + if (!sk->sk_user_data) { + write_unlock_bh(&sk->sk_callback_lock); + return; + } + if (!test_bit(LOGIN_FLAGS_READY, &conn->login_flags)) { + write_unlock_bh(&sk->sk_callback_lock); + pr_debug("Got LOGIN_FLAGS_READY=0, conn: %p >>>>\n", conn); + return; + } + if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + write_unlock_bh(&sk->sk_callback_lock); + pr_debug("Got LOGIN_FLAGS_CLOSED=1, conn: %p >>>>\n", conn); + return; + } + if (test_and_set_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { + write_unlock_bh(&sk->sk_callback_lock); + pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1, conn: %p >>>>\n", conn); + return; + } + + rc = schedule_delayed_work(&conn->login_work, 0); + if (rc == false) { + pr_debug("iscsi_target_sk_data_ready, schedule_delayed_work" + " got false\n"); + } + write_unlock_bh(&sk->sk_callback_lock); +} + +static void iscsi_target_sk_state_change(struct sock *); + +static void iscsi_target_set_sock_callbacks(struct iscsi_conn *conn) +{ + struct sock *sk; + + if (!conn->sock) + return; + + sk = conn->sock->sk; + pr_debug("Entering iscsi_target_set_sock_callbacks: conn: %p\n", conn); + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = conn; + conn->orig_data_ready = sk->sk_data_ready; + conn->orig_state_change = sk->sk_state_change; + sk->sk_data_ready = iscsi_target_sk_data_ready; + sk->sk_state_change = iscsi_target_sk_state_change; + write_unlock_bh(&sk->sk_callback_lock); + + sk->sk_sndtimeo = TA_LOGIN_TIMEOUT * HZ; + sk->sk_rcvtimeo = TA_LOGIN_TIMEOUT * HZ; +} + +static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn) +{ + struct sock *sk; + + if (!conn->sock) + return; + + sk = conn->sock->sk; + pr_debug("Entering iscsi_target_restore_sock_callbacks: conn: %p\n", conn); + + write_lock_bh(&sk->sk_callback_lock); + if (!sk->sk_user_data) { + write_unlock_bh(&sk->sk_callback_lock); + return; + } + sk->sk_user_data = NULL; + sk->sk_data_ready = conn->orig_data_ready; + sk->sk_state_change = conn->orig_state_change; + write_unlock_bh(&sk->sk_callback_lock); + + sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; +} + +static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *); + +static bool iscsi_target_sk_state_check(struct sock *sk) +{ + if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) { + pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE," + "returning FALSE\n"); + return false; + } + return true; +} + +static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login) +{ + struct iscsi_np *np = login->np; + bool zero_tsih = login->zero_tsih; + + iscsi_remove_failed_auth_entry(conn); + iscsi_target_nego_release(conn); + iscsi_target_login_sess_out(conn, np, zero_tsih, true); +} + +static void iscsi_target_login_timeout(unsigned long data) +{ + struct iscsi_conn *conn = (struct iscsi_conn *)data; + + pr_debug("Entering 
iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n"); + + if (conn->login_kworker) { + pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n", + conn->login_kworker->comm, conn->login_kworker->pid); + send_sig(SIGINT, conn->login_kworker, 1); + } +} + +static void iscsi_target_do_login_rx(struct work_struct *work) +{ + struct iscsi_conn *conn = container_of(work, + struct iscsi_conn, login_work.work); + struct iscsi_login *login = conn->login; + struct iscsi_np *np = login->np; + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_tpg_np *tpg_np = conn->tpg_np; + struct timer_list login_timer; + int rc, zero_tsih = login->zero_tsih; + bool state; + + pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n", + conn, current->comm, current->pid); + + spin_lock(&tpg->tpg_state_lock); + state = (tpg->tpg_state == TPG_STATE_ACTIVE); + spin_unlock(&tpg->tpg_state_lock); + + if (state == false) { + pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n"); + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + return; + } + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = iscsi_target_sk_state_check(sk); + read_unlock_bh(&sk->sk_callback_lock); + + if (state == false) { + pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + return; + } + } + + conn->login_kworker = current; + allow_signal(SIGINT); + + init_timer(&login_timer); + login_timer.expires = (get_jiffies_64() + TA_LOGIN_TIMEOUT * HZ); + login_timer.data = (unsigned long)conn; + login_timer.function = iscsi_target_login_timeout; + add_timer(&login_timer); + pr_debug("Starting login_timer for %s/%d\n", current->comm, current->pid); + + rc = conn->conn_transport->iscsit_get_login_rx(conn, login); + del_timer_sync(&login_timer); + flush_signals(current); + conn->login_kworker = NULL; + + if (rc < 0) { + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + return; + } + + pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n", + conn, current->comm, current->pid); + + rc = iscsi_target_do_login(conn, login); + if (rc < 0) { + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + } else if (!rc) { + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + } else if (rc == 1) { + iscsi_target_nego_release(conn); + iscsi_post_login_handler(np, conn, zero_tsih); + iscsit_deaccess_np(np, tpg, tpg_np); + } +} + +static void iscsi_target_do_cleanup(struct work_struct *work) +{ + struct iscsi_conn *conn = container_of(work, + struct iscsi_conn, login_cleanup_work.work); + struct sock *sk = conn->sock->sk; + struct iscsi_login *login = conn->login; + struct iscsi_np *np = login->np; + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_tpg_np *tpg_np = conn->tpg_np; + + pr_debug("Entering iscsi_target_do_cleanup\n"); + + cancel_delayed_work_sync(&conn->login_work); + conn->orig_state_change(sk); + + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + + pr_debug("iscsi_target_do_cleanup done()\n"); +} + 
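iscsi_target_set_sock_callbacks() and its restore twin above follow the usual kernel pattern for borrowing a TCP socket's event hooks: take sk_callback_lock for writing, stash the original callbacks, point sk_user_data at the connection, and keep the replacement hook minimal, deferring the real work to process context. A condensed sketch of that pattern (login_ctx and the helpers are illustrative, not the driver's code; the (struct sock *, int) data_ready signature is the v3.12-era one, which later kernels reduced to a single argument):

#include <linux/net.h>
#include <linux/workqueue.h>
#include <net/sock.h>

struct login_ctx {
	struct socket *sock;
	void (*orig_data_ready)(struct sock *sk, int bytes);
	struct delayed_work work;
};

static void login_data_ready(struct sock *sk, int bytes)
{
	struct login_ctx *ctx;

	read_lock_bh(&sk->sk_callback_lock);
	ctx = sk->sk_user_data;
	if (ctx)
		/* Runs in softirq context: never read here, defer instead. */
		schedule_delayed_work(&ctx->work, 0);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void login_hook_socket(struct login_ctx *ctx)
{
	struct sock *sk = ctx->sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = ctx;
	ctx->orig_data_ready = sk->sk_data_ready;	/* saved for restore */
	sk->sk_data_ready = login_data_ready;
	write_unlock_bh(&sk->sk_callback_lock);
}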
+static void iscsi_target_sk_state_change(struct sock *sk) +{ + struct iscsi_conn *conn; + void (*orig_state_change)(struct sock *); + bool state; + + pr_debug("Entering iscsi_target_sk_state_change\n"); + + write_lock_bh(&sk->sk_callback_lock); + conn = sk->sk_user_data; + if (!conn) { + write_unlock_bh(&sk->sk_callback_lock); + return; + } + orig_state_change = conn->orig_state_change; + + if (!test_bit(LOGIN_FLAGS_READY, &conn->login_flags)) { + pr_debug("Got LOGIN_FLAGS_READY=0 sk_state_change conn: %p\n", + conn); + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); + return; + } + if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { + pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change" + " conn: %p\n", conn); + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); + return; + } + if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n", + conn); + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); + return; + } + + state = iscsi_target_sk_state_check(sk); + write_unlock_bh(&sk->sk_callback_lock); + + pr_debug("iscsi_target_sk_state_change: state: %d\n", state); + + if (!state) { + pr_debug("iscsi_target_sk_state_change got failed state\n"); + schedule_delayed_work(&conn->login_cleanup_work, 0); + return; + } + orig_state_change(sk); } /* @@ -643,10 +910,11 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { login->tsih = conn->sess->tsih; login->login_complete = 1; + iscsi_target_restore_sock_callbacks(conn); if (iscsi_target_do_tx_login_io(conn, login) < 0) return -1; - return 0; + return 1; } break; default: @@ -656,13 +924,29 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo break; } - if (iscsi_target_do_login_io(conn, login) < 0) + if (iscsi_target_do_tx_login_io(conn, login) < 0) return -1; if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { login_rsp->flags &= ~ISCSI_FLAG_LOGIN_TRANSIT; login_rsp->flags &= ~ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK; } + break; + } + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + bool state; + + read_lock_bh(&sk->sk_callback_lock); + state = iscsi_target_sk_state_check(sk); + read_unlock_bh(&sk->sk_callback_lock); + + if (!state) { + pr_debug("iscsi_target_do_login() failed state for" + " conn: %p\n", conn); + return -1; + } } return 0; @@ -695,9 +979,17 @@ int iscsi_target_locate_portal( char *tmpbuf, *start = NULL, *end = NULL, *key, *value; struct iscsi_session *sess = conn->sess; struct iscsi_tiqn *tiqn; + struct iscsi_tpg_np *tpg_np = NULL; struct iscsi_login_req *login_req; - u32 payload_length; - int sessiontype = 0, ret = 0; + struct se_node_acl *se_nacl; + u32 payload_length, queue_depth = 0; + int sessiontype = 0, ret = 0, tag_num, tag_size; + + INIT_DELAYED_WORK(&conn->login_work, iscsi_target_do_login_rx); + INIT_DELAYED_WORK(&conn->login_cleanup_work, iscsi_target_do_cleanup); + iscsi_target_set_sock_callbacks(conn); + + login->np = np; login_req = (struct iscsi_login_req *) login->req; payload_length = ntoh24(login_req->dlength); @@ -791,7 +1083,7 @@ int iscsi_target_locate_portal( goto out; } ret = 0; - goto out; + goto alloc_tags; } get_target: @@ -822,7 +1114,7 @@ get_target: /* * Locate Target Portal Group from Storage Node. 
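The iscsi_target_sk_state_change() logic above resolves the race between the login-read worker and a TCP close using nothing but atomic flag tests: a set LOGIN_FLAGS_READ_ACTIVE means the worker currently owns the connection and will observe the close itself, while test_and_set_bit(LOGIN_FLAGS_CLOSED) guarantees only one path ever schedules cleanup. The same arbitration, reduced to a sketch with illustrative names:

#include <linux/bitops.h>

#define FLAG_READ_ACTIVE	1
#define FLAG_CLOSED		2

/* Called from the socket's state_change hook when TCP reports a close. */
static bool try_claim_close(unsigned long *flags)
{
	if (test_bit(FLAG_READ_ACTIVE, flags))
		return false;	/* rx worker owns conn; it will see the close */
	if (test_and_set_bit(FLAG_CLOSED, flags))
		return false;	/* another path already claimed cleanup */
	return true;		/* caller must schedule the cleanup work */
}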
*/ - conn->tpg = iscsit_get_tpg_from_np(tiqn, np); + conn->tpg = iscsit_get_tpg_from_np(tiqn, np, &tpg_np); if (!conn->tpg) { pr_err("Unable to locate Target Portal Group" " on %s\n", tiqn->tiqn); @@ -832,12 +1124,16 @@ get_target: ret = -1; goto out; } + conn->tpg_np = tpg_np; pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt); /* * Setup crc32c modules from libcrypto */ if (iscsi_login_setup_crypto(conn) < 0) { pr_err("iscsi_login_setup_crypto() failed\n"); + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); + iscsit_put_tiqn_for_login(tiqn); + conn->tpg = NULL; ret = -1; goto out; } @@ -846,11 +1142,12 @@ get_target: * process login attempt. */ if (iscsit_access_np(np, conn->tpg) < 0) { + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); iscsit_put_tiqn_for_login(tiqn); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); - ret = -1; conn->tpg = NULL; + ret = -1; goto out; } @@ -883,8 +1180,27 @@ get_target: ret = -1; goto out; } + se_nacl = sess->se_sess->se_node_acl; + queue_depth = se_nacl->queue_depth; + /* + * Setup pre-allocated tags based upon allowed per NodeACL CmdSN + * depth for non immediate commands, plus extra tags for immediate + * commands. + * + * Also enforce a ISCSIT_MIN_TAGS to prevent unnecessary contention + * in per-cpu-ida tag allocation logic + small queue_depth. + */ +alloc_tags: + tag_num = max_t(u32, ISCSIT_MIN_TAGS, queue_depth); + tag_num += ISCSIT_EXTRA_TAGS; + tag_size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size; - ret = 0; + ret = transport_alloc_session_tags(sess->se_sess, tag_num, tag_size); + if (ret < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + ret = -1; + } out: kfree(tmpbuf); return ret; @@ -897,10 +1213,23 @@ int iscsi_target_start_negotiation( int ret; ret = iscsi_target_do_login(conn, login); - if (ret != 0) + if (!ret) { + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + } else if (ret < 0) { + cancel_delayed_work_sync(&conn->login_work); + cancel_delayed_work_sync(&conn->login_cleanup_work); + iscsi_target_restore_sock_callbacks(conn); iscsi_remove_failed_auth_entry(conn); + } + if (ret != 0) + iscsi_target_nego_release(conn); - iscsi_target_nego_release(conn); return ret; } diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c index 11dc2936af76..93bdc475eb00 100644 --- a/drivers/target/iscsi/iscsi_target_nodeattrib.c +++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the main functions related to Initiator Node Attributes. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 35fd6439eb01..4d2e23fc76fd 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains main functions related to iSCSI Parameter negotiation. 
* - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -1182,7 +1180,7 @@ static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value, unsigned long long tmp; int rc; - rc = strict_strtoull(param->value, 0, &tmp); + rc = kstrtoull(param->value, 0, &tmp); if (rc < 0) return -1; diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c index edb592a368ef..ca41b583f2f6 100644 --- a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c +++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c @@ -2,9 +2,7 @@ * This file contains main functions related to iSCSI DataSequenceInOrder=No * and DataPDUInOrder=No. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c index 464b4206a51e..f788e8b5e855 100644 --- a/drivers/target/iscsi/iscsi_target_stat.c +++ b/drivers/target/iscsi/iscsi_target_stat.c @@ -2,9 +2,7 @@ * Modern ConfigFS group context specific iSCSI statistics based on original * iscsi_target_mib.c code * - * Copyright (c) 2011 Rising Tide Systems - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * Copyright (c) 2011-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -177,7 +175,7 @@ ISCSI_STAT_INSTANCE_ATTR_RO(description); static ssize_t iscsi_stat_instance_show_attr_vendor( struct iscsi_wwn_stat_grps *igrps, char *page) { - return snprintf(page, PAGE_SIZE, "RisingTide Systems iSCSI-Target\n"); + return snprintf(page, PAGE_SIZE, "Datera, Inc. iSCSI-Target\n"); } ISCSI_STAT_INSTANCE_ATTR_RO(vendor); @@ -432,13 +430,7 @@ static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr( int ret; spin_lock(&lstat->lock); - if (lstat->last_intr_fail_ip_family == AF_INET6) { - ret = snprintf(page, PAGE_SIZE, "[%s]\n", - lstat->last_intr_fail_ip_addr); - } else { - ret = snprintf(page, PAGE_SIZE, "%s\n", - lstat->last_intr_fail_ip_addr); - } + ret = snprintf(page, PAGE_SIZE, "%s\n", lstat->last_intr_fail_ip_addr); spin_unlock(&lstat->lock); return ret; diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c index b997e5da47d3..78404b1cc0bf 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.c +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Target specific Task Management functions. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 439260b7d87f..4faeb47fa5e1 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains iSCSI Target Portal Group related functions. 
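This file and its neighbors also carry the tree-wide strict_strtoul()/strict_strtoull() to kstrtoul()/kstrtoull() rename, and most converted call sites in this series now propagate the parser's errno instead of flattening every failure to -EINVAL. A sketch of the converted configfs store shape (the function name and the 0xffff bound are illustrative only):

#include <linux/kernel.h>
#include <linux/types.h>

/* kstrtoul() returns 0 or a negative errno (-EINVAL, -ERANGE),
 * which the caller can now return directly.
 */
static ssize_t store_gp_id_example(const char *page, size_t count)
{
	unsigned long id;
	int ret;

	ret = kstrtoul(page, 0, &id);
	if (ret < 0)
		return ret;		/* preserve -ERANGE vs -EINVAL */
	if (id > 0xffff)
		return -EINVAL;		/* semantic range check stays local */
	return count;
}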
* - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -49,7 +47,7 @@ struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *tiqn, u1 INIT_LIST_HEAD(&tpg->tpg_gnp_list); INIT_LIST_HEAD(&tpg->tpg_list); mutex_init(&tpg->tpg_access_lock); - mutex_init(&tpg->np_login_lock); + sema_init(&tpg->np_login_sem, 1); spin_lock_init(&tpg->tpg_state_lock); spin_lock_init(&tpg->tpg_np_lock); @@ -129,7 +127,8 @@ void iscsit_release_discovery_tpg(void) struct iscsi_portal_group *iscsit_get_tpg_from_np( struct iscsi_tiqn *tiqn, - struct iscsi_np *np) + struct iscsi_np *np, + struct iscsi_tpg_np **tpg_np_out) { struct iscsi_portal_group *tpg = NULL; struct iscsi_tpg_np *tpg_np; @@ -147,6 +146,8 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np( spin_lock(&tpg->tpg_np_lock); list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { if (tpg_np->tpg_np == np) { + *tpg_np_out = tpg_np; + kref_get(&tpg_np->tpg_np_kref); spin_unlock(&tpg->tpg_np_lock); spin_unlock(&tiqn->tiqn_tpg_lock); return tpg; @@ -175,18 +176,20 @@ void iscsit_put_tpg(struct iscsi_portal_group *tpg) static void iscsit_clear_tpg_np_login_thread( struct iscsi_tpg_np *tpg_np, - struct iscsi_portal_group *tpg) + struct iscsi_portal_group *tpg, + bool shutdown) { if (!tpg_np->tpg_np) { pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n"); return; } - iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg); + iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg, shutdown); } void iscsit_clear_tpg_np_login_threads( - struct iscsi_portal_group *tpg) + struct iscsi_portal_group *tpg, + bool shutdown) { struct iscsi_tpg_np *tpg_np; @@ -197,7 +200,7 @@ void iscsit_clear_tpg_np_login_threads( continue; } spin_unlock(&tpg->tpg_np_lock); - iscsit_clear_tpg_np_login_thread(tpg_np, tpg); + iscsit_clear_tpg_np_login_thread(tpg_np, tpg, shutdown); spin_lock(&tpg->tpg_np_lock); } spin_unlock(&tpg->tpg_np_lock); @@ -268,6 +271,8 @@ int iscsit_tpg_del_portal_group( tpg->tpg_state = TPG_STATE_INACTIVE; spin_unlock(&tpg->tpg_state_lock); + iscsit_clear_tpg_np_login_threads(tpg, true); + if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { pr_err("Unable to delete iSCSI Target Portal Group:" " %hu while active sessions exist, and force=0\n", @@ -368,7 +373,7 @@ int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *tpg, int force) tpg->tpg_state = TPG_STATE_INACTIVE; spin_unlock(&tpg->tpg_state_lock); - iscsit_clear_tpg_np_login_threads(tpg); + iscsit_clear_tpg_np_login_threads(tpg, false); if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { spin_lock(&tpg->tpg_state_lock); @@ -490,6 +495,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( INIT_LIST_HEAD(&tpg_np->tpg_np_child_list); INIT_LIST_HEAD(&tpg_np->tpg_np_parent_list); spin_lock_init(&tpg_np->tpg_np_parent_lock); + init_completion(&tpg_np->tpg_np_comp); + kref_init(&tpg_np->tpg_np_kref); tpg_np->tpg_np = np; tpg_np->tpg = tpg; @@ -520,7 +527,7 @@ static int iscsit_tpg_release_np( struct iscsi_portal_group *tpg, struct iscsi_np *np) { - iscsit_clear_tpg_np_login_thread(tpg_np, tpg); + iscsit_clear_tpg_np_login_thread(tpg_np, tpg, true); pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n", tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index dda48c141a8c..b77693e2c209 
100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -5,10 +5,10 @@ extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *, extern int iscsit_load_discovery_tpg(void); extern void iscsit_release_discovery_tpg(void); extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *, - struct iscsi_np *); + struct iscsi_np *, struct iscsi_tpg_np **); extern int iscsit_get_tpg(struct iscsi_portal_group *); extern void iscsit_put_tpg(struct iscsi_portal_group *); -extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *); +extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *, bool); extern void iscsit_tpg_dump_params(struct iscsi_portal_group *); extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *); extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *, diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c index 81289520f96b..601e9cc61e98 100644 --- a/drivers/target/iscsi/iscsi_target_tq.c +++ b/drivers/target/iscsi/iscsi_target_tq.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Login Thread and Thread Queue functions. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -105,12 +103,11 @@ int iscsi_allocate_thread_sets(u32 thread_pair_count) ts->status = ISCSI_THREAD_SET_FREE; INIT_LIST_HEAD(&ts->ts_list); spin_lock_init(&ts->ts_state_lock); - init_completion(&ts->rx_post_start_comp); - init_completion(&ts->tx_post_start_comp); init_completion(&ts->rx_restart_comp); init_completion(&ts->tx_restart_comp); init_completion(&ts->rx_start_comp); init_completion(&ts->tx_start_comp); + sema_init(&ts->ts_activate_sem, 0); ts->create_threads = 1; ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s", @@ -139,35 +136,44 @@ int iscsi_allocate_thread_sets(u32 thread_pair_count) return allocated_thread_pair_count; } -void iscsi_deallocate_thread_sets(void) +static void iscsi_deallocate_thread_one(struct iscsi_thread_set *ts) { - u32 released_count = 0; - struct iscsi_thread_set *ts = NULL; - - while ((ts = iscsi_get_ts_from_inactive_list())) { + spin_lock_bh(&ts->ts_state_lock); + ts->status = ISCSI_THREAD_SET_DIE; + if (ts->rx_thread) { + complete(&ts->rx_start_comp); + spin_unlock_bh(&ts->ts_state_lock); + kthread_stop(ts->rx_thread); spin_lock_bh(&ts->ts_state_lock); - ts->status = ISCSI_THREAD_SET_DIE; + } + if (ts->tx_thread) { + complete(&ts->tx_start_comp); spin_unlock_bh(&ts->ts_state_lock); + kthread_stop(ts->tx_thread); + spin_lock_bh(&ts->ts_state_lock); + } + spin_unlock_bh(&ts->ts_state_lock); + /* + * Release this thread_id in the thread_set_bitmap + */ + spin_lock(&ts_bitmap_lock); + bitmap_release_region(iscsit_global->ts_bitmap, + ts->thread_id, get_order(1)); + spin_unlock(&ts_bitmap_lock); - if (ts->rx_thread) { - send_sig(SIGINT, ts->rx_thread, 1); - kthread_stop(ts->rx_thread); - } - if (ts->tx_thread) { - send_sig(SIGINT, ts->tx_thread, 1); - kthread_stop(ts->tx_thread); - } - /* - * Release this thread_id in the thread_set_bitmap - */ - spin_lock(&ts_bitmap_lock); - bitmap_release_region(iscsit_global->ts_bitmap, - ts->thread_id, get_order(1)); - 
spin_unlock(&ts_bitmap_lock); + kfree(ts); +} +void iscsi_deallocate_thread_sets(void) +{ + struct iscsi_thread_set *ts = NULL; + u32 released_count = 0; + + while ((ts = iscsi_get_ts_from_inactive_list())) { + + iscsi_deallocate_thread_one(ts); released_count++; - kfree(ts); } if (released_count) @@ -187,34 +193,13 @@ static void iscsi_deallocate_extra_thread_sets(void) if (!ts) break; - spin_lock_bh(&ts->ts_state_lock); - ts->status = ISCSI_THREAD_SET_DIE; - spin_unlock_bh(&ts->ts_state_lock); - - if (ts->rx_thread) { - send_sig(SIGINT, ts->rx_thread, 1); - kthread_stop(ts->rx_thread); - } - if (ts->tx_thread) { - send_sig(SIGINT, ts->tx_thread, 1); - kthread_stop(ts->tx_thread); - } - /* - * Release this thread_id in the thread_set_bitmap - */ - spin_lock(&ts_bitmap_lock); - bitmap_release_region(iscsit_global->ts_bitmap, - ts->thread_id, get_order(1)); - spin_unlock(&ts_bitmap_lock); - + iscsi_deallocate_thread_one(ts); released_count++; - kfree(ts); } - if (released_count) { + if (released_count) pr_debug("Stopped %d thread set(s) (%d total threads)." "\n", released_count, released_count * 2); - } } void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts) @@ -224,37 +209,23 @@ void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set spin_lock_bh(&ts->ts_state_lock); conn->thread_set = ts; ts->conn = conn; + ts->status = ISCSI_THREAD_SET_ACTIVE; spin_unlock_bh(&ts->ts_state_lock); - /* - * Start up the RX thread and wait on rx_post_start_comp. The RX - * Thread will then do the same for the TX Thread in - * iscsi_rx_thread_pre_handler(). - */ + complete(&ts->rx_start_comp); - wait_for_completion(&ts->rx_post_start_comp); + complete(&ts->tx_start_comp); + + down(&ts->ts_activate_sem); } struct iscsi_thread_set *iscsi_get_thread_set(void) { - int allocate_ts = 0; - struct completion comp; - struct iscsi_thread_set *ts = NULL; - /* - * If no inactive thread set is available on the first call to - * iscsi_get_ts_from_inactive_list(), sleep for a second and - * try again. If still none are available after two attempts, - * allocate a set ourselves. 
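iscsi_deallocate_thread_one() above replaces the old send_sig(SIGINT) kick with a cleaner sequence: mark the set ISCSI_THREAD_SET_DIE under the state lock, complete() the start completion the thread is parked on, and only then kthread_stop() it. The ordering matters because kthread_stop() blocks until the thread exits, so the sleeper has to be woken and has to see why. A stripped-down sketch (worker/worker_destroy are invented names; the real code also re-takes ts_state_lock around each kthread_stop()):

#include <linux/kthread.h>
#include <linux/completion.h>

struct worker {
	struct task_struct *task;
	struct completion wake;
	bool die;	/* set under ts_state_lock in the real driver */
};

static int worker_fn(void *data)
{
	struct worker *w = data;

	/* Parked until activation or shutdown. */
	wait_for_completion(&w->wake);
	if (w->die || kthread_should_stop())
		return 0;	/* woken only to die, as in deallocate */

	/* ... the normal rx/tx service loop would run here ... */
	return 0;
}

static void worker_destroy(struct worker *w)
{
	w->die = true;		/* mark first; complete() orders the store */
	complete(&w->wake);	/* wake the sleeper... */
	kthread_stop(w->task);	/* ...so kthread_stop() can reap it */
}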
- */ + struct iscsi_thread_set *ts; + get_set: ts = iscsi_get_ts_from_inactive_list(); if (!ts) { - if (allocate_ts == 2) - iscsi_allocate_thread_sets(1); - - init_completion(&comp); - wait_for_completion_timeout(&comp, 1 * HZ); - - allocate_ts++; + iscsi_allocate_thread_sets(1); goto get_set; } @@ -263,6 +234,7 @@ get_set: ts->thread_count = 2; init_completion(&ts->rx_restart_comp); init_completion(&ts->tx_restart_comp); + sema_init(&ts->ts_activate_sem, 0); return ts; } @@ -400,7 +372,8 @@ static void iscsi_check_to_add_additional_sets(void) static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts) { spin_lock_bh(&ts->ts_state_lock); - if ((ts->status == ISCSI_THREAD_SET_DIE) || signal_pending(current)) { + if (ts->status == ISCSI_THREAD_SET_DIE || kthread_should_stop() || + signal_pending(current)) { spin_unlock_bh(&ts->ts_state_lock); return -1; } @@ -419,7 +392,8 @@ struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts) goto sleep; } - flush_signals(current); + if (ts->status != ISCSI_THREAD_SET_DIE) + flush_signals(current); if (ts->delay_inactive && (--ts->thread_count == 0)) { spin_unlock_bh(&ts->ts_state_lock); @@ -446,18 +420,19 @@ sleep: if (iscsi_signal_thread_pre_handler(ts) < 0) return NULL; + iscsi_check_to_add_additional_sets(); + + spin_lock_bh(&ts->ts_state_lock); if (!ts->conn) { pr_err("struct iscsi_thread_set->conn is NULL for" - " thread_id: %d, going back to sleep\n", ts->thread_id); - goto sleep; + " RX thread_id: %s/%d\n", current->comm, current->pid); + spin_unlock_bh(&ts->ts_state_lock); + return NULL; } - iscsi_check_to_add_additional_sets(); - /* - * The RX Thread starts up the TX Thread and sleeps. - */ ts->thread_clear |= ISCSI_CLEAR_RX_THREAD; - complete(&ts->tx_start_comp); - wait_for_completion(&ts->tx_post_start_comp); + spin_unlock_bh(&ts->ts_state_lock); + + up(&ts->ts_activate_sem); return ts->conn; } @@ -472,7 +447,8 @@ struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts) goto sleep; } - flush_signals(current); + if (ts->status != ISCSI_THREAD_SET_DIE) + flush_signals(current); if (ts->delay_inactive && (--ts->thread_count == 0)) { spin_unlock_bh(&ts->ts_state_lock); @@ -498,27 +474,20 @@ sleep: if (iscsi_signal_thread_pre_handler(ts) < 0) return NULL; - if (!ts->conn) { - pr_err("struct iscsi_thread_set->conn is NULL for " - " thread_id: %d, going back to sleep\n", - ts->thread_id); - goto sleep; - } - iscsi_check_to_add_additional_sets(); - /* - * From the TX thread, up the tx_post_start_comp that the RX Thread is - * sleeping on in iscsi_rx_thread_pre_handler(), then up the - * rx_post_start_comp that iscsi_activate_thread_set() is sleeping on. 
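The rework above also collapses the old four-completion startup dance (rx_start, tx_start, tx_post_start, rx_post_start) into a single semaphore: the activator completes both start completions and then down()s ts_activate_sem, which each thread up()s once it has latched ts->conn. The handshake in sketch form (names abbreviated, error handling omitted):

#include <linux/semaphore.h>
#include <linux/completion.h>

struct thread_set {
	void *conn;			/* stands in for struct iscsi_conn * */
	struct completion rx_start_comp;
	struct completion tx_start_comp;
	struct semaphore activate_sem;	/* sema_init(&ts->activate_sem, 0) */
};

/* Activator side, as in iscsi_activate_thread_set(). */
static void activate(struct thread_set *ts, void *conn)
{
	ts->conn = conn;
	complete(&ts->rx_start_comp);	/* wake the parked rx thread */
	complete(&ts->tx_start_comp);	/* wake the parked tx thread */
	down(&ts->activate_sem);	/* block until a thread latches conn */
}

/* Thread side, as at the end of the pre-handlers. */
static void *thread_took_over(struct thread_set *ts)
{
	up(&ts->activate_sem);		/* release the activator */
	return ts->conn;
}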
- */ - ts->thread_clear |= ISCSI_CLEAR_TX_THREAD; - complete(&ts->tx_post_start_comp); - complete(&ts->rx_post_start_comp); spin_lock_bh(&ts->ts_state_lock); - ts->status = ISCSI_THREAD_SET_ACTIVE; + if (!ts->conn) { + pr_err("struct iscsi_thread_set->conn is NULL for" + " TX thread_id: %s/%d\n", current->comm, current->pid); + spin_unlock_bh(&ts->ts_state_lock); + return NULL; + } + ts->thread_clear |= ISCSI_CLEAR_TX_THREAD; spin_unlock_bh(&ts->ts_state_lock); + up(&ts->ts_activate_sem); + return ts->conn; } diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h index 547d11831282..cc1eede5ab3a 100644 --- a/drivers/target/iscsi/iscsi_target_tq.h +++ b/drivers/target/iscsi/iscsi_target_tq.h @@ -64,10 +64,6 @@ struct iscsi_thread_set { struct iscsi_conn *conn; /* used for controlling ts state accesses */ spinlock_t ts_state_lock; - /* Used for rx side post startup */ - struct completion rx_post_start_comp; - /* Used for tx side post startup */ - struct completion tx_post_start_comp; /* used for restarting thread queue */ struct completion rx_restart_comp; /* used for restarting thread queue */ @@ -82,6 +78,7 @@ struct iscsi_thread_set { struct task_struct *tx_thread; /* struct iscsi_thread_set in list list head*/ struct list_head ts_list; + struct semaphore ts_activate_sem; }; #endif /*** ISCSI_THREAD_QUEUE_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 1df06d5e4e01..f2de28e178fd 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Target specific utility functions. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -19,6 +17,7 @@ ******************************************************************************/ #include <linux/list.h> +#include <linux/percpu_ida.h> #include <scsi/scsi_tcq.h> #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> @@ -149,18 +148,6 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd) spin_unlock_bh(&cmd->r2t_lock); } -struct iscsi_cmd *iscsit_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) -{ - struct iscsi_cmd *cmd; - - cmd = kmem_cache_zalloc(lio_cmd_cache, gfp_mask); - if (!cmd) - return NULL; - - cmd->release_cmd = &iscsit_release_cmd; - return cmd; -} - /* * May be called from software interrupt (timer) context for allocating * iSCSI NopINs. 
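The iscsit_allocate_cmd() rework just below swaps the lio_cmd_cache kmem_cache for tags carved out of a per-session flat map: a tag from se_sess->sess_tag_pool indexes into sess_cmd_map in slots of sizeof(struct iscsi_cmd) plus the transport's priv_size, with the pool sized earlier from the NodeACL queue depth (max(ISCSIT_MIN_TAGS, queue_depth) + ISCSIT_EXTRA_TAGS). A self-contained sketch of the same pool shape against the v3.12-era percpu_ida API (since removed from mainline in favor of sbitmap); cmd_pool and its helpers are invented, and unlike the hunk below this sketch checks for a failed allocation:

#include <linux/percpu_ida.h>
#include <linux/gfp.h>

/* One flat array of fixed-size slots plus a per-cpu tag allocator;
 * percpu_ida_init(&p->tags, tag_num) and vzalloc() of the map are
 * assumed to have been done at session setup.
 */
struct cmd_pool {
	void *map;			/* tag_num * slot_size bytes */
	size_t slot_size;		/* sizeof(cmd) + transport priv_size */
	struct percpu_ida tags;
};

static void *pool_alloc(struct cmd_pool *p, gfp_t gfp)
{
	int tag = percpu_ida_alloc(&p->tags, gfp);

	if (tag < 0)
		return NULL;		/* pool exhausted under GFP_ATOMIC */
	return p->map + tag * p->slot_size;
}

static void pool_free(struct cmd_pool *p, void *slot)
{
	int tag = (slot - p->map) / p->slot_size;

	percpu_ida_free(&p->tags, tag);
}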
@@ -168,12 +155,15 @@ struct iscsi_cmd *iscsit_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) { struct iscsi_cmd *cmd; + struct se_session *se_sess = conn->sess->se_sess; + int size, tag; - cmd = conn->conn_transport->iscsit_alloc_cmd(conn, gfp_mask); - if (!cmd) { - pr_err("Unable to allocate memory for struct iscsi_cmd.\n"); - return NULL; - } + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, gfp_mask); + size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size; + cmd = (struct iscsi_cmd *)(se_sess->sess_cmd_map + (tag * size)); + memset(cmd, 0, size); + + cmd->se_cmd.map_tag = tag; cmd->conn = conn; INIT_LIST_HEAD(&cmd->i_conn_node); INIT_LIST_HEAD(&cmd->datain_list); @@ -689,6 +679,16 @@ void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn) void iscsit_release_cmd(struct iscsi_cmd *cmd) { + struct iscsi_session *sess; + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (cmd->conn) + sess = cmd->conn->sess; + else + sess = cmd->sess; + + BUG_ON(!sess || !sess->se_sess); + kfree(cmd->buf_ptr); kfree(cmd->pdu_list); kfree(cmd->seq_list); @@ -696,8 +696,9 @@ void iscsit_release_cmd(struct iscsi_cmd *cmd) kfree(cmd->iov_data); kfree(cmd->text_in_ptr); - kmem_cache_free(lio_cmd_cache, cmd); + percpu_ida_free(&sess->se_sess->sess_tag_pool, se_cmd->map_tag); } +EXPORT_SYMBOL(iscsit_release_cmd); static void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd, bool check_queues) @@ -761,7 +762,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) /* Fall-through */ default: __iscsit_free_cmd(cmd, false, shutdown); - cmd->release_cmd(cmd); + iscsit_release_cmd(cmd); break; } } diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 568ad25f25d3..0f6d69dabca1 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -3,7 +3,7 @@ * This file contains the Linux/SCSI LLD virtual SCSI initiator driver * for emulated SAS initiator ports * - * © Copyright 2011 RisingTide Systems LLC. + * © Copyright 2011-2013 Datera, Inc. * * Licensed to the Linux Foundation under the General Public License (GPL) version 2. * diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index cbe48ab41745..47244102281e 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -3,7 +3,7 @@ * * This file contains SPC-3 compliant asymmetric logical unit assigntment (ALUA) * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -557,6 +557,9 @@ target_alua_state_check(struct se_cmd *cmd) * a ALUA logical unit group. */ tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; + if (!tg_pt_gp_mem) + return 0; + spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); @@ -730,7 +733,7 @@ static int core_alua_write_tpg_metadata( if (ret < 0) pr_err("Error writing ALUA metadata file: %s\n", path); fput(file); - return ret ? -EIO : 0; + return (ret < 0) ? 
-EIO : 0; } /* @@ -1756,10 +1759,10 @@ ssize_t core_alua_store_access_type( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_access_type\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1) && (tmp != 2) && (tmp != 3)) { pr_err("Illegal value for alua_access_type:" @@ -1794,10 +1797,10 @@ ssize_t core_alua_store_nonop_delay_msecs( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract nonop_delay_msecs\n"); - return -EINVAL; + return ret; } if (tmp > ALUA_MAX_NONOP_DELAY_MSECS) { pr_err("Passed nonop_delay_msecs: %lu, exceeds" @@ -1825,10 +1828,10 @@ ssize_t core_alua_store_trans_delay_msecs( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract trans_delay_msecs\n"); - return -EINVAL; + return ret; } if (tmp > ALUA_MAX_TRANS_DELAY_MSECS) { pr_err("Passed trans_delay_msecs: %lu, exceeds" @@ -1856,10 +1859,10 @@ ssize_t core_alua_store_implict_trans_secs( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract implict_trans_secs\n"); - return -EINVAL; + return ret; } if (tmp > ALUA_MAX_IMPLICT_TRANS_SECS) { pr_err("Passed implict_trans_secs: %lu, exceeds" @@ -1887,10 +1890,10 @@ ssize_t core_alua_store_preferred_bit( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract preferred ALUA value\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { pr_err("Illegal value for preferred ALUA: %lu\n", tmp); @@ -1922,10 +1925,10 @@ ssize_t core_alua_store_offline_bit( if (!lun->lun_sep) return -ENODEV; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_tg_pt_offline value\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { pr_err("Illegal value for alua_tg_pt_offline: %lu\n", @@ -1961,10 +1964,10 @@ ssize_t core_alua_store_secondary_status( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_tg_pt_status\n"); - return -EINVAL; + return ret; } if ((tmp != ALUA_STATUS_NONE) && (tmp != ALUA_STATUS_ALTERED_BY_EXPLICT_STPG) && @@ -1994,10 +1997,10 @@ ssize_t core_alua_store_secondary_write_metadata( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_tg_pt_write_md\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { pr_err("Illegal value for alua_tg_pt_write_md:" diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index e4d22933efaf..82e81c542e43 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3,7 +3,7 @@ * * This file contains ConfigFS logic for the Generic Target Engine project. * - * (c) Copyright 2008-2012 RisingTide Systems LLC. + * (c) Copyright 2008-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * @@ -48,6 +48,7 @@ #include "target_core_alua.h" #include "target_core_pr.h" #include "target_core_rd.h" +#include "target_core_xcopy.h" extern struct t10_alua_lu_gp *default_lu_gp; @@ -268,7 +269,7 @@ static struct configfs_subsystem target_core_fabrics = { }, }; -static struct configfs_subsystem *target_core_subsystem[] = { +struct configfs_subsystem *target_core_subsystem[] = { &target_core_fabrics, NULL, }; @@ -577,9 +578,9 @@ static ssize_t target_core_dev_store_attr_##_name( \ unsigned long val; \ int ret; \ \ - ret = strict_strtoul(page, 0, &val); \ + ret = kstrtoul(page, 0, &val); \ if (ret < 0) { \ - pr_err("strict_strtoul() failed with" \ + pr_err("kstrtoul() failed with" \ " ret: %d\n", ret); \ return -EINVAL; \ } \ @@ -636,6 +637,12 @@ SE_DEV_ATTR(emulate_tpu, S_IRUGO | S_IWUSR); DEF_DEV_ATTRIB(emulate_tpws); SE_DEV_ATTR(emulate_tpws, S_IRUGO | S_IWUSR); +DEF_DEV_ATTRIB(emulate_caw); +SE_DEV_ATTR(emulate_caw, S_IRUGO | S_IWUSR); + +DEF_DEV_ATTRIB(emulate_3pc); +SE_DEV_ATTR(emulate_3pc, S_IRUGO | S_IWUSR); + DEF_DEV_ATTRIB(enforce_pr_isids); SE_DEV_ATTR(enforce_pr_isids, S_IRUGO | S_IWUSR); @@ -693,6 +700,8 @@ static struct configfs_attribute *target_core_dev_attrib_attrs[] = { &target_core_dev_attrib_emulate_tas.attr, &target_core_dev_attrib_emulate_tpu.attr, &target_core_dev_attrib_emulate_tpws.attr, + &target_core_dev_attrib_emulate_caw.attr, + &target_core_dev_attrib_emulate_3pc.attr, &target_core_dev_attrib_enforce_pr_isids.attr, &target_core_dev_attrib_is_nonrot.attr, &target_core_dev_attrib_emulate_rest_reord.attr, @@ -1310,9 +1319,9 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( ret = -ENOMEM; goto out; } - ret = strict_strtoull(arg_p, 0, &tmp_ll); + ret = kstrtoull(arg_p, 0, &tmp_ll); if (ret < 0) { - pr_err("strict_strtoull() failed for" + pr_err("kstrtoull() failed for" " sa_res_key=\n"); goto out; } @@ -1836,11 +1845,11 @@ static ssize_t target_core_alua_lu_gp_store_attr_lu_gp_id( unsigned long lu_gp_id; int ret; - ret = strict_strtoul(page, 0, &lu_gp_id); + ret = kstrtoul(page, 0, &lu_gp_id); if (ret < 0) { - pr_err("strict_strtoul() returned %d for" + pr_err("kstrtoul() returned %d for" " lu_gp_id\n", ret); - return -EINVAL; + return ret; } if (lu_gp_id > 0x0000ffff) { pr_err("ALUA lu_gp_id: %lu exceeds maximum:" @@ -2032,11 +2041,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_state( return -EINVAL; } - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract new ALUA access state from" " %s\n", page); - return -EINVAL; + return ret; } new_state = (int)tmp; @@ -2079,11 +2088,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_status( return -EINVAL; } - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract new ALUA access status" " from %s\n", page); - return -EINVAL; + return ret; } new_status = (int)tmp; @@ -2139,10 +2148,10 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_write_metadata( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_write_metadata\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { @@ -2263,11 +2272,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_tg_pt_gp_id( unsigned long tg_pt_gp_id; int ret; - ret = strict_strtoul(page, 0, &tg_pt_gp_id); + ret = kstrtoul(page, 0, &tg_pt_gp_id); if (ret < 0) { - 
pr_err("strict_strtoul() returned %d for" + pr_err("kstrtoul() returned %d for" " tg_pt_gp_id\n", ret); - return -EINVAL; + return ret; } if (tg_pt_gp_id > 0x0000ffff) { pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum:" @@ -2676,10 +2685,10 @@ static ssize_t target_core_hba_store_attr_hba_mode(struct se_hba *hba, if (transport->pmode_enable_hba == NULL) return -EINVAL; - ret = strict_strtoul(page, 0, &mode_flag); + ret = kstrtoul(page, 0, &mode_flag); if (ret < 0) { pr_err("Unable to extract hba mode flag: %d\n", ret); - return -EINVAL; + return ret; } if (hba->dev_count) { @@ -2767,11 +2776,11 @@ static struct config_group *target_core_call_addhbatotarget( str++; /* Skip to start of plugin dependent ID */ } - ret = strict_strtoul(str, 0, &plugin_dep_id); + ret = kstrtoul(str, 0, &plugin_dep_id); if (ret < 0) { - pr_err("strict_strtoul() returned %d for" + pr_err("kstrtoul() returned %d for" " plugin_dep_id\n", ret); - return ERR_PTR(-EINVAL); + return ERR_PTR(ret); } /* * Load up TCM subsystem plugins if they have not already been loaded. @@ -2927,6 +2936,10 @@ static int __init target_core_init_configfs(void) if (ret < 0) goto out; + ret = target_xcopy_setup_pt(); + if (ret < 0) + goto out; + return 0; out: @@ -2999,6 +3012,7 @@ static void __exit target_core_exit_configfs(void) core_dev_release_virtual_lun0(); rd_module_exit(); + target_xcopy_release_pt(); release_se_kmem_caches(); } diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 8f4142fe5f19..d90dbb0f1a69 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -4,7 +4,7 @@ * This file contains the TCM Virtual Device and Disk Transport * agnostic related functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * @@ -47,6 +47,9 @@ #include "target_core_pr.h" #include "target_core_ua.h" +DEFINE_MUTEX(g_device_mutex); +LIST_HEAD(g_device_list); + static struct se_hba *lun0_hba; /* not static, needed by tpg.c */ struct se_device *g_lun0_dev; @@ -890,6 +893,32 @@ int se_dev_set_emulate_tpws(struct se_device *dev, int flag) return 0; } +int se_dev_set_emulate_caw(struct se_device *dev, int flag) +{ + if (flag != 0 && flag != 1) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + dev->dev_attrib.emulate_caw = flag; + pr_debug("dev[%p]: SE Device CompareAndWrite (AtomicTestandSet): %d\n", + dev, flag); + + return 0; +} + +int se_dev_set_emulate_3pc(struct se_device *dev, int flag) +{ + if (flag != 0 && flag != 1) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + dev->dev_attrib.emulate_3pc = flag; + pr_debug("dev[%p]: SE Device 3rd Party Copy (EXTENDED_COPY): %d\n", + dev, flag); + + return 0; +} + int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag) { if ((flag != 0) && (flag != 1)) { @@ -1393,6 +1422,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&dev->delayed_cmd_list); INIT_LIST_HEAD(&dev->state_list); INIT_LIST_HEAD(&dev->qf_cmd_list); + INIT_LIST_HEAD(&dev->g_dev_node); spin_lock_init(&dev->stats_lock); spin_lock_init(&dev->execute_task_lock); spin_lock_init(&dev->delayed_cmd_lock); @@ -1400,6 +1430,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) spin_lock_init(&dev->se_port_lock); spin_lock_init(&dev->se_tmr_lock); spin_lock_init(&dev->qf_cmd_lock); + sema_init(&dev->caw_sem, 1); atomic_set(&dev->dev_ordered_id, 0); INIT_LIST_HEAD(&dev->t10_wwn.t10_vpd_list); spin_lock_init(&dev->t10_wwn.t10_vpd_lock); @@ -1423,6 +1454,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.emulate_tas = DA_EMULATE_TAS; dev->dev_attrib.emulate_tpu = DA_EMULATE_TPU; dev->dev_attrib.emulate_tpws = DA_EMULATE_TPWS; + dev->dev_attrib.emulate_caw = DA_EMULATE_CAW; + dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC; dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS; dev->dev_attrib.is_nonrot = DA_IS_NONROT; dev->dev_attrib.emulate_rest_reord = DA_EMULATE_REST_REORD; @@ -1510,6 +1543,11 @@ int target_configure_device(struct se_device *dev) spin_lock(&hba->device_lock); hba->dev_count++; spin_unlock(&hba->device_lock); + + mutex_lock(&g_device_mutex); + list_add_tail(&dev->g_dev_node, &g_device_list); + mutex_unlock(&g_device_mutex); + return 0; out_free_alua: @@ -1528,6 +1566,10 @@ void target_free_device(struct se_device *dev) if (dev->dev_flags & DF_CONFIGURED) { destroy_workqueue(dev->tmr_wq); + mutex_lock(&g_device_mutex); + list_del(&dev->g_dev_node); + mutex_unlock(&g_device_mutex); + spin_lock(&hba->device_lock); hba->dev_count--; spin_unlock(&hba->device_lock); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index eb56eb129563..3503996d7d10 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -4,7 +4,7 @@ * This file contains generic fabric module configfs infrastructure for * TCM v4.x code * - * (c) Copyright 2010-2012 RisingTide Systems LLC. + * (c) Copyright 2010-2013 Datera, Inc. * * Nicholas A. 
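The new g_device_mutex/g_device_list pair puts every configured se_device on a global list, which the EXTENDED_COPY code added later in this series walks to resolve a target descriptor WWN to a backend device. A hedged sketch of the lookup idiom (example_wwn_match() is a hypothetical helper, not part of the patch):

static struct se_device *example_find_dev_by_wwn(const unsigned char *wwn)
{
	struct se_device *se_dev, *found = NULL;

	mutex_lock(&g_device_mutex);
	list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
		/* compare the device's generated NAA IEEE WWN to the descriptor */
		if (example_wwn_match(se_dev, wwn)) {	/* hypothetical compare */
			found = se_dev;
			break;
		}
	}
	mutex_unlock(&g_device_mutex);

	return found;
}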
Bellinger <nab@linux-iscsi.org> * @@ -189,9 +189,11 @@ static ssize_t target_fabric_mappedlun_store_write_protect( struct se_node_acl *se_nacl = lacl->se_lun_nacl; struct se_portal_group *se_tpg = se_nacl->se_tpg; unsigned long op; + int ret; - if (strict_strtoul(page, 0, &op)) - return -EINVAL; + ret = kstrtoul(page, 0, &op); + if (ret) + return ret; if ((op != 1) && (op != 0)) return -EINVAL; @@ -350,7 +352,10 @@ static struct config_group *target_fabric_make_mappedlun( * Determine the Mapped LUN value. This is what the SCSI Initiator * Port will actually see. */ - if (strict_strtoul(buf + 4, 0, &mapped_lun) || mapped_lun > UINT_MAX) { + ret = kstrtoul(buf + 4, 0, &mapped_lun); + if (ret) + goto out; + if (mapped_lun > UINT_MAX) { ret = -EINVAL; goto out; } @@ -875,7 +880,10 @@ static struct config_group *target_fabric_make_lun( " \"lun_$LUN_NUMBER\"\n"); return ERR_PTR(-EINVAL); } - if (strict_strtoul(name + 4, 0, &unpacked_lun) || unpacked_lun > UINT_MAX) + errno = kstrtoul(name + 4, 0, &unpacked_lun); + if (errno) + return ERR_PTR(errno); + if (unpacked_lun > UINT_MAX) return ERR_PTR(-EINVAL); lun = core_get_lun_from_tpg(se_tpg, unpacked_lun); diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 687b0b0a4aa6..0d1cf8b4f49f 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -4,7 +4,7 @@ * This file contains generic high level protocol identifier and PR * handlers for TCM fabric modules * - * (c) Copyright 2010-2012 RisingTide Systems LLC. + * (c) Copyright 2010-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index b11890d85120..b662f89dedac 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -3,7 +3,7 @@ * * This file contains the Storage Engine <-> FILEIO transport specific functions * - * (c) Copyright 2005-2012 RisingTide Systems LLC. + * (c) Copyright 2005-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -547,11 +547,9 @@ fd_execute_unmap(struct se_cmd *cmd) } static sense_reason_t -fd_execute_rw(struct se_cmd *cmd) +fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, + enum dma_data_direction data_direction) { - struct scatterlist *sgl = cmd->t_data_sg; - u32 sgl_nents = cmd->t_data_nents; - enum dma_data_direction data_direction = cmd->data_direction; struct se_device *dev = cmd->se_dev; int ret = 0; @@ -635,10 +633,10 @@ static ssize_t fd_set_configfs_dev_params(struct se_device *dev, ret = -ENOMEM; break; } - ret = strict_strtoull(arg_p, 0, &fd_dev->fd_dev_size); + ret = kstrtoull(arg_p, 0, &fd_dev->fd_dev_size); kfree(arg_p); if (ret < 0) { - pr_err("strict_strtoull() failed for" + pr_err("kstrtoull() failed for" " fd_dev_size=\n"); goto out; } diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c index d2616cd48f1e..a25051a37dd7 100644 --- a/drivers/target/target_core_hba.c +++ b/drivers/target/target_core_hba.c @@ -3,7 +3,7 @@ * * This file contains the TCM HBA Transport related functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index aa1620abec6d..b9a3394fe479 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -4,7 +4,7 @@ * This file contains the Storage Engine <-> Linux BlockIO transport * specific functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -536,10 +536,10 @@ static ssize_t iblock_set_configfs_dev_params(struct se_device *dev, ret = -ENOMEM; break; } - ret = strict_strtoul(arg_p, 0, &tmp_readonly); + ret = kstrtoul(arg_p, 0, &tmp_readonly); kfree(arg_p); if (ret < 0) { - pr_err("strict_strtoul() failed for" + pr_err("kstrtoul() failed for" " readonly=\n"); goto out; } @@ -587,11 +587,9 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b) } static sense_reason_t -iblock_execute_rw(struct se_cmd *cmd) +iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, + enum dma_data_direction data_direction) { - struct scatterlist *sgl = cmd->t_data_sg; - u32 sgl_nents = cmd->t_data_nents; - enum dma_data_direction data_direction = cmd->data_direction; struct se_device *dev = cmd->se_dev; struct iblock_req *ibr; struct bio *bio; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 18d49df4d0ac..579128abe3f5 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -33,6 +33,8 @@ int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int); int se_dev_set_emulate_tas(struct se_device *, int); int se_dev_set_emulate_tpu(struct se_device *, int); int se_dev_set_emulate_tpws(struct se_device *, int); +int se_dev_set_emulate_caw(struct se_device *, int); +int se_dev_set_emulate_3pc(struct se_device *, int); int se_dev_set_enforce_pr_isids(struct se_device *, int); int se_dev_set_is_nonrot(struct se_device *, int); int se_dev_set_emulate_rest_reord(struct se_device *dev, int); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index bd78faf67c6b..d1ae4c5c3ffd 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -4,7 +4,7 @@ * This file contains SPC-3 compliant persistent reservations and * legacy SPC-2 reservations with compatible reservation handling (CRH=1) * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -1949,7 +1949,7 @@ static int __core_scsi3_write_aptpl_to_file( pr_debug("Error writing APTPL metadata file: %s\n", path); fput(file); - return ret ? -EIO : 0; + return (ret < 0) ? -EIO : 0; } /* diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index e992b27aa090..551c96ca60ac 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -3,7 +3,7 @@ * * This file contains the generic target mode <-> Linux SCSI subsystem plugin. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -1050,9 +1050,8 @@ pscsi_execute_cmd(struct se_cmd *cmd) req = blk_get_request(pdv->pdv_sd->request_queue, (data_direction == DMA_TO_DEVICE), GFP_KERNEL); - if (!req || IS_ERR(req)) { - pr_err("PSCSI: blk_get_request() failed: %ld\n", - req ? 
IS_ERR(req) : -ENOMEM); + if (!req) { + pr_err("PSCSI: blk_get_request() failed\n"); ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto fail; } diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 51127d15d5c5..131327ac7f5b 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -4,7 +4,7 @@ * This file contains the Storage Engine <-> Ramdisk transport * specific functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -280,11 +280,9 @@ static struct rd_dev_sg_table *rd_get_sg_table(struct rd_dev *rd_dev, u32 page) } static sense_reason_t -rd_execute_rw(struct se_cmd *cmd) +rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, + enum dma_data_direction data_direction) { - struct scatterlist *sgl = cmd->t_data_sg; - u32 sgl_nents = cmd->t_data_nents; - enum dma_data_direction data_direction = cmd->data_direction; struct se_device *se_dev = cmd->se_dev; struct rd_dev *dev = RD_DEV(se_dev); struct rd_dev_sg_table *table; diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 8a462773d0c8..6c17295e8d7c 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1,7 +1,7 @@ /* * SCSI Block Commands (SBC) parsing and emulation. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -25,6 +25,7 @@ #include <linux/ratelimit.h> #include <asm/unaligned.h> #include <scsi/scsi.h> +#include <scsi/scsi_tcq.h> #include <target/target_core_base.h> #include <target/target_core_backend.h> @@ -280,13 +281,13 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o return 0; } -static void xdreadwrite_callback(struct se_cmd *cmd) +static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd) { unsigned char *buf, *addr; struct scatterlist *sg; unsigned int offset; - int i; - int count; + sense_reason_t ret = TCM_NO_SENSE; + int i, count; /* * From sbc3r22.pdf section 5.48 XDWRITEREAD (10) command * @@ -301,7 +302,7 @@ static void xdreadwrite_callback(struct se_cmd *cmd) buf = kmalloc(cmd->data_length, GFP_KERNEL); if (!buf) { pr_err("Unable to allocate xor_callback buf\n"); - return; + return TCM_OUT_OF_RESOURCES; } /* * Copy the scatterlist WRITE buffer located at cmd->t_data_sg @@ -320,8 +321,10 @@ static void xdreadwrite_callback(struct se_cmd *cmd) offset = 0; for_each_sg(cmd->t_bidi_data_sg, sg, cmd->t_bidi_data_nents, count) { addr = kmap_atomic(sg_page(sg)); - if (!addr) + if (!addr) { + ret = TCM_OUT_OF_RESOURCES; goto out; + } for (i = 0; i < sg->length; i++) *(addr + sg->offset + i) ^= *(buf + offset + i); @@ -332,6 +335,193 @@ static void xdreadwrite_callback(struct se_cmd *cmd) out: kfree(buf); + return ret; +} + +static sense_reason_t +sbc_execute_rw(struct se_cmd *cmd) +{ + return cmd->execute_rw(cmd, cmd->t_data_sg, cmd->t_data_nents, + cmd->data_direction); +} + +static sense_reason_t compare_and_write_post(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + + cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST; + /* + * Unlock ->caw_sem originally obtained during sbc_compare_and_write() + * before the original READ I/O submission. 
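Note the contract change in the sbc hunks above: ->transport_complete_callback() now returns a sense_reason_t instead of void, so a post-I/O transform such as the XDWRITEREAD XOR can fail the command cleanly. A minimal hypothetical callback under the new contract:

static sense_reason_t example_complete_callback(struct se_cmd *cmd)
{
	unsigned char *buf;

	buf = kmalloc(cmd->data_length, GFP_KERNEL);
	if (!buf)
		return TCM_OUT_OF_RESOURCES;	/* surfaced as CHECK CONDITION */

	/* ... post-process the returned payload here ... */

	kfree(buf);
	return TCM_NO_SENSE;
}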
+	 */
+	up(&dev->caw_sem);
+
+	return TCM_NO_SENSE;
+}
+
+static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
+{
+	struct se_device *dev = cmd->se_dev;
+	struct scatterlist *write_sg = NULL, *sg;
+	unsigned char *buf, *addr;
+	struct sg_mapping_iter m;
+	unsigned int offset = 0, len;
+	unsigned int nlbas = cmd->t_task_nolb;
+	unsigned int block_size = dev->dev_attrib.block_size;
+	unsigned int compare_len = (nlbas * block_size);
+	sense_reason_t ret = TCM_NO_SENSE;
+	int rc, i;
+
+	/*
+	 * Handle early failure in transport_generic_request_failure(),
+	 * which will not have taken ->caw_sem yet..
+	 */
+	if (!cmd->t_data_sg || !cmd->t_bidi_data_sg)
+		return TCM_NO_SENSE;
+
+	buf = kzalloc(cmd->data_length, GFP_KERNEL);
+	if (!buf) {
+		pr_err("Unable to allocate compare_and_write buf\n");
+		ret = TCM_OUT_OF_RESOURCES;
+		goto out;
+	}
+
+	write_sg = kzalloc(sizeof(struct scatterlist) * cmd->t_data_nents,
+			   GFP_KERNEL);
+	if (!write_sg) {
+		pr_err("Unable to allocate compare_and_write sg\n");
+		ret = TCM_OUT_OF_RESOURCES;
+		goto out;
+	}
+	/*
+	 * Setup verify and write data payloads from total NumberLBAs.
+	 */
+	rc = sg_copy_to_buffer(cmd->t_data_sg, cmd->t_data_nents, buf,
+			       cmd->data_length);
+	if (!rc) {
+		pr_err("sg_copy_to_buffer() failed for compare_and_write\n");
+		ret = TCM_OUT_OF_RESOURCES;
+		goto out;
+	}
+	/*
+	 * Compare the SCSI READ payload against the verify payload
+	 */
+	for_each_sg(cmd->t_bidi_data_sg, sg, cmd->t_bidi_data_nents, i) {
+		addr = (unsigned char *)kmap_atomic(sg_page(sg));
+		if (!addr) {
+			ret = TCM_OUT_OF_RESOURCES;
+			goto out;
+		}
+
+		len = min(sg->length, compare_len);
+
+		if (memcmp(addr, buf + offset, len)) {
+			pr_warn("Detected MISCOMPARE for addr: %p buf: %p\n",
+				addr, buf + offset);
+			kunmap_atomic(addr);
+			goto miscompare;
+		}
+		kunmap_atomic(addr);
+
+		offset += len;
+		compare_len -= len;
+		if (!compare_len)
+			break;
+	}
+
+	i = 0;
+	len = cmd->t_task_nolb * block_size;
+	sg_miter_start(&m, cmd->t_data_sg, cmd->t_data_nents, SG_MITER_TO_SG);
+	/*
+	 * Currently assumes NoLB=1 and SGLs are PAGE_SIZE..
+	 */
+	while (len) {
+		sg_miter_next(&m);
+
+		if (block_size < PAGE_SIZE) {
+			sg_set_page(&write_sg[i], m.page, block_size,
+				    block_size);
+		} else {
+			sg_miter_next(&m);
+			sg_set_page(&write_sg[i], m.page, block_size,
+				    0);
+		}
+		len -= block_size;
+		i++;
+	}
+	sg_miter_stop(&m);
+	/*
+	 * Save the original SGL + nents values before updating to new
+	 * assignments, to be released in transport_free_pages() ->
+	 * transport_reset_sgl_orig()
+	 */
+	cmd->t_data_sg_orig = cmd->t_data_sg;
+	cmd->t_data_sg = write_sg;
+	cmd->t_data_nents_orig = cmd->t_data_nents;
+	cmd->t_data_nents = 1;
+
+	cmd->sam_task_attr = MSG_HEAD_TAG;
+	cmd->transport_complete_callback = compare_and_write_post;
+	/*
+	 * Now reset ->execute_cmd() to the normal sbc_execute_rw() handler
+	 * for submitting the adjusted SGL to write instance user-data.
+	 */
+	cmd->execute_cmd = sbc_execute_rw;
+
+	spin_lock_irq(&cmd->t_state_lock);
+	cmd->t_state = TRANSPORT_PROCESSING;
+	cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
+	spin_unlock_irq(&cmd->t_state_lock);
+
+	__target_execute_cmd(cmd);
+
+	kfree(buf);
+	return ret;
+
+miscompare:
+	pr_warn("Target/%s: Send MISCOMPARE check condition and sense\n",
+		dev->transport->name);
+	ret = TCM_MISCOMPARE_VERIFY;
+out:
+	/*
+	 * In the MISCOMPARE or failure case, unlock ->caw_sem obtained in
+	 * sbc_compare_and_write() before the original READ I/O submission.
+	 */
+	up(&dev->caw_sem);
+	kfree(write_sg);
+	kfree(buf);
+	return ret;
+}
+
+static sense_reason_t
+sbc_compare_and_write(struct se_cmd *cmd)
+{
+	struct se_device *dev = cmd->se_dev;
+	sense_reason_t ret;
+	int rc;
+	/*
+	 * Submit the READ first for COMPARE_AND_WRITE to perform the
+	 * comparison using SGLs at cmd->t_bidi_data_sg..
+	 */
+	rc = down_interruptible(&dev->caw_sem);
+	if ((rc != 0) || signal_pending(current)) {
+		cmd->transport_complete_callback = NULL;
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	}
+
+	ret = cmd->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents,
+			      DMA_FROM_DEVICE);
+	if (ret) {
+		cmd->transport_complete_callback = NULL;
+		up(&dev->caw_sem);
+		return ret;
+	}
+	/*
+	 * Unlock of dev->caw_sem to occur in compare_and_write_callback()
+	 * upon MISCOMPARE, or in compare_and_write_post() upon completion
+	 * of WRITE instance user-data.
+	 */
+	return TCM_NO_SENSE;
 }
 
 sense_reason_t
@@ -348,31 +538,36 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		sectors = transport_get_sectors_6(cdb);
 		cmd->t_task_lba = transport_lba_21(cdb);
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-		cmd->execute_cmd = ops->execute_rw;
+		cmd->execute_rw = ops->execute_rw;
+		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case READ_10:
 		sectors = transport_get_sectors_10(cdb);
 		cmd->t_task_lba = transport_lba_32(cdb);
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-		cmd->execute_cmd = ops->execute_rw;
+		cmd->execute_rw = ops->execute_rw;
+		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case READ_12:
 		sectors = transport_get_sectors_12(cdb);
 		cmd->t_task_lba = transport_lba_32(cdb);
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-		cmd->execute_cmd = ops->execute_rw;
+		cmd->execute_rw = ops->execute_rw;
+		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case READ_16:
 		sectors = transport_get_sectors_16(cdb);
 		cmd->t_task_lba = transport_lba_64(cdb);
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-		cmd->execute_cmd = ops->execute_rw;
+		cmd->execute_rw = ops->execute_rw;
+		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case WRITE_6:
 		sectors = transport_get_sectors_6(cdb);
 		cmd->t_task_lba = transport_lba_21(cdb);
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-		cmd->execute_cmd = ops->execute_rw;
+		cmd->execute_rw = ops->execute_rw;
+		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case WRITE_10:
 	case WRITE_VERIFY:
@@ -381,7 +576,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		if (cdb[1] & 0x8)
 			cmd->se_cmd_flags |= SCF_FUA;
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-		cmd->execute_cmd = ops->execute_rw;
+		cmd->execute_rw = ops->execute_rw;
+		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case WRITE_12:
 		sectors = transport_get_sectors_12(cdb);
@@ -389,7 +585,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		if (cdb[1] & 0x8)
 			cmd->se_cmd_flags |= SCF_FUA;
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-		cmd->execute_cmd = ops->execute_rw;
+		cmd->execute_rw = ops->execute_rw;
+		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case WRITE_16:
 		sectors = transport_get_sectors_16(cdb);
@@ -397,7 +594,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		if (cdb[1] & 0x8)
 			cmd->se_cmd_flags |= SCF_FUA;
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-		cmd->execute_cmd = ops->execute_rw;
+		cmd->execute_rw = ops->execute_rw;
+		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case XDWRITEREAD_10:
 		if (cmd->data_direction != DMA_TO_DEVICE ||
@@ -411,7 +609,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		/*
 		 * Setup BIDI XOR callback to be run after I/O completion.
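sbc_compare_and_write() serializes COMPARE_AND_WRITE per device behind the new ->caw_sem: the semaphore is taken before the READ phase is submitted, and released on the miscompare/error paths or once the WRITE phase completes. Reduced to a sketch, assuming only the fields shown above:

static sense_reason_t example_caw_begin(struct se_cmd *cmd)
{
	struct se_device *dev = cmd->se_dev;

	/* Only one COMPARE_AND_WRITE may be in flight per device */
	if (down_interruptible(&dev->caw_sem))
		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;

	/*
	 * The matching up(&dev->caw_sem) happens later: in the completion
	 * callback on miscompare or failure, or after the WRITE instance
	 * of user-data has completed.
	 */
	return TCM_NO_SENSE;
}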
*/ - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; @@ -434,7 +633,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) * Setup BIDI XOR callback to be run during after I/O * completion. */ - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; @@ -461,6 +661,28 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) } break; } + case COMPARE_AND_WRITE: + sectors = cdb[13]; + /* + * Currently enforce COMPARE_AND_WRITE for a single sector + */ + if (sectors > 1) { + pr_err("COMPARE_AND_WRITE contains NoLB: %u greater" + " than 1\n", sectors); + return TCM_INVALID_CDB_FIELD; + } + /* + * Double size because we have two buffers, note that + * zero is not an error.. + */ + size = 2 * sbc_get_size(cmd, sectors); + cmd->t_task_lba = get_unaligned_be64(&cdb[2]); + cmd->t_task_nolb = sectors; + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_compare_and_write; + cmd->transport_complete_callback = compare_and_write_callback; + break; case READ_CAPACITY: size = READ_CAP_LEN; cmd->execute_cmd = sbc_emulate_readcapacity; @@ -600,7 +822,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return TCM_ADDRESS_OUT_OF_RANGE; } - size = sbc_get_size(cmd, sectors); + if (!(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)) + size = sbc_get_size(cmd, sectors); } return target_cmd_size_check(cmd, size); diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 9fabbf7214cd..074539558a54 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1,7 +1,7 @@ /* * SCSI Primary Commands (SPC) parsing and emulation. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. 
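For reference, the COMPARE_AND_WRITE (0x89) fields consumed by the sbc_parse_cdb() hunk above, shown as an illustrative initiator-side sketch (lba is a placeholder, not from the patch):

static void example_build_caw_cdb(unsigned char *cdb, u64 lba)
{
	memset(cdb, 0, 16);
	cdb[0] = 0x89;				/* COMPARE AND WRITE */
	put_unaligned_be64(lba, &cdb[2]);	/* LOGICAL BLOCK ADDRESS: bytes 2..9 */
	cdb[13] = 1;				/* NoLB: this target enforces a single sector */
	/*
	 * The data-out buffer carries the verify payload followed by the
	 * write payload back to back, which is why the parser above sets
	 * size = 2 * sbc_get_size(cmd, sectors).
	 */
}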
Bellinger <nab@kernel.org> * @@ -35,7 +35,7 @@ #include "target_core_alua.h" #include "target_core_pr.h" #include "target_core_ua.h" - +#include "target_core_xcopy.h" static void spc_fill_alua_data(struct se_port *port, unsigned char *buf) { @@ -95,6 +95,12 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf) */ spc_fill_alua_data(lun->lun_sep, buf); + /* + * Set Third-Party Copy (3PC) bit to indicate support for EXTENDED_COPY + */ + if (dev->dev_attrib.emulate_3pc) + buf[5] |= 0x8; + buf[7] = 0x2; /* CmdQue=1 */ memcpy(&buf[8], "LIO-ORG ", 8); @@ -129,8 +135,8 @@ spc_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf) return 0; } -static void spc_parse_naa_6h_vendor_specific(struct se_device *dev, - unsigned char *buf) +void spc_parse_naa_6h_vendor_specific(struct se_device *dev, + unsigned char *buf) { unsigned char *p = &dev->t10_wwn.unit_serial[0]; int cnt; @@ -460,6 +466,11 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* Set WSNZ to 1 */ buf[4] = 0x01; + /* + * Set MAXIMUM COMPARE AND WRITE LENGTH + */ + if (dev->dev_attrib.emulate_caw) + buf[5] = 0x01; /* * Set OPTIMAL TRANSFER LENGTH GRANULARITY @@ -1250,8 +1261,14 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) *size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9]; break; case EXTENDED_COPY: - case READ_ATTRIBUTE: + *size = get_unaligned_be32(&cdb[10]); + cmd->execute_cmd = target_do_xcopy; + break; case RECEIVE_COPY_RESULTS: + *size = get_unaligned_be32(&cdb[10]); + cmd->execute_cmd = target_do_receive_copy_results; + break; + case READ_ATTRIBUTE: case WRITE_ATTRIBUTE: *size = (cdb[10] << 24) | (cdb[11] << 16) | (cdb[12] << 8) | cdb[13]; diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index d154ce797180..9c642e02cba1 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -4,7 +4,7 @@ * Modern ConfigFS group context specific statistics based on original * target_core_mib.c code * - * (c) Copyright 2006-2012 RisingTide Systems LLC. + * (c) Copyright 2006-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 0d7cacb91107..250009909d49 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -3,7 +3,7 @@ * * This file contains SPC-3 task management infrastructure * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index aac9d2727e3c..b9a6ec0aa5fe 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -3,7 +3,7 @@ * * This file contains generic Target Portal Group related functions. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index d8e49d79f8cc..84747cc1aac0 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3,7 +3,7 @@ * * This file contains the Generic Target Engine Core. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org>
 *
@@ -67,7 +67,6 @@ struct kmem_cache *t10_alua_tg_pt_gp_mem_cache;
 
 static void transport_complete_task_attr(struct se_cmd *cmd);
 static void transport_handle_queue_full(struct se_cmd *cmd,
 		struct se_device *dev);
-static int transport_generic_get_mem(struct se_cmd *cmd);
 static int transport_put_cmd(struct se_cmd *cmd);
 static void target_complete_ok_work(struct work_struct *work);
 
@@ -232,6 +231,50 @@ struct se_session *transport_init_session(void)
 }
 EXPORT_SYMBOL(transport_init_session);
 
+int transport_alloc_session_tags(struct se_session *se_sess,
+		unsigned int tag_num, unsigned int tag_size)
+{
+	int rc;
+
+	se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, GFP_KERNEL);
+	if (!se_sess->sess_cmd_map) {
+		pr_err("Unable to allocate se_sess->sess_cmd_map\n");
+		return -ENOMEM;
+	}
+
+	rc = percpu_ida_init(&se_sess->sess_tag_pool, tag_num);
+	if (rc < 0) {
+		pr_err("Unable to init se_sess->sess_tag_pool,"
+			" tag_num: %u\n", tag_num);
+		kfree(se_sess->sess_cmd_map);
+		se_sess->sess_cmd_map = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(transport_alloc_session_tags);
+
+struct se_session *transport_init_session_tags(unsigned int tag_num,
+		unsigned int tag_size)
+{
+	struct se_session *se_sess;
+	int rc;
+
+	se_sess = transport_init_session();
+	if (IS_ERR(se_sess))
+		return se_sess;
+
+	rc = transport_alloc_session_tags(se_sess, tag_num, tag_size);
+	if (rc < 0) {
+		transport_free_session(se_sess);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return se_sess;
+}
+EXPORT_SYMBOL(transport_init_session_tags);
+
 /*
  * Called with struct se_portal_group->session_lock held.
  */
@@ -367,6 +410,10 @@ EXPORT_SYMBOL(transport_deregister_session_configfs);
 
 void transport_free_session(struct se_session *se_sess)
 {
+	if (se_sess->sess_cmd_map) {
+		percpu_ida_destroy(&se_sess->sess_tag_pool);
+		kfree(se_sess->sess_cmd_map);
+	}
 	kmem_cache_free(se_sess_cache, se_sess);
 }
 EXPORT_SYMBOL(transport_free_session);
@@ -1206,7 +1253,7 @@ int transport_handle_cdb_direct(
 }
 EXPORT_SYMBOL(transport_handle_cdb_direct);
 
-static sense_reason_t
+sense_reason_t
 transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
 	u32 sgl_count, struct scatterlist *sgl_bidi, u32 sgl_bidi_count)
 {
@@ -1512,6 +1559,13 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 	 * For SAM Task Attribute emulation for failed struct se_cmd
 	 */
 	transport_complete_task_attr(cmd);
+	/*
+	 * Handle special case for COMPARE_AND_WRITE failure, where the
+	 * callback is expected to drop the per-device ->caw_sem.
+	 */
+	if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
+	     cmd->transport_complete_callback)
+		cmd->transport_complete_callback(cmd);
 
 	switch (sense_reason) {
 	case TCM_NON_EXISTENT_LUN:
@@ -1579,7 +1633,7 @@ queue_full:
 }
 EXPORT_SYMBOL(transport_generic_request_failure);
 
-static void __target_execute_cmd(struct se_cmd *cmd)
+void __target_execute_cmd(struct se_cmd *cmd)
 {
 	sense_reason_t ret;
 
@@ -1784,7 +1838,7 @@ static void transport_complete_qf(struct se_cmd *cmd)
 		ret = cmd->se_tfo->queue_data_in(cmd);
 		break;
 	case DMA_TO_DEVICE:
-		if (cmd->t_bidi_data_sg) {
+		if (cmd->se_cmd_flags & SCF_BIDI) {
 			ret = cmd->se_tfo->queue_data_in(cmd);
 			if (ret < 0)
 				break;
@@ -1856,10 +1910,25 @@ static void target_complete_ok_work(struct work_struct *work)
 	}
 	/*
 	 * Check for a callback, used by, amongst other things,
-	 * XDWRITE_READ_10 emulation.
+	 * XDWRITE_READ_10 and COMPARE_AND_WRITE emulation.
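transport_init_session_tags() above is the fabric-facing entry point for the new per-cpu IDA pre-allocation: per-I/O descriptors come out of one contiguous sess_cmd_map allocation and are indexed by tags from sess_tag_pool. A hedged sketch of the intended call pattern (struct example_cmd is hypothetical; percpu_ida_alloc()/percpu_ida_free() come from the generic per-cpu IDA code this series merges):

struct example_cmd {
	struct se_cmd se_cmd;
	/* ... fabric-private per-I/O state ... */
};

/* At session login: one allocation backs all per-I/O descriptors */
struct se_session *se_sess =
	transport_init_session_tags(128, sizeof(struct example_cmd));

static struct example_cmd *example_get_cmd(struct se_session *se_sess)
{
	struct example_cmd *cmd;
	int tag;

	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_KERNEL);
	if (tag < 0)
		return NULL;

	cmd = &((struct example_cmd *)se_sess->sess_cmd_map)[tag];
	/* stash 'tag' in the descriptor for the later percpu_ida_free() */
	return cmd;
}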
*/ - if (cmd->transport_complete_callback) - cmd->transport_complete_callback(cmd); + if (cmd->transport_complete_callback) { + sense_reason_t rc; + + rc = cmd->transport_complete_callback(cmd); + if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) { + return; + } else if (rc) { + ret = transport_send_check_condition_and_sense(cmd, + rc, 0); + if (ret == -EAGAIN || ret == -ENOMEM) + goto queue_full; + + transport_lun_remove_cmd(cmd); + transport_cmd_check_stop_to_fabric(cmd); + return; + } + } switch (cmd->data_direction) { case DMA_FROM_DEVICE: @@ -1885,7 +1954,7 @@ static void target_complete_ok_work(struct work_struct *work) /* * Check if we need to send READ payload for BIDI-COMMAND */ - if (cmd->t_bidi_data_sg) { + if (cmd->se_cmd_flags & SCF_BIDI) { spin_lock(&cmd->se_lun->lun_sep_lock); if (cmd->se_lun->lun_sep) { cmd->se_lun->lun_sep->sep_stats.tx_data_octets += @@ -1930,10 +1999,29 @@ static inline void transport_free_sgl(struct scatterlist *sgl, int nents) kfree(sgl); } +static inline void transport_reset_sgl_orig(struct se_cmd *cmd) +{ + /* + * Check for saved t_data_sg that may be used for COMPARE_AND_WRITE + * emulation, and free + reset pointers if necessary.. + */ + if (!cmd->t_data_sg_orig) + return; + + kfree(cmd->t_data_sg); + cmd->t_data_sg = cmd->t_data_sg_orig; + cmd->t_data_sg_orig = NULL; + cmd->t_data_nents = cmd->t_data_nents_orig; + cmd->t_data_nents_orig = 0; +} + static inline void transport_free_pages(struct se_cmd *cmd) { - if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) + if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) { + transport_reset_sgl_orig(cmd); return; + } + transport_reset_sgl_orig(cmd); transport_free_sgl(cmd->t_data_sg, cmd->t_data_nents); cmd->t_data_sg = NULL; @@ -2029,24 +2117,22 @@ void transport_kunmap_data_sg(struct se_cmd *cmd) } EXPORT_SYMBOL(transport_kunmap_data_sg); -static int -transport_generic_get_mem(struct se_cmd *cmd) +int +target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, u32 length, + bool zero_page) { - u32 length = cmd->data_length; - unsigned int nents; + struct scatterlist *sg; struct page *page; - gfp_t zero_flag; + gfp_t zero_flag = (zero_page) ? __GFP_ZERO : 0; + unsigned int nent; int i = 0; - nents = DIV_ROUND_UP(length, PAGE_SIZE); - cmd->t_data_sg = kmalloc(sizeof(struct scatterlist) * nents, GFP_KERNEL); - if (!cmd->t_data_sg) + nent = DIV_ROUND_UP(length, PAGE_SIZE); + sg = kmalloc(sizeof(struct scatterlist) * nent, GFP_KERNEL); + if (!sg) return -ENOMEM; - cmd->t_data_nents = nents; - sg_init_table(cmd->t_data_sg, nents); - - zero_flag = cmd->se_cmd_flags & SCF_SCSI_DATA_CDB ? 
0 : __GFP_ZERO; + sg_init_table(sg, nent); while (length) { u32 page_len = min_t(u32, length, PAGE_SIZE); @@ -2054,19 +2140,20 @@ transport_generic_get_mem(struct se_cmd *cmd) if (!page) goto out; - sg_set_page(&cmd->t_data_sg[i], page, page_len, 0); + sg_set_page(&sg[i], page, page_len, 0); length -= page_len; i++; } + *sgl = sg; + *nents = nent; return 0; out: while (i > 0) { i--; - __free_page(sg_page(&cmd->t_data_sg[i])); + __free_page(sg_page(&sg[i])); } - kfree(cmd->t_data_sg); - cmd->t_data_sg = NULL; + kfree(sg); return -ENOMEM; } @@ -2087,7 +2174,27 @@ transport_generic_new_cmd(struct se_cmd *cmd) */ if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) && cmd->data_length) { - ret = transport_generic_get_mem(cmd); + bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB); + + if ((cmd->se_cmd_flags & SCF_BIDI) || + (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)) { + u32 bidi_length; + + if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) + bidi_length = cmd->t_task_nolb * + cmd->se_dev->dev_attrib.block_size; + else + bidi_length = cmd->data_length; + + ret = target_alloc_sgl(&cmd->t_bidi_data_sg, + &cmd->t_bidi_data_nents, + bidi_length, zero_flag); + if (ret < 0) + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } + + ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, + cmd->data_length, zero_flag); if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } @@ -2740,6 +2847,15 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, buffer[SPC_ASC_KEY_OFFSET] = asc; buffer[SPC_ASCQ_KEY_OFFSET] = ascq; break; + case TCM_MISCOMPARE_VERIFY: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + buffer[SPC_SENSE_KEY_OFFSET] = MISCOMPARE; + /* MISCOMPARE DURING VERIFY OPERATION */ + buffer[SPC_ASC_KEY_OFFSET] = 0x1d; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x00; + break; case TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE: default: /* CURRENT ERROR */ diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index bf0e390ce2d7..b04467e7547c 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -3,7 +3,7 @@ * * This file contains logic for SPC-3 Unit Attention emulation * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c new file mode 100644 index 000000000000..4d22e7d2adca --- /dev/null +++ b/drivers/target/target_core_xcopy.c @@ -0,0 +1,1081 @@ +/******************************************************************************* + * Filename: target_core_xcopy.c + * + * This file contains support for SPC-4 Extended-Copy offload with generic + * TCM backends. + * + * Copyright (c) 2011-2013 Datera, Inc. All rights reserved. + * + * Author: + * Nicholas A. Bellinger <nab@daterainc.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
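target_alloc_sgl() above generalizes the old transport_generic_get_mem() so call sites outside transport.c (the COMPARE_AND_WRITE bidi buffer, the EXTENDED_COPY passthrough below) can build page-backed scatterlists. A sketch of a caller, assuming only what the hunk exports:

static sense_reason_t example_alloc_data(struct se_cmd *cmd, u32 length)
{
	int ret;

	/* Allocate a zeroed, page-backed SGL covering 'length' bytes */
	ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents,
			       length, true);
	if (ret < 0)
		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;

	/* pages are later released by the transport free path, or manually
	 * via __free_page() on each entry plus kfree() of the SGL itself */
	return TCM_NO_SENSE;
}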
+ * + ******************************************************************************/ + +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/configfs.h> +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <asm/unaligned.h> + +#include <target/target_core_base.h> +#include <target/target_core_backend.h> +#include <target/target_core_fabric.h> +#include <target/target_core_configfs.h> + +#include "target_core_pr.h" +#include "target_core_ua.h" +#include "target_core_xcopy.h" + +static struct workqueue_struct *xcopy_wq = NULL; +/* + * From target_core_spc.c + */ +extern void spc_parse_naa_6h_vendor_specific(struct se_device *, unsigned char *); +/* + * From target_core_device.c + */ +extern struct mutex g_device_mutex; +extern struct list_head g_device_list; +/* + * From target_core_configfs.c + */ +extern struct configfs_subsystem *target_core_subsystem[]; + +static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf) +{ + int off = 0; + + buf[off++] = (0x6 << 4); + buf[off++] = 0x01; + buf[off++] = 0x40; + buf[off] = (0x5 << 4); + + spc_parse_naa_6h_vendor_specific(dev, &buf[off]); + return 0; +} + +static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, + bool src) +{ + struct se_device *se_dev; + struct configfs_subsystem *subsys = target_core_subsystem[0]; + unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn; + int rc; + + if (src == true) + dev_wwn = &xop->dst_tid_wwn[0]; + else + dev_wwn = &xop->src_tid_wwn[0]; + + mutex_lock(&g_device_mutex); + list_for_each_entry(se_dev, &g_device_list, g_dev_node) { + + memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); + target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); + + rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); + if (rc != 0) + continue; + + if (src == true) { + xop->dst_dev = se_dev; + pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located" + " se_dev\n", xop->dst_dev); + } else { + xop->src_dev = se_dev; + pr_debug("XCOPY 0xe4: Setting xop->src_dev: %p from located" + " se_dev\n", xop->src_dev); + } + + rc = configfs_depend_item(subsys, + &se_dev->dev_group.cg_item); + if (rc != 0) { + pr_err("configfs_depend_item attempt failed:" + " %d for se_dev: %p\n", rc, se_dev); + mutex_unlock(&g_device_mutex); + return rc; + } + + pr_debug("Called configfs_depend_item for subsys: %p se_dev: %p" + " se_dev->se_dev_group: %p\n", subsys, se_dev, + &se_dev->dev_group); + + mutex_unlock(&g_device_mutex); + return 0; + } + mutex_unlock(&g_device_mutex); + + pr_err("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + return -EINVAL; +} + +static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, + unsigned char *p, bool src) +{ + unsigned char *desc = p; + unsigned short ript; + u8 desig_len; + /* + * Extract RELATIVE INITIATOR PORT IDENTIFIER + */ + ript = get_unaligned_be16(&desc[2]); + pr_debug("XCOPY 0xe4: RELATIVE INITIATOR PORT IDENTIFIER: %hu\n", ript); + /* + * Check for supported code set, association, and designator type + */ + if ((desc[4] & 0x0f) != 0x1) { + pr_err("XCOPY 0xe4: code set of non binary type not supported\n"); + return -EINVAL; + } + if ((desc[5] & 0x30) != 0x00) { + pr_err("XCOPY 0xe4: association other than LUN not supported\n"); + return -EINVAL; + } + if ((desc[5] & 0x0f) != 0x3) { + pr_err("XCOPY 0xe4: designator type unsupported: 0x%02x\n", + (desc[5] & 0x0f)); + return -EINVAL; + } + /* + * Check for matching 16 byte length for NAA IEEE Registered 
Extended + * Assigned designator + */ + desig_len = desc[7]; + if (desig_len != 16) { + pr_err("XCOPY 0xe4: invalid desig_len: %d\n", (int)desig_len); + return -EINVAL; + } + pr_debug("XCOPY 0xe4: desig_len: %d\n", (int)desig_len); + /* + * Check for NAA IEEE Registered Extended Assigned header.. + */ + if ((desc[8] & 0xf0) != 0x60) { + pr_err("XCOPY 0xe4: Unsupported DESIGNATOR TYPE: 0x%02x\n", + (desc[8] & 0xf0)); + return -EINVAL; + } + + if (src == true) { + memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN); + /* + * Determine if the source designator matches the local device + */ + if (!memcmp(&xop->local_dev_wwn[0], &xop->src_tid_wwn[0], + XCOPY_NAA_IEEE_REGEX_LEN)) { + xop->op_origin = XCOL_SOURCE_RECV_OP; + xop->src_dev = se_cmd->se_dev; + pr_debug("XCOPY 0xe4: Set xop->src_dev %p from source" + " received xop\n", xop->src_dev); + } + } else { + memcpy(&xop->dst_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN); + /* + * Determine if the destination designator matches the local device + */ + if (!memcmp(&xop->local_dev_wwn[0], &xop->dst_tid_wwn[0], + XCOPY_NAA_IEEE_REGEX_LEN)) { + xop->op_origin = XCOL_DEST_RECV_OP; + xop->dst_dev = se_cmd->se_dev; + pr_debug("XCOPY 0xe4: Set xop->dst_dev: %p from destination" + " received xop\n", xop->dst_dev); + } + } + + return 0; +} + +static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, + struct xcopy_op *xop, unsigned char *p, + unsigned short tdll) +{ + struct se_device *local_dev = se_cmd->se_dev; + unsigned char *desc = p; + int offset = tdll % XCOPY_TARGET_DESC_LEN, rc, ret = 0; + unsigned short start = 0; + bool src = true; + + if (offset != 0) { + pr_err("XCOPY target descriptor list length is not" + " multiple of %d\n", XCOPY_TARGET_DESC_LEN); + return -EINVAL; + } + if (tdll > 64) { + pr_err("XCOPY target descriptor supports a maximum" + " two src/dest descriptors, tdll: %hu too large..\n", tdll); + return -EINVAL; + } + /* + * Generate an IEEE Registered Extended designator based upon the + * se_device the XCOPY was received upon.. + */ + memset(&xop->local_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); + target_xcopy_gen_naa_ieee(local_dev, &xop->local_dev_wwn[0]); + + while (start < tdll) { + /* + * Check target descriptor identification with 0xE4 type with + * use VPD 0x83 WWPN matching .. + */ + switch (desc[0]) { + case 0xe4: + rc = target_xcopy_parse_tiddesc_e4(se_cmd, xop, + &desc[0], src); + if (rc != 0) + goto out; + /* + * Assume target descriptors are in source -> destination order.. 
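Summarizing the 0xE4 identification descriptor fields that target_xcopy_parse_tiddesc_e4() checks above (an offset map sketched from the parser, not quoted from SPC-4):

/*
 * EXTENDED_COPY 0xE4 target descriptor, XCOPY_TARGET_DESC_LEN = 32 bytes:
 *   desc[0]     0xe4 (identification descriptor)
 *   desc[2..3]  RELATIVE INITIATOR PORT IDENTIFIER (be16)
 *   desc[4]     low nibble: code set, must be 0x1 (binary)
 *   desc[5]     bits 5:4: association, must be LUN; bits 3:0: designator
 *               type, must be 0x3 (NAA)
 *   desc[7]     designator length, must be 16
 *   desc[8..23] NAA IEEE Registered Extended WWN, desc[8] high nibble == 0x6
 */
if ((desc[4] & 0x0f) != 0x1 || (desc[5] & 0x0f) != 0x3 || desc[7] != 16)
	return -EINVAL;	/* only binary NAA LUN designators are accepted */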
+			 */
+			if (src == true)
+				src = false;
+			else
+				src = true;
+			start += XCOPY_TARGET_DESC_LEN;
+			desc += XCOPY_TARGET_DESC_LEN;
+			ret++;
+			break;
+		default:
+			pr_err("XCOPY unsupported descriptor type code:"
+				" 0x%02x\n", desc[0]);
+			goto out;
+		}
+	}
+
+	if (xop->op_origin == XCOL_SOURCE_RECV_OP)
+		rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true);
+	else
+		rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false);
+
+	if (rc < 0)
+		goto out;
+
+	pr_debug("XCOPY TGT desc: Source dev: %p NAA IEEE WWN: 0x%16phN\n",
+		 xop->src_dev, &xop->src_tid_wwn[0]);
+	pr_debug("XCOPY TGT desc: Dest dev: %p NAA IEEE WWN: 0x%16phN\n",
+		 xop->dst_dev, &xop->dst_tid_wwn[0]);
+
+	return ret;
+
+out:
+	return -EINVAL;
+}
+
+static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op *xop,
+					unsigned char *p)
+{
+	unsigned char *desc = p;
+	int dc = (desc[1] & 0x02);
+	unsigned short desc_len;
+
+	desc_len = get_unaligned_be16(&desc[2]);
+	if (desc_len != 0x18) {
+		pr_err("XCOPY segment desc 0x02: Illegal desc_len:"
+			" %hu\n", desc_len);
+		return -EINVAL;
+	}
+
+	xop->stdi = get_unaligned_be16(&desc[4]);
+	xop->dtdi = get_unaligned_be16(&desc[6]);
+	pr_debug("XCOPY seg desc 0x02: desc_len: %hu stdi: %hu dtdi: %hu, DC: %d\n",
+		desc_len, xop->stdi, xop->dtdi, dc);
+
+	xop->nolb = get_unaligned_be16(&desc[10]);
+	xop->src_lba = get_unaligned_be64(&desc[12]);
+	xop->dst_lba = get_unaligned_be64(&desc[20]);
+	pr_debug("XCOPY seg desc 0x02: nolb: %hu src_lba: %llu dst_lba: %llu\n",
+		xop->nolb, (unsigned long long)xop->src_lba,
+		(unsigned long long)xop->dst_lba);
+
+	if (dc != 0) {
+		xop->dbl = (desc[29] & 0xff) << 16;
+		xop->dbl |= (desc[30] & 0xff) << 8;
+		xop->dbl |= desc[31] & 0xff;
+
+		pr_debug("XCOPY seg desc 0x02: DC=1 w/ dbl: %u\n", xop->dbl);
+	}
+	return 0;
+}
+
+static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd,
+				struct xcopy_op *xop, unsigned char *p,
+				unsigned int sdll)
+{
+	unsigned char *desc = p;
+	unsigned int start = 0;
+	int offset = sdll % XCOPY_SEGMENT_DESC_LEN, rc, ret = 0;
+
+	if (offset != 0) {
+		pr_err("XCOPY segment descriptor list length is not"
+			" multiple of %d\n", XCOPY_SEGMENT_DESC_LEN);
+		return -EINVAL;
+	}
+
+	while (start < sdll) {
+		/*
+		 * Check segment descriptor type code for block -> block
+		 */
+		switch (desc[0]) {
+		case 0x02:
+			rc = target_xcopy_parse_segdesc_02(se_cmd, xop, desc);
+			if (rc < 0)
+				goto out;
+
+			ret++;
+			start += XCOPY_SEGMENT_DESC_LEN;
+			desc += XCOPY_SEGMENT_DESC_LEN;
+			break;
+		default:
+			pr_err("XCOPY unsupported segment descriptor"
+				" type: 0x%02x\n", desc[0]);
+			goto out;
+		}
+	}
+
+	return ret;
+
+out:
+	return -EINVAL;
+}
+
+/*
+ * Start xcopy_pt ops
+ */
+
+struct xcopy_pt_cmd {
+	bool remote_port;
+	struct se_cmd se_cmd;
+	struct xcopy_op *xcopy_op;
+	struct completion xpt_passthrough_sem;
+};
+
+static struct se_port xcopy_pt_port;
+static struct se_portal_group xcopy_pt_tpg;
+static struct se_session xcopy_pt_sess;
+static struct se_node_acl xcopy_pt_nacl;
+
+static char *xcopy_pt_get_fabric_name(void)
+{
+	return "xcopy-pt";
+}
+
+static u32 xcopy_pt_get_tag(struct se_cmd *se_cmd)
+{
+	return 0;
+}
+
+static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd)
+{
+	return 0;
+}
+
+static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop)
+{
+	struct configfs_subsystem *subsys = target_core_subsystem[0];
+	struct se_device *remote_dev;
+
+	if (xop->op_origin == XCOL_SOURCE_RECV_OP)
+		remote_dev = xop->dst_dev;
+	else
+		remote_dev = xop->src_dev;
+
+	pr_debug("Calling configfs_undepend_item for 
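And the block -> block (0x02) segment descriptor consumed by target_xcopy_parse_segdesc_02(), as the same kind of sketch derived from the parser:

/*
 * EXTENDED_COPY 0x02 segment descriptor, XCOPY_SEGMENT_DESC_LEN = 28 bytes:
 *   desc[1]      bit 1: DC (destination count)
 *   desc[2..3]   descriptor length, expected 0x18
 *   desc[4..5]   source target descriptor index (stdi, be16)
 *   desc[6..7]   destination target descriptor index (dtdi, be16)
 *   desc[10..11] NUMBER OF BLOCKS (nolb, be16)
 *   desc[12..19] source LBA (be64)
 *   desc[20..27] destination LBA (be64)
 * When DC=1 the parser additionally reads a 24-bit dbl from desc[29..31].
 */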
subsys: %p" + " remote_dev: %p remote_dev->dev_group: %p\n", + subsys, remote_dev, &remote_dev->dev_group.cg_item); + + configfs_undepend_item(subsys, &remote_dev->dev_group.cg_item); +} + +static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) +{ + struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd, + struct xcopy_pt_cmd, se_cmd); + + if (xpt_cmd->remote_port) + kfree(se_cmd->se_lun); + + kfree(xpt_cmd); +} + +static int xcopy_pt_check_stop_free(struct se_cmd *se_cmd) +{ + struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd, + struct xcopy_pt_cmd, se_cmd); + + complete(&xpt_cmd->xpt_passthrough_sem); + return 0; +} + +static int xcopy_pt_write_pending(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_write_pending_status(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_queue_data_in(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_queue_status(struct se_cmd *se_cmd) +{ + return 0; +} + +static struct target_core_fabric_ops xcopy_pt_tfo = { + .get_fabric_name = xcopy_pt_get_fabric_name, + .get_task_tag = xcopy_pt_get_tag, + .get_cmd_state = xcopy_pt_get_cmd_state, + .release_cmd = xcopy_pt_release_cmd, + .check_stop_free = xcopy_pt_check_stop_free, + .write_pending = xcopy_pt_write_pending, + .write_pending_status = xcopy_pt_write_pending_status, + .queue_data_in = xcopy_pt_queue_data_in, + .queue_status = xcopy_pt_queue_status, +}; + +/* + * End xcopy_pt_ops + */ + +int target_xcopy_setup_pt(void) +{ + xcopy_wq = alloc_workqueue("xcopy_wq", WQ_MEM_RECLAIM, 0); + if (!xcopy_wq) { + pr_err("Unable to allocate xcopy_wq\n"); + return -ENOMEM; + } + + memset(&xcopy_pt_port, 0, sizeof(struct se_port)); + INIT_LIST_HEAD(&xcopy_pt_port.sep_alua_list); + INIT_LIST_HEAD(&xcopy_pt_port.sep_list); + mutex_init(&xcopy_pt_port.sep_tg_pt_md_mutex); + + memset(&xcopy_pt_tpg, 0, sizeof(struct se_portal_group)); + INIT_LIST_HEAD(&xcopy_pt_tpg.se_tpg_node); + INIT_LIST_HEAD(&xcopy_pt_tpg.acl_node_list); + INIT_LIST_HEAD(&xcopy_pt_tpg.tpg_sess_list); + + xcopy_pt_port.sep_tpg = &xcopy_pt_tpg; + xcopy_pt_tpg.se_tpg_tfo = &xcopy_pt_tfo; + + memset(&xcopy_pt_nacl, 0, sizeof(struct se_node_acl)); + INIT_LIST_HEAD(&xcopy_pt_nacl.acl_list); + INIT_LIST_HEAD(&xcopy_pt_nacl.acl_sess_list); + memset(&xcopy_pt_sess, 0, sizeof(struct se_session)); + INIT_LIST_HEAD(&xcopy_pt_sess.sess_list); + INIT_LIST_HEAD(&xcopy_pt_sess.sess_acl_list); + + xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg; + xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess; + + xcopy_pt_sess.se_tpg = &xcopy_pt_tpg; + xcopy_pt_sess.se_node_acl = &xcopy_pt_nacl; + + return 0; +} + +void target_xcopy_release_pt(void) +{ + if (xcopy_wq) + destroy_workqueue(xcopy_wq); +} + +static void target_xcopy_setup_pt_port( + struct xcopy_pt_cmd *xpt_cmd, + struct xcopy_op *xop, + bool remote_port) +{ + struct se_cmd *ec_cmd = xop->xop_se_cmd; + struct se_cmd *pt_cmd = &xpt_cmd->se_cmd; + + if (xop->op_origin == XCOL_SOURCE_RECV_OP) { + /* + * Honor destination port reservations for X-COPY PUSH emulation + * when CDB is received on local source port, and READs blocks to + * WRITE on remote destination port. 
+		 */
+		if (remote_port) {
+			xpt_cmd->remote_port = remote_port;
+			pt_cmd->se_lun->lun_sep = &xcopy_pt_port;
+			pr_debug("Setup emulated remote DEST xcopy_pt_port: %p to"
+				" cmd->se_lun->lun_sep for X-COPY data PUSH\n",
+				pt_cmd->se_lun->lun_sep);
+		} else {
+			pt_cmd->se_lun = ec_cmd->se_lun;
+			pt_cmd->se_dev = ec_cmd->se_dev;
+
+			pr_debug("Honoring local SRC port from ec_cmd->se_dev:"
+				" %p\n", pt_cmd->se_dev);
+			pt_cmd->se_lun = ec_cmd->se_lun;
+			pr_debug("Honoring local SRC port from ec_cmd->se_lun: %p\n",
+				pt_cmd->se_lun);
+		}
+	} else {
+		/*
+		 * Honor source port reservation for X-COPY PULL emulation
+		 * when CDB is received on local destination port, and READs
+		 * blocks from the remote source port to WRITE on local
+		 * destination port.
+		 */
+		if (remote_port) {
+			xpt_cmd->remote_port = remote_port;
+			pt_cmd->se_lun->lun_sep = &xcopy_pt_port;
+			pr_debug("Setup emulated remote SRC xcopy_pt_port: %p to"
+				" cmd->se_lun->lun_sep for X-COPY data PULL\n",
+				pt_cmd->se_lun->lun_sep);
+		} else {
+			pt_cmd->se_lun = ec_cmd->se_lun;
+			pt_cmd->se_dev = ec_cmd->se_dev;
+
+			pr_debug("Honoring local DST port from ec_cmd->se_dev:"
+				" %p\n", pt_cmd->se_dev);
+			pt_cmd->se_lun = ec_cmd->se_lun;
+			pr_debug("Honoring local DST port from ec_cmd->se_lun: %p\n",
+				pt_cmd->se_lun);
+		}
+	}
+}
+
+static int target_xcopy_init_pt_lun(
+	struct xcopy_pt_cmd *xpt_cmd,
+	struct xcopy_op *xop,
+	struct se_device *se_dev,
+	struct se_cmd *pt_cmd,
+	bool remote_port)
+{
+	/*
+	 * Don't allocate + init a pt_cmd->se_lun if honoring local port for
+	 * reservations. The pt_cmd->se_lun pointer will be setup from within
+	 * target_xcopy_setup_pt_port()
+	 */
+	if (remote_port == false) {
+		pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
+		return 0;
+	}
+
+	pt_cmd->se_lun = kzalloc(sizeof(struct se_lun), GFP_KERNEL);
+	if (!pt_cmd->se_lun) {
+		pr_err("Unable to allocate pt_cmd->se_lun\n");
+		return -ENOMEM;
+	}
+	init_completion(&pt_cmd->se_lun->lun_shutdown_comp);
+	INIT_LIST_HEAD(&pt_cmd->se_lun->lun_cmd_list);
+	INIT_LIST_HEAD(&pt_cmd->se_lun->lun_acl_list);
+	spin_lock_init(&pt_cmd->se_lun->lun_acl_lock);
+	spin_lock_init(&pt_cmd->se_lun->lun_cmd_lock);
+	spin_lock_init(&pt_cmd->se_lun->lun_sep_lock);
+
+	pt_cmd->se_dev = se_dev;
+
+	pr_debug("Setup emulated se_dev: %p from se_dev\n", pt_cmd->se_dev);
+	pt_cmd->se_lun->lun_se_dev = se_dev;
+	pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
+
+	pr_debug("Setup emulated se_dev: %p to pt_cmd->se_lun->lun_se_dev\n",
+		pt_cmd->se_lun->lun_se_dev);
+
+	return 0;
+}
+
+static int target_xcopy_setup_pt_cmd(
+	struct xcopy_pt_cmd *xpt_cmd,
+	struct xcopy_op *xop,
+	struct se_device *se_dev,
+	unsigned char *cdb,
+	bool remote_port,
+	bool alloc_mem)
+{
+	struct se_cmd *cmd = &xpt_cmd->se_cmd;
+	sense_reason_t sense_rc;
+	int ret = 0, rc;
+	/*
+	 * Setup LUN+port to honor reservations based upon xop->op_origin for
+	 * X-COPY PUSH or X-COPY PULL based upon where the CDB was received.
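target_xcopy_setup_pt_port() above encodes the PUSH/PULL split: the side of the copy that matches the receiving LUN keeps the originating command's se_lun/se_dev so local reservations stay honored, while the other side is pointed at the emulated xcopy_pt_port. The mapping, reduced to a sketch consistent with target_xcopy_read_source()/target_xcopy_write_destination() below:

/*
 * op_origin            local side (reuses ec_cmd lun/dev)  remote side (xcopy_pt_port)
 * XCOL_SOURCE_RECV_OP  READ  (source)                      WRITE (destination) -> PUSH
 * XCOL_DEST_RECV_OP    WRITE (destination)                 READ  (source)      -> PULL
 */
bool read_is_remote  = (xop->op_origin == XCOL_DEST_RECV_OP);
bool write_is_remote = (xop->op_origin == XCOL_SOURCE_RECV_OP);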
+ */ + rc = target_xcopy_init_pt_lun(xpt_cmd, xop, se_dev, cmd, remote_port); + if (rc < 0) { + ret = rc; + goto out; + } + xpt_cmd->xcopy_op = xop; + target_xcopy_setup_pt_port(xpt_cmd, xop, remote_port); + + sense_rc = target_setup_cmd_from_cdb(cmd, cdb); + if (sense_rc) { + ret = -EINVAL; + goto out; + } + + if (alloc_mem) { + rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, + cmd->data_length, false); + if (rc < 0) { + ret = rc; + goto out; + } + /* + * Set this bit so that transport_free_pages() allows the + * caller to release SGLs + physical memory allocated by + * transport_generic_get_mem().. + */ + cmd->se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + } else { + /* + * Here the previously allocated SGLs for the internal READ + * are mapped zero-copy to the internal WRITE. + */ + sense_rc = transport_generic_map_mem_to_cmd(cmd, + xop->xop_data_sg, xop->xop_data_nents, + NULL, 0); + if (sense_rc) { + ret = -EINVAL; + goto out; + } + + pr_debug("Setup PASSTHROUGH_NOALLOC t_data_sg: %p t_data_nents:" + " %u\n", cmd->t_data_sg, cmd->t_data_nents); + } + + return 0; + +out: + if (remote_port == true) + kfree(cmd->se_lun); + return ret; +} + +static int target_xcopy_issue_pt_cmd(struct xcopy_pt_cmd *xpt_cmd) +{ + struct se_cmd *se_cmd = &xpt_cmd->se_cmd; + sense_reason_t sense_rc; + + sense_rc = transport_generic_new_cmd(se_cmd); + if (sense_rc) + return -EINVAL; + + if (se_cmd->data_direction == DMA_TO_DEVICE) + target_execute_cmd(se_cmd); + + wait_for_completion_interruptible(&xpt_cmd->xpt_passthrough_sem); + + pr_debug("target_xcopy_issue_pt_cmd(): SCSI status: 0x%02x\n", + se_cmd->scsi_status); + return 0; +} + +static int target_xcopy_read_source( + struct se_cmd *ec_cmd, + struct xcopy_op *xop, + struct se_device *src_dev, + sector_t src_lba, + u32 src_sectors) +{ + struct xcopy_pt_cmd *xpt_cmd; + struct se_cmd *se_cmd; + u32 length = (src_sectors * src_dev->dev_attrib.block_size); + int rc; + unsigned char cdb[16]; + bool remote_port = (xop->op_origin == XCOL_DEST_RECV_OP); + + xpt_cmd = kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL); + if (!xpt_cmd) { + pr_err("Unable to allocate xcopy_pt_cmd\n"); + return -ENOMEM; + } + init_completion(&xpt_cmd->xpt_passthrough_sem); + se_cmd = &xpt_cmd->se_cmd; + + memset(&cdb[0], 0, 16); + cdb[0] = READ_16; + put_unaligned_be64(src_lba, &cdb[2]); + put_unaligned_be32(src_sectors, &cdb[10]); + pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n", + (unsigned long long)src_lba, src_sectors, length); + + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + DMA_FROM_DEVICE, 0, NULL); + xop->src_pt_cmd = xpt_cmd; + + rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0], + remote_port, true); + if (rc < 0) { + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + + xop->xop_data_sg = se_cmd->t_data_sg; + xop->xop_data_nents = se_cmd->t_data_nents; + pr_debug("XCOPY-READ: Saved xop->xop_data_sg: %p, num: %u for READ" + " memory\n", xop->xop_data_sg, xop->xop_data_nents); + + rc = target_xcopy_issue_pt_cmd(xpt_cmd); + if (rc < 0) { + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + /* + * Clear off the allocated t_data_sg, that has been saved for + * zero-copy WRITE submission reuse in struct xcopy_op.. 
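The READ/WRITE pair avoids copying user-data: the READ is set up with alloc_mem=true, its freshly allocated t_data_sg is parked in xop->xop_data_sg, and the WRITE is set up with alloc_mem=false so transport_generic_map_mem_to_cmd() maps the very same pages. The hand-off in outline (a sketch using the names above; read_cmd/write_cmd stand for the two internal se_cmds):

/* READ phase: take ownership of the SGL the core allocated */
xop->xop_data_sg    = read_cmd->t_data_sg;
xop->xop_data_nents = read_cmd->t_data_nents;
read_cmd->t_data_sg = NULL;		/* keep the free path off these pages */
read_cmd->t_data_nents = 0;

/* WRITE phase: map the saved pages zero-copy into the second command */
transport_generic_map_mem_to_cmd(write_cmd, xop->xop_data_sg,
				 xop->xop_data_nents, NULL, 0);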
+	 */
+	se_cmd->t_data_sg = NULL;
+	se_cmd->t_data_nents = 0;
+
+	return 0;
+}
+
+static int target_xcopy_write_destination(
+	struct se_cmd *ec_cmd,
+	struct xcopy_op *xop,
+	struct se_device *dst_dev,
+	sector_t dst_lba,
+	u32 dst_sectors)
+{
+	struct xcopy_pt_cmd *xpt_cmd;
+	struct se_cmd *se_cmd;
+	u32 length = (dst_sectors * dst_dev->dev_attrib.block_size);
+	int rc;
+	unsigned char cdb[16];
+	bool remote_port = (xop->op_origin == XCOL_SOURCE_RECV_OP);
+
+	xpt_cmd = kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL);
+	if (!xpt_cmd) {
+		pr_err("Unable to allocate xcopy_pt_cmd\n");
+		return -ENOMEM;
+	}
+	init_completion(&xpt_cmd->xpt_passthrough_sem);
+	se_cmd = &xpt_cmd->se_cmd;
+
+	memset(&cdb[0], 0, 16);
+	cdb[0] = WRITE_16;
+	put_unaligned_be64(dst_lba, &cdb[2]);
+	put_unaligned_be32(dst_sectors, &cdb[10]);
+	pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n",
+		(unsigned long long)dst_lba, dst_sectors, length);
+
+	transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length,
+				DMA_TO_DEVICE, 0, NULL);
+	xop->dst_pt_cmd = xpt_cmd;
+
+	rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, dst_dev, &cdb[0],
+				remote_port, false);
+	if (rc < 0) {
+		struct se_cmd *src_cmd = &xop->src_pt_cmd->se_cmd;
+		/*
+		 * If the failure happened before the t_data_sg hand-off in
+		 * target_xcopy_setup_pt_cmd(), reset memory + clear flag so that
+		 * core releases this memory on error during X-COPY WRITE I/O.
+		 */
+		src_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
+		src_cmd->t_data_sg = xop->xop_data_sg;
+		src_cmd->t_data_nents = xop->xop_data_nents;
+
+		transport_generic_free_cmd(se_cmd, 0);
+		return rc;
+	}
+
+	rc = target_xcopy_issue_pt_cmd(xpt_cmd);
+	if (rc < 0) {
+		se_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
+		transport_generic_free_cmd(se_cmd, 0);
+		return rc;
+	}
+
+	return 0;
+}
+
+static void target_xcopy_do_work(struct work_struct *work)
+{
+	struct xcopy_op *xop = container_of(work, struct xcopy_op, xop_work);
+	struct se_device *src_dev = xop->src_dev, *dst_dev = xop->dst_dev;
+	struct se_cmd *ec_cmd = xop->xop_se_cmd;
+	sector_t src_lba = xop->src_lba, dst_lba = xop->dst_lba, end_lba;
+	unsigned int max_sectors;
+	int rc;
+	unsigned short nolb = xop->nolb, cur_nolb, max_nolb, copied_nolb = 0;
+
+	end_lba = src_lba + nolb;
+	/*
+	 * Break up XCOPY I/O into hw_max_sectors sized I/O based on the
+	 * smallest hw_max_sectors between src_dev + dst_dev.
+	 */
+	max_sectors = min(src_dev->dev_attrib.hw_max_sectors,
+			dst_dev->dev_attrib.hw_max_sectors);
+	max_sectors = min_t(u32, max_sectors, XCOPY_MAX_SECTORS);
+
+	max_nolb = min_t(u16, max_sectors, ((u16)(~0U)));
+
+	pr_debug("target_xcopy_do_work: nolb: %hu, max_nolb: %hu end_lba: %llu\n",
+			nolb, max_nolb, (unsigned long long)end_lba);
+	pr_debug("target_xcopy_do_work: Starting src_lba: %llu, dst_lba: %llu\n",
+			(unsigned long long)src_lba, (unsigned long long)dst_lba);
+
+	while (src_lba < end_lba) {
+		cur_nolb = min(nolb, max_nolb);
+
+		pr_debug("target_xcopy_do_work: Calling read src_dev: %p src_lba: %llu,"
+			" cur_nolb: %hu\n", src_dev, (unsigned long long)src_lba, cur_nolb);
+
+		rc = target_xcopy_read_source(ec_cmd, xop, src_dev, src_lba, cur_nolb);
+		if (rc < 0)
+			goto out;
+
+		src_lba += cur_nolb;
+		pr_debug("target_xcopy_do_work: Incremented READ src_lba to %llu\n",
+				(unsigned long long)src_lba);
+
+		pr_debug("target_xcopy_do_work: Calling write dst_dev: %p dst_lba: %llu,"
+			" cur_nolb: %hu\n", dst_dev, (unsigned long long)dst_lba, cur_nolb);
+
+		rc = target_xcopy_write_destination(ec_cmd, xop, 
dst_dev, + dst_lba, cur_nolb); + if (rc < 0) { + transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); + goto out; + } + + dst_lba += cur_nolb; + pr_debug("target_xcopy_do_work: Incremented WRITE dst_lba to %llu\n", + (unsigned long long)dst_lba); + + copied_nolb += cur_nolb; + nolb -= cur_nolb; + + transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); + xop->dst_pt_cmd->se_cmd.se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + + transport_generic_free_cmd(&xop->dst_pt_cmd->se_cmd, 0); + } + + xcopy_pt_undepend_remotedev(xop); + kfree(xop); + + pr_debug("target_xcopy_do_work: Final src_lba: %llu, dst_lba: %llu\n", + (unsigned long long)src_lba, (unsigned long long)dst_lba); + pr_debug("target_xcopy_do_work: Blocks copied: %hu, Bytes Copied: %u\n", + copied_nolb, copied_nolb * dst_dev->dev_attrib.block_size); + + pr_debug("target_xcopy_do_work: Setting X-COPY GOOD status -> sending response\n"); + target_complete_cmd(ec_cmd, SAM_STAT_GOOD); + return; + +out: + xcopy_pt_undepend_remotedev(xop); + kfree(xop); + + pr_warn("target_xcopy_do_work: Setting X-COPY CHECK_CONDITION -> sending response\n"); + ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION; + target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION); +} + +sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) +{ + struct xcopy_op *xop = NULL; + unsigned char *p = NULL, *seg_desc; + unsigned int list_id, list_id_usage, sdll, inline_dl, sa; + int rc; + unsigned short tdll; + + sa = se_cmd->t_task_cdb[1] & 0x1f; + if (sa != 0x00) { + pr_err("EXTENDED_COPY(LID4) not supported\n"); + return TCM_UNSUPPORTED_SCSI_OPCODE; + } + + p = transport_kmap_data_sg(se_cmd); + if (!p) { + pr_err("transport_kmap_data_sg() failed in target_do_xcopy\n"); + return TCM_OUT_OF_RESOURCES; + } + + list_id = p[0]; + if (list_id != 0x00) { + pr_err("XCOPY with non zero list_id: 0x%02x\n", list_id); + goto out; + } + list_id_usage = (p[1] & 0x18); + /* + * Determine TARGET DESCRIPTOR LIST LENGTH + SEGMENT DESCRIPTOR LIST LENGTH + */ + tdll = get_unaligned_be16(&p[2]); + sdll = get_unaligned_be32(&p[8]); + + inline_dl = get_unaligned_be32(&p[12]); + if (inline_dl != 0) { + pr_err("XCOPY with non zero inline data length\n"); + goto out; + } + + xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL); + if (!xop) { + pr_err("Unable to allocate xcopy_op\n"); + goto out; + } + xop->xop_se_cmd = se_cmd; + + pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x" + " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage, + tdll, sdll, inline_dl); + + rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll); + if (rc <= 0) + goto out; + + pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc, + rc * XCOPY_TARGET_DESC_LEN); + seg_desc = &p[16]; + seg_desc += (rc * XCOPY_TARGET_DESC_LEN); + + rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, sdll); + if (rc <= 0) { + xcopy_pt_undepend_remotedev(xop); + goto out; + } + transport_kunmap_data_sg(se_cmd); + + pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc, + rc * XCOPY_SEGMENT_DESC_LEN); + INIT_WORK(&xop->xop_work, target_xcopy_do_work); + queue_work(xcopy_wq, &xop->xop_work); + return TCM_NO_SENSE; + +out: + if (p) + transport_kunmap_data_sg(se_cmd); + kfree(xop); + return TCM_INVALID_CDB_FIELD; +} + +static sense_reason_t target_rcr_operating_parameters(struct se_cmd *se_cmd) +{ + unsigned char *p; + + p = transport_kmap_data_sg(se_cmd); + if (!p) { + pr_err("transport_kmap_data_sg failed in" + " target_rcr_operating_parameters\n"); + 
return TCM_OUT_OF_RESOURCES; + } + + if (se_cmd->data_length < 54) { + pr_err("Receive Copy Results Op Parameters length" + " too small: %u\n", se_cmd->data_length); + transport_kunmap_data_sg(se_cmd); + return TCM_INVALID_CDB_FIELD; + } + /* + * Set SNLID=1 (Supports no List ID) + */ + p[4] = 0x1; + /* + * MAXIMUM TARGET DESCRIPTOR COUNT + */ + put_unaligned_be16(RCR_OP_MAX_TARGET_DESC_COUNT, &p[8]); + /* + * MAXIMUM SEGMENT DESCRIPTOR COUNT + */ + put_unaligned_be16(RCR_OP_MAX_SG_DESC_COUNT, &p[10]); + /* + * MAXIMUM DESCRIPTOR LIST LENGTH + */ + put_unaligned_be32(RCR_OP_MAX_DESC_LIST_LEN, &p[12]); + /* + * MAXIMUM SEGMENT LENGTH + */ + put_unaligned_be32(RCR_OP_MAX_SEGMENT_LEN, &p[16]); + /* + * MAXIMUM INLINE DATA LENGTH for SA 0x04 (NOT SUPPORTED) + */ + put_unaligned_be32(0x0, &p[20]); + /* + * HELD DATA LIMIT + */ + put_unaligned_be32(0x0, &p[24]); + /* + * MAXIMUM STREAM DEVICE TRANSFER SIZE + */ + put_unaligned_be32(0x0, &p[28]); + /* + * TOTAL CONCURRENT COPIES + */ + put_unaligned_be16(RCR_OP_TOTAL_CONCURR_COPIES, &p[34]); + /* + * MAXIMUM CONCURRENT COPIES + */ + p[36] = RCR_OP_MAX_CONCURR_COPIES; + /* + * DATA SEGMENT GRANULARITY (log 2) + */ + p[37] = RCR_OP_DATA_SEG_GRAN_LOG2; + /* + * INLINE DATA GRANULARITY (log 2) + */ + p[38] = RCR_OP_INLINE_DATA_GRAN_LOG2; + /* + * HELD DATA GRANULARITY + */ + p[39] = RCR_OP_HELD_DATA_GRAN_LOG2; + /* + * IMPLEMENTED DESCRIPTOR LIST LENGTH + */ + p[43] = 0x2; + /* + * List of implemented descriptor type codes (ordered) + */ + p[44] = 0x02; /* Copy Block to Block device */ + p[45] = 0xe4; /* Identification descriptor target descriptor */ + + /* + * AVAILABLE DATA (n-3) + */ + put_unaligned_be32(42, &p[0]); + + transport_kunmap_data_sg(se_cmd); + target_complete_cmd(se_cmd, GOOD); + + return TCM_NO_SENSE; +} + +sense_reason_t target_do_receive_copy_results(struct se_cmd *se_cmd) +{ + unsigned char *cdb = &se_cmd->t_task_cdb[0]; + int sa = (cdb[1] & 0x1f), list_id = cdb[2]; + sense_reason_t rc = TCM_NO_SENSE; + + pr_debug("Entering target_do_receive_copy_results: SA: 0x%02x, List ID:" + " 0x%02x, AL: %u\n", sa, list_id, se_cmd->data_length); + + if (list_id != 0) { + pr_err("Receive Copy Results with non zero list identifier" + " not supported\n"); + return TCM_INVALID_CDB_FIELD; + } + + switch (sa) { + case RCR_SA_OPERATING_PARAMETERS: + rc = target_rcr_operating_parameters(se_cmd); + break; + case RCR_SA_COPY_STATUS: + case RCR_SA_RECEIVE_DATA: + case RCR_SA_FAILED_SEGMENT_DETAILS: + default: + pr_err("Unsupported SA for receive copy results: 0x%02x\n", sa); + return TCM_INVALID_CDB_FIELD; + } + + return rc; +} diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h new file mode 100644 index 000000000000..700a981c7b41 --- /dev/null +++ b/drivers/target/target_core_xcopy.h @@ -0,0 +1,62 @@ +#define XCOPY_TARGET_DESC_LEN 32 +#define XCOPY_SEGMENT_DESC_LEN 28 +#define XCOPY_NAA_IEEE_REGEX_LEN 16 +#define XCOPY_MAX_SECTORS 1024 + +enum xcopy_origin_list { + XCOL_SOURCE_RECV_OP = 0x01, + XCOL_DEST_RECV_OP = 0x02, +}; + +struct xcopy_pt_cmd; + +struct xcopy_op { + int op_origin; + + struct se_cmd *xop_se_cmd; + struct se_device *src_dev; + unsigned char src_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + struct se_device *dst_dev; + unsigned char dst_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + unsigned char local_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + + sector_t src_lba; + sector_t dst_lba; + unsigned short stdi; + unsigned short dtdi; + unsigned short nolb; + unsigned int dbl; + + struct xcopy_pt_cmd *src_pt_cmd; + struct
xcopy_pt_cmd *dst_pt_cmd; + + u32 xop_data_nents; + struct scatterlist *xop_data_sg; + struct work_struct xop_work; +}; + +/* + * Receive Copy Results Service Actions + */ +#define RCR_SA_COPY_STATUS 0x00 +#define RCR_SA_RECEIVE_DATA 0x01 +#define RCR_SA_OPERATING_PARAMETERS 0x03 +#define RCR_SA_FAILED_SEGMENT_DETAILS 0x04 + +/* + * Receive Copy Results defs for Operating Parameters + */ +#define RCR_OP_MAX_TARGET_DESC_COUNT 0x2 +#define RCR_OP_MAX_SG_DESC_COUNT 0x1 +#define RCR_OP_MAX_DESC_LIST_LEN 1024 +#define RCR_OP_MAX_SEGMENT_LEN 268435456 /* 256 MB */ +#define RCR_OP_TOTAL_CONCURR_COPIES 0x1 /* Must be <= 16384 */ +#define RCR_OP_MAX_CONCURR_COPIES 0x1 /* Must be <= 255 */ +#define RCR_OP_DATA_SEG_GRAN_LOG2 9 /* 512 bytes in log 2 */ +#define RCR_OP_INLINE_DATA_GRAN_LOG2 9 /* 512 bytes in log 2 */ +#define RCR_OP_HELD_DATA_GRAN_LOG2 9 /* 512 bytes in log 2 */ + +extern int target_xcopy_setup_pt(void); +extern void target_xcopy_release_pt(void); +extern sense_reason_t target_do_xcopy(struct se_cmd *); +extern sense_reason_t target_do_receive_copy_results(struct se_cmd *); diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index b74feb0d5133..4e0050840a72 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -311,7 +311,11 @@ static struct se_portal_group *ft_add_tpg( */ if (strstr(name, "tpgt_") != name) return NULL; - if (strict_strtoul(name + 5, 10, &index) || index > UINT_MAX) + + ret = kstrtoul(name + 5, 10, &index); + if (ret) + return NULL; + if (index > UINT_MAX) return NULL; lacl = container_of(wwn, struct ft_lport_acl, fc_lport_wwn); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 0c27c7df1b09..4b79a1f2f901 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1,12 +1,12 @@ /******************************************************************************* * Vhost kernel TCM fabric driver for virtio SCSI initiators * - * (C) Copyright 2010-2012 RisingTide Systems LLC. + * (C) Copyright 2010-2013 Datera, Inc. * (C) Copyright 2010-2012 IBM Corp. * * Licensed to the Linux Foundation under the General Public License (GPL) version 2. * - * Authors: Nicholas A. Bellinger <nab@risingtidesystems.com> + * Authors: Nicholas A.
Bellinger <nab@daterainc.com> * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> * * This program is free software; you can redistribute it and/or modify @@ -48,12 +48,16 @@ #include <linux/virtio_scsi.h> #include <linux/llist.h> #include <linux/bitmap.h> +#include <linux/percpu_ida.h> #include "vhost.h" #define TCM_VHOST_VERSION "v0.1" #define TCM_VHOST_NAMELEN 256 #define TCM_VHOST_MAX_CDB_SIZE 32 +#define TCM_VHOST_DEFAULT_TAGS 256 +#define TCM_VHOST_PREALLOC_SGLS 2048 +#define TCM_VHOST_PREALLOC_PAGES 2048 struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ @@ -79,6 +83,7 @@ struct tcm_vhost_cmd { u32 tvc_lun; /* Pointer to the SGL formatted memory from virtio-scsi */ struct scatterlist *tvc_sgl; + struct page **tvc_upages; /* Pointer to response */ struct virtio_scsi_cmd_resp __user *tvc_resp; /* Pointer to vhost_scsi for our device */ @@ -450,17 +455,16 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd) { struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd, struct tcm_vhost_cmd, tvc_se_cmd); + struct se_session *se_sess = se_cmd->se_sess; if (tv_cmd->tvc_sgl_count) { u32 i; for (i = 0; i < tv_cmd->tvc_sgl_count; i++) put_page(sg_page(&tv_cmd->tvc_sgl[i])); - - kfree(tv_cmd->tvc_sgl); } tcm_vhost_put_inflight(tv_cmd->inflight); - kfree(tv_cmd); + percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); } static int tcm_vhost_shutdown_session(struct se_session *se_sess) @@ -704,7 +708,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } static struct tcm_vhost_cmd * -vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, +vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct tcm_vhost_tpg *tpg, struct virtio_scsi_cmd_req *v_req, u32 exp_data_len, @@ -712,18 +716,27 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, { struct tcm_vhost_cmd *cmd; struct tcm_vhost_nexus *tv_nexus; + struct se_session *se_sess; + struct scatterlist *sg; + struct page **pages; + int tag; tv_nexus = tpg->tpg_nexus; if (!tv_nexus) { pr_err("Unable to locate active struct tcm_vhost_nexus\n"); return ERR_PTR(-EIO); } + se_sess = tv_nexus->tvn_se_sess; - cmd = kzalloc(sizeof(struct tcm_vhost_cmd), GFP_ATOMIC); - if (!cmd) { - pr_err("Unable to allocate struct tcm_vhost_cmd\n"); - return ERR_PTR(-ENOMEM); - } + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_KERNEL); + cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag]; + sg = cmd->tvc_sgl; + pages = cmd->tvc_upages; + memset(cmd, 0, sizeof(struct tcm_vhost_cmd)); + + cmd->tvc_sgl = sg; + cmd->tvc_upages = pages; + cmd->tvc_se_cmd.map_tag = tag; cmd->tvc_tag = v_req->tag; cmd->tvc_task_attr = v_req->task_attr; cmd->tvc_exp_data_len = exp_data_len; @@ -740,7 +753,8 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, * Returns the number of scatterlist entries used or -errno on error. 
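The vhost_scsi_get_tag() path above shows the key constraint of descriptor pre-allocation: each slot in sess_cmd_map carries SGL and page-pointer arrays allocated once at nexus creation, so per-request zeroing has to save and restore those pointers around the memset. A standalone sketch of the pattern, using a hypothetical slot type in place of struct tcm_vhost_cmd:

#include <string.h>

/* Hypothetical slot standing in for struct tcm_vhost_cmd. */
struct cmd_slot {
	void *sgl;	/* preallocated scatterlist array, set up once */
	void *upages;	/* preallocated page-pointer array, set up once */
	int tag;
	/* ... per-request fields that must start out zeroed ... */
};

struct cmd_slot *claim_slot(struct cmd_slot *map, int tag)
{
	struct cmd_slot *c = &map[tag];
	void *sgl = c->sgl;		/* save the long-lived backing arrays */
	void *upages = c->upages;

	memset(c, 0, sizeof(*c));	/* wipe stale per-request state */
	c->sgl = sgl;			/* restore the backing arrays */
	c->upages = upages;
	c->tag = tag;
	return c;
}

The design choice here trades memory (every slot keeps worst-case SGL and page arrays resident) for a fast path with no allocations at I/O submission time.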
*/ static int -vhost_scsi_map_to_sgl(struct scatterlist *sgl, +vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *tv_cmd, + struct scatterlist *sgl, unsigned int sgl_count, struct iovec *iov, int write) @@ -752,13 +766,25 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl, struct page **pages; int ret, i; + if (sgl_count > TCM_VHOST_PREALLOC_SGLS) { + pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than" + " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n", + sgl_count, TCM_VHOST_PREALLOC_SGLS); + return -ENOBUFS; + } + pages_nr = iov_num_pages(iov); if (pages_nr > sgl_count) return -ENOBUFS; - pages = kmalloc(pages_nr * sizeof(struct page *), GFP_KERNEL); - if (!pages) - return -ENOMEM; + if (pages_nr > TCM_VHOST_PREALLOC_PAGES) { + pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than" + " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n", + pages_nr, TCM_VHOST_PREALLOC_PAGES); + return -ENOBUFS; + } + + pages = tv_cmd->tvc_upages; ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages); /* No pages were pinned */ @@ -783,7 +809,6 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl, } out: - kfree(pages); return ret; } @@ -807,24 +832,20 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, /* TODO overflow checking */ - sg = kmalloc(sizeof(cmd->tvc_sgl[0]) * sgl_count, GFP_ATOMIC); - if (!sg) - return -ENOMEM; - pr_debug("%s sg %p sgl_count %u is_err %d\n", __func__, - sg, sgl_count, !sg); + sg = cmd->tvc_sgl; + pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count); sg_init_table(sg, sgl_count); - cmd->tvc_sgl = sg; cmd->tvc_sgl_count = sgl_count; pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count); for (i = 0; i < niov; i++) { - ret = vhost_scsi_map_to_sgl(sg, sgl_count, &iov[i], write); + ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i], + write); if (ret < 0) { for (i = 0; i < cmd->tvc_sgl_count; i++) put_page(sg_page(&cmd->tvc_sgl[i])); - kfree(cmd->tvc_sgl); - cmd->tvc_sgl = NULL; + cmd->tvc_sgl_count = 0; return ret; } @@ -989,10 +1010,10 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) for (i = 0; i < data_num; i++) exp_data_len += vq->iov[data_first + i].iov_len; - cmd = vhost_scsi_allocate_cmd(vq, tpg, &v_req, - exp_data_len, data_direction); + cmd = vhost_scsi_get_tag(vq, tpg, &v_req, + exp_data_len, data_direction); if (IS_ERR(cmd)) { - vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", + vq_err(vq, "vhost_scsi_get_tag failed %ld\n", PTR_ERR(cmd)); goto err_cmd; } @@ -1654,11 +1675,31 @@ static void tcm_vhost_drop_nodeacl(struct se_node_acl *se_acl) kfree(nacl); } +static void tcm_vhost_free_cmd_map_res(struct tcm_vhost_nexus *nexus, + struct se_session *se_sess) +{ + struct tcm_vhost_cmd *tv_cmd; + unsigned int i; + + if (!se_sess->sess_cmd_map) + return; + + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + kfree(tv_cmd->tvc_sgl); + kfree(tv_cmd->tvc_upages); + } +} + static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, const char *name) { struct se_portal_group *se_tpg; + struct se_session *se_sess; struct tcm_vhost_nexus *tv_nexus; + struct tcm_vhost_cmd *tv_cmd; + unsigned int i; mutex_lock(&tpg->tv_tpg_mutex); if (tpg->tpg_nexus) { @@ -1675,14 +1716,37 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, return -ENOMEM; } /* - * Initialize the struct se_session pointer + * Initialize the struct se_session pointer and setup tagpool + * for struct tcm_vhost_cmd descriptors */ - tv_nexus->tvn_se_sess = transport_init_session(); + 
tv_nexus->tvn_se_sess = transport_init_session_tags( + TCM_VHOST_DEFAULT_TAGS, + sizeof(struct tcm_vhost_cmd)); + if (IS_ERR(tv_nexus->tvn_se_sess)) { + mutex_unlock(&tpg->tv_tpg_mutex); + kfree(tv_nexus); + return -ENOMEM; + } + se_sess = tv_nexus->tvn_se_sess; + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + tv_cmd->tvc_sgl = kzalloc(sizeof(struct scatterlist) * + TCM_VHOST_PREALLOC_SGLS, GFP_KERNEL); + if (!tv_cmd->tvc_sgl) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_sgl\n"); + goto out; + } + + tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) * + TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL); + if (!tv_cmd->tvc_upages) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_upages\n"); + goto out; + } + } /* * Since we are running in 'demo mode' this call will generate a * struct se_node_acl for the tcm_vhost struct se_portal_group with @@ -1694,9 +1758,7 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, mutex_unlock(&tpg->tv_tpg_mutex); pr_debug("core_tpg_check_initiator_node_acl() failed" " for %s\n", name); - transport_free_session(tv_nexus->tvn_se_sess); - kfree(tv_nexus); - return -ENOMEM; + goto out; } /* * Now register the TCM vhost virtual I_T Nexus as active with the @@ -1708,6 +1770,12 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, mutex_unlock(&tpg->tv_tpg_mutex); return 0; + +out: + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); + transport_free_session(se_sess); + kfree(tv_nexus); + return -ENOMEM; } static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) @@ -1747,6 +1815,8 @@ static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) pr_debug("TCM_vhost_ConfigFS: Removing I_T Nexus to emulated" " %s Initiator Port: %s\n", tcm_vhost_dump_proto_id(tpg->tport), tv_nexus->tvn_se_sess->se_node_acl->initiatorname); + + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); /* * Release the SCSI I_T Nexus to the emulated vhost Target Port */ diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h new file mode 100644 index 000000000000..0b23edbee309 --- /dev/null +++ b/include/linux/percpu_ida.h @@ -0,0 +1,60 @@ +#ifndef __PERCPU_IDA_H__ +#define __PERCPU_IDA_H__ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/init.h> +#include <linux/spinlock_types.h> +#include <linux/wait.h> +#include <linux/cpumask.h> + +struct percpu_ida_cpu; + +struct percpu_ida { + /* + * number of tags available to be allocated, as passed to + * percpu_ida_init() + */ + unsigned nr_tags; + + struct percpu_ida_cpu __percpu *tag_cpu; + + /* + * Bitmap of cpus that (may) have tags on their percpu freelists: + * steal_tags() uses this to decide when to steal tags, and which cpus + * to try stealing from. + * + * It's ok for a freelist to be empty when its bit is set - steal_tags() + * will just keep looking - but the bitmap _must_ be set whenever a + * percpu freelist does have tags. + */ + cpumask_t cpus_have_tags; + + struct { + spinlock_t lock; + /* + * When we go to steal tags from another cpu (see steal_tags()), + * we want to pick a cpu at random.
Cycling through them every + * time we steal is a bit easier and more or less equivalent: + */ + unsigned cpu_last_stolen; + + /* For sleeping on allocation failure */ + wait_queue_head_t wait; + + /* + * Global freelist - it's a stack where nr_free points to the + * top + */ + unsigned nr_free; + unsigned *freelist; + } ____cacheline_aligned_in_smp; +}; + +int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp); +void percpu_ida_free(struct percpu_ida *pool, unsigned tag); + +void percpu_ida_destroy(struct percpu_ida *pool); +int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags); + +#endif /* __PERCPU_IDA_H__ */ diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index d477bfb73fb9..66d42edfb3fc 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -144,6 +144,7 @@ enum scsi_timeouts { #define ACCESS_CONTROL_IN 0x86 #define ACCESS_CONTROL_OUT 0x87 #define READ_16 0x88 +#define COMPARE_AND_WRITE 0x89 #define WRITE_16 0x8a #define READ_ATTRIBUTE 0x8c #define WRITE_ATTRIBUTE 0x8d diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index e5d09d242ba3..a12589c4ee92 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -6,13 +6,13 @@ struct iscsit_transport { #define ISCSIT_TRANSPORT_NAME 16 char name[ISCSIT_TRANSPORT_NAME]; int transport_type; + int priv_size; struct module *owner; struct list_head t_node; int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *); int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *); void (*iscsit_free_np)(struct iscsi_np *); void (*iscsit_free_conn)(struct iscsi_conn *); - struct iscsi_cmd *(*iscsit_alloc_cmd)(struct iscsi_conn *, gfp_t); int (*iscsit_get_login_rx)(struct iscsi_conn *, struct iscsi_login *); int (*iscsit_put_login_tx)(struct iscsi_conn *, struct iscsi_login *, u32); int (*iscsit_immediate_queue)(struct iscsi_conn *, struct iscsi_cmd *, int); @@ -22,6 +22,11 @@ struct iscsit_transport { int (*iscsit_queue_status)(struct iscsi_conn *, struct iscsi_cmd *); }; +static inline void *iscsit_priv_cmd(struct iscsi_cmd *cmd) +{ + return (void *)(cmd + 1); +} + /* * From iscsi_target_transport.c */ @@ -92,3 +97,4 @@ extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *); extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t); extern int iscsit_sequence_cmd(struct iscsi_conn *, struct iscsi_cmd *, unsigned char *, __be32); +extern void iscsit_release_cmd(struct iscsi_cmd *); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index ffa2696d64dc..5ebe21cd5d1c 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -39,7 +39,8 @@ struct se_subsystem_api { }; struct sbc_ops { - sense_reason_t (*execute_rw)(struct se_cmd *cmd); + sense_reason_t (*execute_rw)(struct se_cmd *cmd, struct scatterlist *, + u32, enum dma_data_direction); sense_reason_t (*execute_sync_cache)(struct se_cmd *cmd); sense_reason_t (*execute_write_same)(struct se_cmd *cmd); sense_reason_t (*execute_write_same_unmap)(struct se_cmd *cmd); @@ -73,6 +74,10 @@ int transport_set_vpd_ident(struct t10_vpd *, unsigned char *); /* core helpers also used by command snooping in pscsi */ void *transport_kmap_data_sg(struct se_cmd *); void transport_kunmap_data_sg(struct se_cmd *); +/* core helpers also used by xcopy during internal command setup */ +int target_alloc_sgl(struct scatterlist **, unsigned int *, u32, bool); +sense_reason_t 
transport_generic_map_mem_to_cmd(struct se_cmd *, + struct scatterlist *, u32, struct scatterlist *, u32); void array_free(void *array, int n); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index e34fc904f2e1..5bdb8b7d2a69 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -5,11 +5,12 @@ #include <linux/configfs.h> #include <linux/dma-mapping.h> #include <linux/blkdev.h> +#include <linux/percpu_ida.h> #include <scsi/scsi_cmnd.h> #include <net/sock.h> #include <net/tcp.h> -#define TARGET_CORE_MOD_VERSION "v4.1.0-rc2-ml" +#define TARGET_CORE_MOD_VERSION "v4.1.0" #define TARGET_CORE_VERSION TARGET_CORE_MOD_VERSION /* Maximum Number of LUNs per Target Portal Group */ @@ -96,6 +97,10 @@ * block/blk-lib.c:blkdev_issue_discard() */ #define DA_EMULATE_TPWS 0 +/* Emulation for CompareAndWrite (AtomicTestandSet) by default */ +#define DA_EMULATE_CAW 1 +/* Emulation for 3rd Party Copy (ExtendedCopy) by default */ +#define DA_EMULATE_3PC 1 /* No Emulation for PSCSI by default */ #define DA_EMULATE_ALUA 0 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */ @@ -158,6 +163,9 @@ enum se_cmd_flags_table { SCF_ALUA_NON_OPTIMIZED = 0x00008000, SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000, SCF_ACK_KREF = 0x00040000, + SCF_COMPARE_AND_WRITE = 0x00080000, + SCF_COMPARE_AND_WRITE_POST = 0x00100000, + SCF_CMD_XCOPY_PASSTHROUGH = 0x00200000, }; /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ @@ -196,6 +204,7 @@ enum tcm_sense_reason_table { TCM_ADDRESS_OUT_OF_RANGE = R(0x11), TCM_OUT_OF_RESOURCES = R(0x12), TCM_PARAMETER_LIST_LENGTH_ERROR = R(0x13), + TCM_MISCOMPARE_VERIFY = R(0x14), #undef R }; @@ -415,6 +424,8 @@ struct se_cmd { enum dma_data_direction data_direction; /* For SAM Task Attribute */ int sam_task_attr; + /* Used for se_sess->sess_tag_pool */ + unsigned int map_tag; /* Transport protocol dependent state, see transport_state_table */ enum transport_state_table t_state; unsigned cmd_wait_set:1; @@ -444,11 +455,14 @@ struct se_cmd { struct kref cmd_kref; struct target_core_fabric_ops *se_tfo; sense_reason_t (*execute_cmd)(struct se_cmd *); - void (*transport_complete_callback)(struct se_cmd *); + sense_reason_t (*execute_rw)(struct se_cmd *, struct scatterlist *, + u32, enum dma_data_direction); + sense_reason_t (*transport_complete_callback)(struct se_cmd *); unsigned char *t_task_cdb; unsigned char __t_task_cdb[TCM_MAX_COMMAND_SIZE]; unsigned long long t_task_lba; + unsigned int t_task_nolb; unsigned int transport_state; #define CMD_T_ABORTED (1 << 0) #define CMD_T_ACTIVE (1 << 1) @@ -469,7 +483,9 @@ struct se_cmd { struct work_struct work; struct scatterlist *t_data_sg; + struct scatterlist *t_data_sg_orig; unsigned int t_data_nents; + unsigned int t_data_nents_orig; void *t_data_vmap; struct scatterlist *t_bidi_data_sg; unsigned int t_bidi_data_nents; @@ -536,6 +552,8 @@ struct se_session { struct list_head sess_wait_list; spinlock_t sess_cmd_lock; struct kref sess_kref; + void *sess_cmd_map; + struct percpu_ida sess_tag_pool; }; struct se_device; @@ -589,6 +607,8 @@ struct se_dev_attrib { int emulate_tas; int emulate_tpu; int emulate_tpws; + int emulate_caw; + int emulate_3pc; int enforce_pr_isids; int is_nonrot; int emulate_rest_reord; @@ -656,6 +676,7 @@ struct se_device { spinlock_t se_port_lock; spinlock_t se_tmr_lock; spinlock_t qf_cmd_lock; + struct semaphore caw_sem; /* Used for legacy SPC-2 reservations */ struct se_node_acl *dev_reserved_node_acl; /* Used for ALUA Logical Unit
Group membership */ @@ -669,6 +690,7 @@ struct se_device { struct list_head delayed_cmd_list; struct list_head state_list; struct list_head qf_cmd_list; + struct list_head g_dev_node; /* Pointer to associated SE HBA */ struct se_hba *se_hba; /* T10 Inquiry and VPD WWN Information */ diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 7a16178424f9..882b650e32be 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -84,6 +84,9 @@ struct target_core_fabric_ops { }; struct se_session *transport_init_session(void); +int transport_alloc_session_tags(struct se_session *, unsigned int, + unsigned int); +struct se_session *transport_init_session_tags(unsigned int, unsigned int); void __transport_register_session(struct se_portal_group *, struct se_node_acl *, struct se_session *, void *); void transport_register_session(struct se_portal_group *, @@ -131,6 +134,7 @@ int core_tmr_alloc_req(struct se_cmd *, void *, u8, gfp_t); void core_tmr_release_req(struct se_tmr_req *); int transport_generic_handle_tmr(struct se_cmd *); void transport_generic_request_failure(struct se_cmd *, sense_reason_t); +void __target_execute_cmd(struct se_cmd *); int transport_lookup_tmr_lun(struct se_cmd *, u32); struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *, @@ -175,4 +179,30 @@ u32 iscsi_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl * char *iscsi_parse_pr_out_transport_id(struct se_portal_group *, const char *, u32 *, char **); +/* + * The LIO target core uses DMA_TO_DEVICE to mean that data is going + * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean + * that data is coming from the target (eg handling a READ). However, + * this is just the opposite of what we have to tell the DMA mapping + * layer -- eg when handling a READ, the HBA will have to DMA the data + * out of memory so it can send it to the initiator, which means we + * need to use DMA_TO_DEVICE when we map the data. 
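To make the inversion concrete, here is a standalone sketch of the non-BIDI mapping with assertions over the two interesting cases; the enum is a local stand-in for the kernel's enum dma_data_direction:

#include <assert.h>

enum dma_dir { DMA_BIDIRECTIONAL, DMA_TO_DEVICE, DMA_FROM_DEVICE, DMA_NONE };

/* Mirrors the direction flip described above, minus the SCF_BIDI case. */
static enum dma_dir reverse_dir(enum dma_dir dir)
{
	switch (dir) {
	case DMA_TO_DEVICE:	/* SCSI WRITE: pull initiator data into memory */
		return DMA_FROM_DEVICE;
	case DMA_FROM_DEVICE:	/* SCSI READ: push data from memory to the wire */
		return DMA_TO_DEVICE;
	default:
		return DMA_NONE;
	}
}

int main(void)
{
	/* Handling a READ: the HBA DMAs data out of target memory. */
	assert(reverse_dir(DMA_FROM_DEVICE) == DMA_TO_DEVICE);
	/* Handling a WRITE: the HBA DMAs initiator data into target memory. */
	assert(reverse_dir(DMA_TO_DEVICE) == DMA_FROM_DEVICE);
	return 0;
}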
+ */ +static inline enum dma_data_direction +target_reverse_dma_direction(struct se_cmd *se_cmd) +{ + if (se_cmd->se_cmd_flags & SCF_BIDI) + return DMA_BIDIRECTIONAL; + + switch (se_cmd->data_direction) { + case DMA_TO_DEVICE: + return DMA_FROM_DEVICE; + case DMA_FROM_DEVICE: + return DMA_TO_DEVICE; + case DMA_NONE: + default: + return DMA_NONE; + } +} + #endif /* TARGET_CORE_FABRIC_H */ diff --git a/lib/Makefile b/lib/Makefile index f2cb3082697c..f3bb2cb98adf 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o percpu-refcount.o + earlycpio.o percpu-refcount.o percpu_ida.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o @@ -25,7 +25,8 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ - bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o + bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ + percpu_ida.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c new file mode 100644 index 000000000000..bab1ba2a4c71 --- /dev/null +++ b/lib/percpu_ida.c @@ -0,0 +1,335 @@ +/* + * Percpu IDA library + * + * Copyright (C) 2013 Datera, Inc. Kent Overstreet + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/bug.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/hardirq.h> +#include <linux/idr.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/spinlock.h> +#include <linux/percpu_ida.h> + +/* + * Number of tags we move between the percpu freelist and the global freelist at + * a time + */ +#define IDA_PCPU_BATCH_MOVE 32U + +/* Max size of percpu freelist */ +#define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2) + +struct percpu_ida_cpu { + /* + * Even though this is percpu, we need a lock for tag stealing by remote + * CPUs: + */ + spinlock_t lock; + + /* nr_free/freelist form a stack of free IDs */ + unsigned nr_free; + unsigned freelist[]; +}; + +static inline void move_tags(unsigned *dst, unsigned *dst_nr, + unsigned *src, unsigned *src_nr, + unsigned nr) +{ + *src_nr -= nr; + memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr); + *dst_nr += nr; +} + +/* + * Try to steal tags from a remote cpu's percpu freelist.
+ * + * We first check how many percpu freelists have tags - we don't steal tags + * unless enough percpu freelists have tags on them that it's possible more than + * half the total tags could be stuck on remote percpu freelists. + * + * Then we iterate through the cpus until we find some tags - we don't attempt + * to find the "best" cpu to steal from, to keep cacheline bouncing to a + * minimum. + */ +static inline void steal_tags(struct percpu_ida *pool, + struct percpu_ida_cpu *tags) +{ + unsigned cpus_have_tags, cpu = pool->cpu_last_stolen; + struct percpu_ida_cpu *remote; + + for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); + cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; + cpus_have_tags--) { + cpu = cpumask_next(cpu, &pool->cpus_have_tags); + + if (cpu >= nr_cpu_ids) { + cpu = cpumask_first(&pool->cpus_have_tags); + if (cpu >= nr_cpu_ids) + BUG(); + } + + pool->cpu_last_stolen = cpu; + remote = per_cpu_ptr(pool->tag_cpu, cpu); + + cpumask_clear_cpu(cpu, &pool->cpus_have_tags); + + if (remote == tags) + continue; + + spin_lock(&remote->lock); + + if (remote->nr_free) { + memcpy(tags->freelist, + remote->freelist, + sizeof(unsigned) * remote->nr_free); + + tags->nr_free = remote->nr_free; + remote->nr_free = 0; + } + + spin_unlock(&remote->lock); + + if (tags->nr_free) + break; + } +} + +/* + * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto + * our percpu freelist: + */ +static inline void alloc_global_tags(struct percpu_ida *pool, + struct percpu_ida_cpu *tags) +{ + move_tags(tags->freelist, &tags->nr_free, + pool->freelist, &pool->nr_free, + min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); +} + +static inline int alloc_local_tag(struct percpu_ida *pool, + struct percpu_ida_cpu *tags) +{ + int tag = -ENOSPC; + + spin_lock(&tags->lock); + if (tags->nr_free) + tag = tags->freelist[--tags->nr_free]; + spin_unlock(&tags->lock); + + return tag; +} + +/** + * percpu_ida_alloc - allocate a tag + * @pool: pool to allocate from + * @gfp: gfp flags + * + * Returns a tag - an integer in the range [0..nr_tags) (passed to + * percpu_ida_init()), or otherwise -ENOSPC on allocation failure. + * + * Safe to be called from interrupt context (assuming it isn't passed + * __GFP_WAIT, of course). + * + * @gfp indicates whether or not to wait until a free id is available (it's not + * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep + * however long it takes until another thread frees an id (same semantics as a + * mempool). + * + * Will not fail if passed __GFP_WAIT.
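A short usage sketch of the two gfp regimes just described; the function name is illustrative and 'pool' is assumed to have been set up with percpu_ida_init():

#include <linux/gfp.h>
#include <linux/percpu_ida.h>

static int example_use_tag(struct percpu_ida *pool)
{
	int tag;

	/* Process context with __GFP_WAIT: sleeps until a tag frees up. */
	tag = percpu_ida_alloc(pool, GFP_KERNEL);
	/* ... use tag to index a preallocated descriptor array ... */
	percpu_ida_free(pool, tag);

	/* Atomic context: no __GFP_WAIT, so -ENOSPC must be handled. */
	tag = percpu_ida_alloc(pool, GFP_NOWAIT);
	if (tag < 0)
		return tag;
	percpu_ida_free(pool, tag);
	return 0;
}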
+ */ +int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) +{ + DEFINE_WAIT(wait); + struct percpu_ida_cpu *tags; + unsigned long flags; + int tag; + + local_irq_save(flags); + tags = this_cpu_ptr(pool->tag_cpu); + + /* Fastpath */ + tag = alloc_local_tag(pool, tags); + if (likely(tag >= 0)) { + local_irq_restore(flags); + return tag; + } + + while (1) { + spin_lock(&pool->lock); + + /* + * prepare_to_wait() must come before steal_tags(), in case + * percpu_ida_free() on another cpu flips a bit in + * cpus_have_tags + * + * global lock held and irqs disabled, don't need percpu lock + */ + prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); + + if (!tags->nr_free) + alloc_global_tags(pool, tags); + if (!tags->nr_free) + steal_tags(pool, tags); + + if (tags->nr_free) { + tag = tags->freelist[--tags->nr_free]; + if (tags->nr_free) + cpumask_set_cpu(smp_processor_id(), + &pool->cpus_have_tags); + } + + spin_unlock(&pool->lock); + local_irq_restore(flags); + + if (tag >= 0 || !(gfp & __GFP_WAIT)) + break; + + schedule(); + + local_irq_save(flags); + tags = this_cpu_ptr(pool->tag_cpu); + } + + finish_wait(&pool->wait, &wait); + return tag; +} +EXPORT_SYMBOL_GPL(percpu_ida_alloc); + +/** + * percpu_ida_free - free a tag + * @pool: pool @tag was allocated from + * @tag: a tag previously allocated with percpu_ida_alloc() + * + * Safe to be called from interrupt context. + */ +void percpu_ida_free(struct percpu_ida *pool, unsigned tag) +{ + struct percpu_ida_cpu *tags; + unsigned long flags; + unsigned nr_free; + + BUG_ON(tag >= pool->nr_tags); + + local_irq_save(flags); + tags = this_cpu_ptr(pool->tag_cpu); + + spin_lock(&tags->lock); + tags->freelist[tags->nr_free++] = tag; + + nr_free = tags->nr_free; + spin_unlock(&tags->lock); + + if (nr_free == 1) { + cpumask_set_cpu(smp_processor_id(), + &pool->cpus_have_tags); + wake_up(&pool->wait); + } + + if (nr_free == IDA_PCPU_SIZE) { + spin_lock(&pool->lock); + + /* + * Global lock held and irqs disabled, don't need percpu + * lock + */ + if (tags->nr_free == IDA_PCPU_SIZE) { + move_tags(pool->freelist, &pool->nr_free, + tags->freelist, &tags->nr_free, + IDA_PCPU_BATCH_MOVE); + + wake_up(&pool->wait); + } + spin_unlock(&pool->lock); + } + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(percpu_ida_free); + +/** + * percpu_ida_destroy - release a tag pool's resources + * @pool: pool to free + * + * Frees the resources allocated by percpu_ida_init(). + */ +void percpu_ida_destroy(struct percpu_ida *pool) +{ + free_percpu(pool->tag_cpu); + free_pages((unsigned long) pool->freelist, + get_order(pool->nr_tags * sizeof(unsigned))); +} +EXPORT_SYMBOL_GPL(percpu_ida_destroy); + +/** + * percpu_ida_init - initialize a percpu tag pool + * @pool: pool to initialize + * @nr_tags: number of tags that will be available for allocation + * + * Initializes @pool so that it can be used to allocate tags - integers in the + * range [0, nr_tags). Typically, they'll be used by driver code to refer to a + * preallocated array of tag structures. + * + * Allocation is percpu, but sharding is limited by nr_tags - for best + * performance, the workload should not span more cpus than nr_tags / 128. 
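Tying the API together, a lifecycle sketch under the sharding guidance above (256 tags keeps per-cpu allocation cheap for workloads spanning up to 256 / 128 = 2 cpus); the pool name and tag count are illustrative:

#include <linux/percpu_ida.h>

static struct percpu_ida example_pool;

static int example_pool_setup(void)
{
	int err;

	err = percpu_ida_init(&example_pool, 256);
	if (err)
		return err;
	/* ... percpu_ida_alloc()/percpu_ida_free() while in service ... */
	return 0;
}

static void example_pool_teardown(void)
{
	percpu_ida_destroy(&example_pool);
}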
+ */ +int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) +{ + unsigned i, cpu, order; + + memset(pool, 0, sizeof(*pool)); + + init_waitqueue_head(&pool->wait); + spin_lock_init(&pool->lock); + pool->nr_tags = nr_tags; + + /* Guard against overflow */ + if (nr_tags > (unsigned) INT_MAX + 1) { + pr_err("percpu_ida_init(): nr_tags too large\n"); + return -EINVAL; + } + + order = get_order(nr_tags * sizeof(unsigned)); + pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order); + if (!pool->freelist) + return -ENOMEM; + + for (i = 0; i < nr_tags; i++) + pool->freelist[i] = i; + + pool->nr_free = nr_tags; + + pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + + IDA_PCPU_SIZE * sizeof(unsigned), + sizeof(unsigned)); + if (!pool->tag_cpu) + goto err; + + for_each_possible_cpu(cpu) + spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock); + + return 0; +err: + percpu_ida_destroy(pool); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(percpu_ida_init);
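As a closing usage note, this is the shape of the consumer added earlier in this series: a fabric driver sizes its per-session descriptor array and tag pool in one call, then indexes sess_cmd_map by the tag it draws. A hedged sketch; the nexus type and tag count mirror the vhost hunks above:

#include <linux/err.h>
#include <target/target_core_base.h>
#include <target/target_core_fabric.h>

#define EXAMPLE_SESSION_TAGS 256	/* cf. TCM_VHOST_DEFAULT_TAGS above */

struct example_cmd {
	struct se_cmd se_cmd;	/* ... plus fabric-private per-command state ... */
};

static struct se_session *example_init_nexus(void)
{
	/* One call allocates sess_cmd_map (tags * descriptor size) and
	 * seeds sess_tag_pool via the percpu IDA shown in this commit. */
	return transport_init_session_tags(EXAMPLE_SESSION_TAGS,
					   sizeof(struct example_cmd));
}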