From 8b4922d3173d2eee7b43be8e5caec3ab7d30feff Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:52 +0100 Subject: block: Stop abusing csd.list for fifo_time Block layer currently abuses rq->csd.list.next for storing fifo_time. That is a terrible hack and completely unnecessary as well. Union achieves the same space saving in a cleaner way. Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/elevator.h | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4afa4f8f6090..1e1fa3f93d5f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -99,6 +99,7 @@ struct request { union { struct call_single_data csd; struct work_struct mq_flush_work; + unsigned long fifo_time; }; struct request_queue *q; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 306dd8cd0b6f..0bdfd46f4735 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -202,12 +202,6 @@ enum { #define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) -/* - * Hack to reuse the csd.list list_head as the fifo time holder while - * the request is in the io scheduler. Saves an unsigned long in rq. - */ -#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next) -#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp)) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) do { \ list_del_init(&(rq)->queuelist); \ -- cgit v1.2.1 From d9a74df512e44580a34bf6e70f5d08c126507354 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:39:53 +0100 Subject: block: Remove useless IPI struct initialization rq_fifo_clear() reset the csd.list through INIT_LIST_HEAD for no clear purpose. The csd.list doesn't need to be initialized as a list head because it's only ever used as a list node. Lets remove this useless initialization. Reviewed-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/elevator.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 0bdfd46f4735..df63bd3a8cf1 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -203,10 +203,7 @@ enum { #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) -#define rq_fifo_clear(rq) do { \ - list_del_init(&(rq)->queuelist); \ - INIT_LIST_HEAD(&(rq)->csd.list); \ - } while (0) +#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist) #else /* CONFIG_BLOCK */ -- cgit v1.2.1 From 0ebeb79ce920bed3a4a55113534951d95a444fbb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:56 +0100 Subject: smp: Remove unused list_head from csd Now that we got rid of all the remaining code which fiddled with csd.list, lets remove it. Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/smp.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 6ae004e437ea..4991c6b12831 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -17,10 +17,7 @@ extern void cpu_idle(void); typedef void (*smp_call_func_t)(void *info); struct call_single_data { - union { - struct list_head list; - struct llist_node llist; - }; + struct llist_node llist; smp_call_func_t func; void *info; u16 flags; -- cgit v1.2.1 From 08eed44c7249d381a099bc55577e55c6bb533160 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:57 +0100 Subject: smp: Teach __smp_call_function_single() to check for offline cpus Align __smp_call_function_single() with smp_call_function_single() so that it also checks whether requested cpu is still online. Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/smp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 4991c6b12831..c39074c794c5 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,8 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -void __smp_call_function_single(int cpuid, struct call_single_data *data, - int wait); +int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait); #ifdef CONFIG_SMP -- cgit v1.2.1 From fce8ad1568c57e7f334018dec4fa1744c926c135 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:40:01 +0100 Subject: smp: Remove wait argument from __smp_call_function_single() The main point of calling __smp_call_function_single() is to send an IPI in a pure asynchronous way. By embedding a csd in an object, a caller can send the IPI without waiting for a previous one to complete as is required by smp_call_function_single() for example. As such, sending this kind of IPI can be safe even when irqs are disabled. This flexibility comes at the expense of the caller who then needs to synchronize the csd lifecycle by himself and make sure that IPIs on a single csd are serialized. This is how __smp_call_function_single() works when wait = 0 and this usecase is relevant. Now there don't seem to be any usecase with wait = 1 that can't be covered by smp_call_function_single() instead, which is safer. Lets look at the two possible scenario: 1) The user calls __smp_call_function_single(wait = 1) on a csd embedded in an object. It looks like a nice and convenient pattern at the first sight because we can then retrieve the object from the IPI handler easily. But actually it is a waste of memory space in the object since the csd can be allocated from the stack by smp_call_function_single(wait = 1) and the object can be passed an the IPI argument. Besides that, embedding the csd in an object is more error prone because the caller must take care of the serialization of the IPIs for this csd. 2) The user calls __smp_call_function_single(wait = 1) on a csd that is allocated on the stack. It's ok but smp_call_function_single() can do it as well and it already takes care of the allocation on the stack. Again it's more simple and less error prone. Therefore, using the underscore prepend API version with wait = 1 is a bad pattern and a sign that the caller can do safer and more simple. There was a single user of that which has just been converted. So lets remove this option to discourage further users. Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index c39074c794c5..b410a1f23281 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,7 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait); +int __smp_call_function_single(int cpu, struct call_single_data *csd); #ifdef CONFIG_SMP -- cgit v1.2.1 From c46fff2a3b29794b35d717b5680a27f31a6a6bc0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:40:02 +0100 Subject: smp: Rename __smp_call_function_single() to smp_call_function_single_async() The name __smp_call_function_single() doesn't tell much about the properties of this function, especially when compared to smp_call_function_single(). The comments above the implementation are also misleading. The main point of this function is actually not to be able to embed the csd in an object. This is actually a requirement that result from the purpose of this function which is to raise an IPI asynchronously. As such it can be called with interrupts disabled. And this feature comes at the cost of the caller who then needs to serialize the IPIs on this csd. Lets rename the function and enhance the comments so that they reflect these properties. Suggested-by: Christoph Hellwig Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index b410a1f23281..633f5edd7470 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,7 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int __smp_call_function_single(int cpu, struct call_single_data *csd); +int smp_call_function_single_async(int cpu, struct call_single_data *csd); #ifdef CONFIG_SMP -- cgit v1.2.1 From af5040da01ef980670b3741b3e10733ee3e33566 Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Tue, 4 Mar 2014 23:13:10 +0900 Subject: blktrace: fix accounting of partially completed requests trace_block_rq_complete does not take into account that request can be partially completed, so we can get the following incorrect output of blkparser: C R 232 + 240 [0] C R 240 + 232 [0] C R 248 + 224 [0] C R 256 + 216 [0] but should be: C R 232 + 8 [0] C R 240 + 8 [0] C R 248 + 8 [0] C R 256 + 8 [0] Also, the whole output summary statistics of completed requests and final throughput will be incorrect. This patch takes into account real completion size of the request and fixes wrong completion accounting. Signed-off-by: Roman Pen CC: Steven Rostedt CC: Frederic Weisbecker CC: Ingo Molnar CC: linux-kernel@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Jens Axboe --- include/trace/events/block.h | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index e76ae19a8d6f..e8a5eca1dbe5 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -132,6 +132,7 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, * block_rq_complete - block IO operation completed by device driver * @q: queue containing the block operation request * @rq: block operations request + * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion * of operation request has been completed by the device driver. If @@ -139,11 +140,37 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, * do for the request. If @rq->bio is non-NULL then there is * additional work required to complete the request. */ -DEFINE_EVENT(block_rq_with_error, block_rq_complete, +TRACE_EVENT(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request_queue *q, struct request *rq, + unsigned int nr_bytes), - TP_ARGS(q, rq) + TP_ARGS(q, rq, nr_bytes), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, RWBS_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_rq_pos(rq); + __entry->nr_sector = nr_bytes >> 9; + __entry->errors = rq->errors; + + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); DECLARE_EVENT_CLASS(block_rq, -- cgit v1.2.1 From 89f8b33ca1ea881d1d84542282cb85d07d02e78d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Mar 2014 09:38:42 -0600 Subject: block: remove old blk_iopoll_enabled variable This was a debugging measure to toggle enabled/disabled when testing. But for real production setups, it's not safe to toggle this setting without either reloading drivers of quiescing IO first. Neither of which the toggle enforces. Additionally, it makes drivers deal with the conditional state. Remove it completely. It's up to the driver whether iopoll is enabled or not. Signed-off-by: Jens Axboe --- include/linux/blk-iopoll.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h index 308734d3d4a2..77ae77c0b704 100644 --- a/include/linux/blk-iopoll.h +++ b/include/linux/blk-iopoll.h @@ -43,6 +43,4 @@ extern void __blk_iopoll_complete(struct blk_iopoll *); extern void blk_iopoll_enable(struct blk_iopoll *); extern void blk_iopoll_disable(struct blk_iopoll *); -extern int blk_iopoll_enabled; - #endif -- cgit v1.2.1 From 95363efde193079541cb379eb47140e9c4d355d5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 14 Mar 2014 10:43:15 -0600 Subject: blk-mq: allow blk_mq_init_commands() to return failure If drivers do dynamic allocation in the hardware command init path, then we need to be able to handle and return failures. And if they do allocations or mappings in the init command path, then we need a cleanup function to free up that space at exit time. So add blk_mq_free_commands() as the cleanup function. This is required for the mtip32xx driver conversion to blk-mq. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 18ba8a627f46..33ff10ebcabb 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -117,7 +117,8 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); -void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); +int blk_mq_init_commands(struct request_queue *, int (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); +void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -- cgit v1.2.1 From 5d12f905cc50c0810628d0deedd478ec2db48659 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 19 Mar 2014 15:25:02 -0600 Subject: blk-mq: fix wrong usage of hctx->state vs hctx->flags BLK_MQ_F_* flags are for hctx->flags, and are non-atomic and set at registration time. BLK_MQ_S_* flags are dynamic and atomic, and are accessed through hctx->state. Some of the BLK_MQ_S_STOPPED uses were wrong. Additionally, the header file should not use a bit shift for the _S_ flags, as they are done through the set/test_bit functions. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 33ff10ebcabb..bb68b03741be 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -109,7 +109,7 @@ enum { BLK_MQ_F_SHOULD_SORT = 1 << 1, BLK_MQ_F_SHOULD_IPI = 1 << 2, - BLK_MQ_S_STOPPED = 1 << 0, + BLK_MQ_S_STOPPED = 0, BLK_MQ_MAX_DEPTH = 2048, }; -- cgit v1.2.1 From eeabc850b79336575da7be3dbe186a2da4de8293 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Mar 2014 08:57:37 -0600 Subject: blk-mq: merge blk_mq_insert_request and blk_mq_run_request It's almost identical to blk_mq_insert_request, so fold the two into one slightly more generic function by making the flush special case a bit smarted. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index bb68b03741be..349c730a579e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -122,8 +122,7 @@ void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struc void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request_queue *, struct request *, - bool, bool); +void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -- cgit v1.2.1