/
0001-bfq_iosched-block-prepare_IO_context_code-v1-2.6.36.patch
292 lines (254 loc) · 8.45 KB
/
0001-bfq_iosched-block-prepare_IO_context_code-v1-2.6.36.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
From 0ab626e3798f660cf494d352a1b0d7128e65b93f Mon Sep 17 00:00:00 2001
From: Francesco Allertsen <fallertsen@gmail.com>
Date: Tue, 12 Oct 2010 15:14:46 +0200
Subject: [PATCH 1/3] block: prepare I/O context code for BFQ
BFQ uses struct cfq_io_context to store its per-process per-device data,
reusing the same code for cic handling of CFQ. The code is not shared
ATM to minimize the impact of these patches.
This patch introduces a new hlist to each io_context to store all the
cic's allocated by BFQ to allow calling the right destructor on module
unload; the radix tree used for cic lookup needs to be duplicated
because it can contain dead keys inserted by a scheduler and later
retrieved by the other one.
Update the io_context exit and free paths to take care also of
the BFQ cic's.
Change the type of cfqq inside struct cfq_io_context to void *
to use it also for BFQ per-queue data.
A new bfq-specific ioprio_changed field is necessary, too, to avoid
clobbering cfq's one, so switch ioprio_changed to a bitmap, with one
element per scheduler.
Signed-off-by: Fabio Checconi <fabio@gandalf.sssup.it>
Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Signed-off-by: Francesco Allertsen <fallertsen@gmail.com>
---
block/Kconfig.iosched | 26 ++++++++++++++++++++++++++
block/blk-ioc.c | 27 +++++++++++++++------------
block/cfq-iosched.c | 10 +++++++---
fs/ioprio.c | 9 +++++++--
include/linux/iocontext.h | 18 +++++++++++++++---
5 files changed, 70 insertions(+), 20 deletions(-)
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 3199b76..5905452 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -43,6 +43,28 @@ config CFQ_GROUP_IOSCHED
---help---
Enable group IO scheduling in CFQ.
+config IOSCHED_BFQ
+ tristate "BFQ I/O scheduler"
+ depends on EXPERIMENTAL
+ default n
+ ---help---
+ The BFQ I/O scheduler tries to distribute bandwidth among
+ all processes according to their weights.
+ It aims at distributing the bandwidth as desired, independently of
+ the disk parameters and with any workload. It also tries to
+ guarantee low latency to interactive and soft real-time
+ applications. If compiled built-in (saying Y here), BFQ can
+ be configured to support hierarchical scheduling.
+
+config CGROUP_BFQIO
+ bool "BFQ hierarchical scheduling support"
+ depends on CGROUPS && IOSCHED_BFQ=y
+ default n
+ ---help---
+ Enable hierarchical scheduling in BFQ, using the cgroups
+ filesystem interface. The name of the subsystem will be
+ bfqio.
+
choice
prompt "Default I/O scheduler"
default DEFAULT_CFQ
@@ -56,6 +78,9 @@ choice
config DEFAULT_CFQ
bool "CFQ" if IOSCHED_CFQ=y
+ config DEFAULT_BFQ
+ bool "BFQ" if IOSCHED_BFQ=y
+
config DEFAULT_NOOP
bool "No-op"
@@ -65,6 +90,7 @@ config DEFAULT_IOSCHED
string
default "deadline" if DEFAULT_DEADLINE
default "cfq" if DEFAULT_CFQ
+ default "bfq" if DEFAULT_BFQ
default "noop" if DEFAULT_NOOP
endmenu
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index d22c4c5..52ebea9 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
+#include <linux/bitmap.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/slab.h>
@@ -16,13 +17,12 @@
*/
static struct kmem_cache *iocontext_cachep;
-static void cfq_dtor(struct io_context *ioc)
+static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list)
{
- if (!hlist_empty(&ioc->cic_list)) {
+ if (!hlist_empty(list)) {
struct cfq_io_context *cic;
- cic = list_entry(ioc->cic_list.first, struct cfq_io_context,
- cic_list);
+ cic = list_entry(list->first, struct cfq_io_context, cic_list);
cic->dtor(ioc);
}
}
@@ -40,7 +40,9 @@ int put_io_context(struct io_context *ioc)
if (atomic_long_dec_and_test(&ioc->refcount)) {
rcu_read_lock();
- cfq_dtor(ioc);
+
+ hlist_sched_dtor(ioc, &ioc->cic_list);
+ hlist_sched_dtor(ioc, &ioc->bfq_cic_list);
rcu_read_unlock();
kmem_cache_free(iocontext_cachep, ioc);
@@ -50,15 +52,14 @@ int put_io_context(struct io_context *ioc)
}
EXPORT_SYMBOL(put_io_context);
-static void cfq_exit(struct io_context *ioc)
+static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list)
{
rcu_read_lock();
- if (!hlist_empty(&ioc->cic_list)) {
+ if (!hlist_empty(list)) {
struct cfq_io_context *cic;
- cic = list_entry(ioc->cic_list.first, struct cfq_io_context,
- cic_list);
+ cic = list_entry(list->first, struct cfq_io_context, cic_list);
cic->exit(ioc);
}
rcu_read_unlock();
@@ -75,8 +76,8 @@ void exit_io_context(struct task_struct *task)
task_unlock(task);
if (atomic_dec_and_test(&ioc->nr_tasks)) {
- cfq_exit(ioc);
-
+ hlist_sched_exit(ioc, &ioc->cic_list);
+ hlist_sched_exit(ioc, &ioc->bfq_cic_list);
}
put_io_context(ioc);
}
@@ -90,12 +91,14 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
atomic_long_set(&ret->refcount, 1);
atomic_set(&ret->nr_tasks, 1);
spin_lock_init(&ret->lock);
- ret->ioprio_changed = 0;
+ bitmap_zero(ret->ioprio_changed, IOC_IOPRIO_CHANGED_BITS);
ret->ioprio = 0;
ret->last_waited = 0; /* doesn't matter... */
ret->nr_batch_requests = 0; /* because this is 0 */
INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ret->cic_list);
+ INIT_RADIX_TREE(&ret->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH);
+ INIT_HLIST_HEAD(&ret->bfq_cic_list);
ret->ioc_data = NULL;
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9eba291..7204dbe 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2825,7 +2825,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
static void cfq_ioc_set_ioprio(struct io_context *ioc)
{
call_for_each_cic(ioc, changed_ioprio);
- ioc->ioprio_changed = 0;
}
static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
@@ -3109,8 +3108,13 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
goto err_free;
out:
- smp_read_barrier_depends();
- if (unlikely(ioc->ioprio_changed))
+ /*
+ * test_and_clear_bit() implies a memory barrier, paired with
+ * the wmb() in fs/ioprio.c, so the value seen for ioprio is the
+ * new one.
+ */
+ if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED,
+ ioc->ioprio_changed)))
cfq_ioc_set_ioprio(ioc);
#ifdef CONFIG_CFQ_GROUP_IOSCHED
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 748cfb9..3232045 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -30,7 +30,7 @@
int set_task_ioprio(struct task_struct *task, int ioprio)
{
- int err;
+ int err, i;
struct io_context *ioc;
const struct cred *cred = current_cred(), *tcred;
@@ -60,12 +60,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
err = -ENOMEM;
break;
}
+ /* let other ioc users see the new values */
+ smp_wmb();
task->io_context = ioc;
} while (1);
if (!err) {
ioc->ioprio = ioprio;
- ioc->ioprio_changed = 1;
+ /* make sure schedulers see the new ioprio value */
+ wmb();
+ for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++)
+ set_bit(i, ioc->ioprio_changed);
}
task_unlock(task);
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 64d5291..8c7e254 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -1,14 +1,14 @@
#ifndef IOCONTEXT_H
#define IOCONTEXT_H
+#include <linux/bitmap.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
-struct cfq_queue;
struct cfq_io_context {
void *key;
- struct cfq_queue *cfqq[2];
+ void *cfqq[2];
struct io_context *ioc;
@@ -28,6 +28,16 @@ struct cfq_io_context {
};
/*
+ * Indexes into the ioprio_changed bitmap. A bit set indicates that
+ * the corresponding I/O scheduler needs to see a ioprio update.
+ */
+enum {
+ IOC_CFQ_IOPRIO_CHANGED,
+ IOC_BFQ_IOPRIO_CHANGED,
+ IOC_IOPRIO_CHANGED_BITS
+};
+
+/*
* I/O subsystem state of the associated processes. It is refcounted
* and kmalloc'ed. These could be shared between processes.
*/
@@ -39,7 +49,7 @@ struct io_context {
spinlock_t lock;
unsigned short ioprio;
- unsigned short ioprio_changed;
+ DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS);
#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
unsigned short cgroup_changed;
@@ -53,6 +63,8 @@ struct io_context {
struct radix_tree_root radix_root;
struct hlist_head cic_list;
+ struct radix_tree_root bfq_radix_root;
+ struct hlist_head bfq_cic_list;
void *ioc_data;
};
--
1.7.1