Btrfs: Improve space balancing code
[linux-2.6] / fs / btrfs / async-thread.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/version.h>
20 #include <linux/kthread.h>
21 #include <linux/list.h>
22 #include <linux/spinlock.h>
23 # include <linux/freezer.h>
24 #include "async-thread.h"
25
26 /*
27  * container for the kthread task pointer and the list of pending work
28  * One of these is allocated per thread.
29  */
30 struct btrfs_worker_thread {
31         /* pool we belong to */
32         struct btrfs_workers *workers;
33
34         /* list of struct btrfs_work that are waiting for service */
35         struct list_head pending;
36
37         /* list of worker threads from struct btrfs_workers */
38         struct list_head worker_list;
39
40         /* kthread */
41         struct task_struct *task;
42
43         /* number of things on the pending list */
44         atomic_t num_pending;
45
46         unsigned long sequence;
47
48         /* protects the pending list. */
49         spinlock_t lock;
50
51         /* set to non-zero when this thread is already awake and kicking */
52         int working;
53
54         /* are we currently idle */
55         int idle;
56 };
57
58 /*
59  * helper function to move a thread onto the idle list after it
60  * has finished some requests.
61  */
62 static void check_idle_worker(struct btrfs_worker_thread *worker)
63 {
64         if (!worker->idle && atomic_read(&worker->num_pending) <
65             worker->workers->idle_thresh / 2) {
66                 unsigned long flags;
67                 spin_lock_irqsave(&worker->workers->lock, flags);
68                 worker->idle = 1;
69                 list_move(&worker->worker_list, &worker->workers->idle_list);
70                 spin_unlock_irqrestore(&worker->workers->lock, flags);
71         }
72 }
73
74 /*
75  * helper function to move a thread off the idle list after new
76  * pending work is added.
77  */
78 static void check_busy_worker(struct btrfs_worker_thread *worker)
79 {
80         if (worker->idle && atomic_read(&worker->num_pending) >=
81             worker->workers->idle_thresh) {
82                 unsigned long flags;
83                 spin_lock_irqsave(&worker->workers->lock, flags);
84                 worker->idle = 0;
85                 list_move_tail(&worker->worker_list,
86                                &worker->workers->worker_list);
87                 spin_unlock_irqrestore(&worker->workers->lock, flags);
88         }
89 }
90
91 /*
92  * main loop for servicing work items
93  */
94 static int worker_loop(void *arg)
95 {
96         struct btrfs_worker_thread *worker = arg;
97         struct list_head *cur;
98         struct btrfs_work *work;
99         do {
100                 spin_lock_irq(&worker->lock);
101                 while(!list_empty(&worker->pending)) {
102                         cur = worker->pending.next;
103                         work = list_entry(cur, struct btrfs_work, list);
104                         list_del(&work->list);
105                         clear_bit(0, &work->flags);
106
107                         work->worker = worker;
108                         spin_unlock_irq(&worker->lock);
109
110                         work->func(work);
111
112                         atomic_dec(&worker->num_pending);
113                         spin_lock_irq(&worker->lock);
114                         check_idle_worker(worker);
115                 }
116                 worker->working = 0;
117                 if (freezing(current)) {
118                         refrigerator();
119                 } else {
120                         set_current_state(TASK_INTERRUPTIBLE);
121                         spin_unlock_irq(&worker->lock);
122                         schedule();
123                         __set_current_state(TASK_RUNNING);
124                 }
125         } while (!kthread_should_stop());
126         return 0;
127 }
128
129 /*
130  * this will wait for all the worker threads to shutdown
131  */
132 int btrfs_stop_workers(struct btrfs_workers *workers)
133 {
134         struct list_head *cur;
135         struct btrfs_worker_thread *worker;
136
137         list_splice_init(&workers->idle_list, &workers->worker_list);
138         while(!list_empty(&workers->worker_list)) {
139                 cur = workers->worker_list.next;
140                 worker = list_entry(cur, struct btrfs_worker_thread,
141                                     worker_list);
142                 kthread_stop(worker->task);
143                 list_del(&worker->worker_list);
144                 kfree(worker);
145         }
146         return 0;
147 }
148
149 /*
150  * simple init on struct btrfs_workers
151  */
152 void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
153 {
154         workers->num_workers = 0;
155         INIT_LIST_HEAD(&workers->worker_list);
156         INIT_LIST_HEAD(&workers->idle_list);
157         spin_lock_init(&workers->lock);
158         workers->max_workers = max;
159         workers->idle_thresh = 32;
160         workers->name = name;
161 }
162
163 /*
164  * starts new worker threads.  This does not enforce the max worker
165  * count in case you need to temporarily go past it.
166  */
167 int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
168 {
169         struct btrfs_worker_thread *worker;
170         int ret = 0;
171         int i;
172
173         for (i = 0; i < num_workers; i++) {
174                 worker = kzalloc(sizeof(*worker), GFP_NOFS);
175                 if (!worker) {
176                         ret = -ENOMEM;
177                         goto fail;
178                 }
179
180                 INIT_LIST_HEAD(&worker->pending);
181                 INIT_LIST_HEAD(&worker->worker_list);
182                 spin_lock_init(&worker->lock);
183                 atomic_set(&worker->num_pending, 0);
184                 worker->task = kthread_run(worker_loop, worker,
185                                            "btrfs-%s-%d", workers->name,
186                                            workers->num_workers + i);
187                 worker->workers = workers;
188                 if (IS_ERR(worker->task)) {
189                         kfree(worker);
190                         ret = PTR_ERR(worker->task);
191                         goto fail;
192                 }
193
194                 spin_lock_irq(&workers->lock);
195                 list_add_tail(&worker->worker_list, &workers->idle_list);
196                 worker->idle = 1;
197                 workers->num_workers++;
198                 spin_unlock_irq(&workers->lock);
199         }
200         return 0;
201 fail:
202         btrfs_stop_workers(workers);
203         return ret;
204 }
205
206 /*
207  * run through the list and find a worker thread that doesn't have a lot
208  * to do right now.  This can return null if we aren't yet at the thread
209  * count limit and all of the threads are busy.
210  */
211 static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
212 {
213         struct btrfs_worker_thread *worker;
214         struct list_head *next;
215         int enforce_min = workers->num_workers < workers->max_workers;
216
217         /*
218          * if we find an idle thread, don't move it to the end of the
219          * idle list.  This improves the chance that the next submission
220          * will reuse the same thread, and maybe catch it while it is still
221          * working
222          */
223         if (!list_empty(&workers->idle_list)) {
224                 next = workers->idle_list.next;
225                 worker = list_entry(next, struct btrfs_worker_thread,
226                                     worker_list);
227                 return worker;
228         }
229         if (enforce_min || list_empty(&workers->worker_list))
230                 return NULL;
231
232         /*
233          * if we pick a busy task, move the task to the end of the list.
234          * hopefully this will keep things somewhat evenly balanced.
235          * Do the move in batches based on the sequence number.  This groups
236          * requests submitted at roughly the same time onto the same worker.
237          */
238         next = workers->worker_list.next;
239         worker = list_entry(next, struct btrfs_worker_thread, worker_list);
240         atomic_inc(&worker->num_pending);
241         worker->sequence++;
242
243         if (worker->sequence % workers->idle_thresh == 0)
244                 list_move_tail(next, &workers->worker_list);
245         return worker;
246 }
247
248 /*
249  * selects a worker thread to take the next job.  This will either find
250  * an idle worker, start a new worker up to the max count, or just return
251  * one of the existing busy workers.
252  */
253 static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
254 {
255         struct btrfs_worker_thread *worker;
256         unsigned long flags;
257
258 again:
259         spin_lock_irqsave(&workers->lock, flags);
260         worker = next_worker(workers);
261         spin_unlock_irqrestore(&workers->lock, flags);
262
263         if (!worker) {
264                 spin_lock_irqsave(&workers->lock, flags);
265                 if (workers->num_workers >= workers->max_workers) {
266                         struct list_head *fallback = NULL;
267                         /*
268                          * we have failed to find any workers, just
269                          * return the force one
270                          */
271                         if (!list_empty(&workers->worker_list))
272                                 fallback = workers->worker_list.next;
273                         if (!list_empty(&workers->idle_list))
274                                 fallback = workers->idle_list.next;
275                         BUG_ON(!fallback);
276                         worker = list_entry(fallback,
277                                   struct btrfs_worker_thread, worker_list);
278                         spin_unlock_irqrestore(&workers->lock, flags);
279                 } else {
280                         spin_unlock_irqrestore(&workers->lock, flags);
281                         /* we're below the limit, start another worker */
282                         btrfs_start_workers(workers, 1);
283                         goto again;
284                 }
285         }
286         return worker;
287 }
288
289 /*
290  * btrfs_requeue_work just puts the work item back on the tail of the list
291  * it was taken from.  It is intended for use with long running work functions
292  * that make some progress and want to give the cpu up for others.
293  */
294 int btrfs_requeue_work(struct btrfs_work *work)
295 {
296         struct btrfs_worker_thread *worker = work->worker;
297         unsigned long flags;
298
299         if (test_and_set_bit(0, &work->flags))
300                 goto out;
301
302         spin_lock_irqsave(&worker->lock, flags);
303         atomic_inc(&worker->num_pending);
304         list_add_tail(&work->list, &worker->pending);
305
306         /* by definition we're busy, take ourselves off the idle
307          * list
308          */
309         if (worker->idle) {
310                 spin_lock_irqsave(&worker->workers->lock, flags);
311                 worker->idle = 0;
312                 list_move_tail(&worker->worker_list,
313                                &worker->workers->worker_list);
314                 spin_unlock_irqrestore(&worker->workers->lock, flags);
315         }
316
317         spin_unlock_irqrestore(&worker->lock, flags);
318
319 out:
320         return 0;
321 }
322
323 /*
324  * places a struct btrfs_work into the pending queue of one of the kthreads
325  */
326 int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
327 {
328         struct btrfs_worker_thread *worker;
329         unsigned long flags;
330         int wake = 0;
331
332         /* don't requeue something already on a list */
333         if (test_and_set_bit(0, &work->flags))
334                 goto out;
335
336         worker = find_worker(workers);
337
338         spin_lock_irqsave(&worker->lock, flags);
339         atomic_inc(&worker->num_pending);
340         check_busy_worker(worker);
341         list_add_tail(&work->list, &worker->pending);
342
343         /*
344          * avoid calling into wake_up_process if this thread has already
345          * been kicked
346          */
347         if (!worker->working)
348                 wake = 1;
349         worker->working = 1;
350
351         spin_unlock_irqrestore(&worker->lock, flags);
352
353         if (wake)
354                 wake_up_process(worker->task);
355 out:
356         return 0;
357 }