[PATCH] sched: less locking
[linux-2.6] / kernel / stop_machine.c
1 #include <linux/stop_machine.h>
2 #include <linux/kthread.h>
3 #include <linux/sched.h>
4 #include <linux/cpu.h>
5 #include <linux/err.h>
6 #include <linux/syscalls.h>
7 #include <asm/atomic.h>
8 #include <asm/semaphore.h>
9 #include <asm/uaccess.h>
10
/* Since we affect priority and affinity (both of which are visible
 * to, and settable by outside processes) we do indirection via a
 * kthread. */
14
15 /* Thread to stop each CPU in user context. */
/* States published through the stopmachine_state variable and polled by
 * every stopmachine() thread. */
enum stopmachine_state {
        /* Value stop_machine() stores before spawning the threads;
         * stopmachine() takes no action for it and keeps yielding. */
        STOPMACHINE_WAIT,
        /* stopmachine() calls preempt_disable() once and acknowledges. */
        STOPMACHINE_PREPARE,
        /* stopmachine() calls local_irq_disable() once and acknowledges. */
        STOPMACHINE_DISABLE_IRQ,
        /* stopmachine() leaves its loop, acknowledges, undoes the
         * irq/preempt disabling it performed and returns. */
        STOPMACHINE_EXIT,
};
22
/* Current state; written by stop_machine() and stopmachine_set_state(),
 * polled by the stopmachine() threads. */
static enum stopmachine_state stopmachine_state;
/* Number of stopmachine() threads stop_machine() managed to spawn. */
static unsigned int stopmachine_num_threads;
/* Incremented by each stopmachine() thread to acknowledge a state;
 * reset to 0 before every state change. */
static atomic_t stopmachine_thread_ack;
/* Taken with down() and released with up() by __stop_machine_run()
 * around a whole run. */
static DECLARE_MUTEX(stopmachine_mutex);
27
28 static int stopmachine(void *cpu)
29 {
30         int irqs_disabled = 0;
31         int prepared = 0;
32
33         set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
34
35         /* Ack: we are alive */
36         smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
37         atomic_inc(&stopmachine_thread_ack);
38
39         /* Simple state machine */
40         while (stopmachine_state != STOPMACHINE_EXIT) {
41                 if (stopmachine_state == STOPMACHINE_DISABLE_IRQ 
42                     && !irqs_disabled) {
43                         local_irq_disable();
44                         irqs_disabled = 1;
45                         /* Ack: irqs disabled. */
46                         smp_mb(); /* Must read state first. */
47                         atomic_inc(&stopmachine_thread_ack);
48                 } else if (stopmachine_state == STOPMACHINE_PREPARE
49                            && !prepared) {
50                         /* Everyone is in place, hold CPU. */
51                         preempt_disable();
52                         prepared = 1;
53                         smp_mb(); /* Must read state first. */
54                         atomic_inc(&stopmachine_thread_ack);
55                 }
56                 /* Yield in first stage: migration threads need to
57                  * help our sisters onto their CPUs. */
58                 if (!prepared && !irqs_disabled)
59                         yield();
60                 else
61                         cpu_relax();
62         }
63
64         /* Ack: we are exiting. */
65         smp_mb(); /* Must read state first. */
66         atomic_inc(&stopmachine_thread_ack);
67
68         if (irqs_disabled)
69                 local_irq_enable();
70         if (prepared)
71                 preempt_enable();
72
73         return 0;
74 }
75
76 /* Change the thread state */
77 static void stopmachine_set_state(enum stopmachine_state state)
78 {
79         atomic_set(&stopmachine_thread_ack, 0);
80         smp_wmb();
81         stopmachine_state = state;
82         while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
83                 cpu_relax();
84 }
85
86 static int stop_machine(void)
87 {
88         int i, ret = 0;
89         struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
90         mm_segment_t old_fs = get_fs();
91
92         /* One high-prio thread per cpu.  We'll do this one. */
93         set_fs(KERNEL_DS);
94         sys_sched_setscheduler(current->pid, SCHED_FIFO,
95                                 (struct sched_param __user *)&param);
96         set_fs(old_fs);
97
98         atomic_set(&stopmachine_thread_ack, 0);
99         stopmachine_num_threads = 0;
100         stopmachine_state = STOPMACHINE_WAIT;
101
102         for_each_online_cpu(i) {
103                 if (i == raw_smp_processor_id())
104                         continue;
105                 ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
106                 if (ret < 0)
107                         break;
108                 stopmachine_num_threads++;
109         }
110
111         /* Wait for them all to come to life. */
112         while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
113                 yield();
114
115         /* If some failed, kill them all. */
116         if (ret < 0) {
117                 stopmachine_set_state(STOPMACHINE_EXIT);
118                 up(&stopmachine_mutex);
119                 return ret;
120         }
121
122         /* Don't schedule us away at this point, please. */
123         local_irq_disable();
124
125         /* Now they are all started, make them hold the CPUs, ready. */
126         stopmachine_set_state(STOPMACHINE_PREPARE);
127
128         /* Make them disable irqs. */
129         stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
130
131         return 0;
132 }
133
/* Undo a successful stop_machine(): move the stopmachine() threads to
 * STOPMACHINE_EXIT, wait for all of them to acknowledge, and only then
 * re-enable interrupts on this CPU. */
static void restart_machine(void)
{
        stopmachine_set_state(STOPMACHINE_EXIT);
        local_irq_enable();
}
139
/* Request handed from __stop_machine_run() to the do_stop() thread. */
struct stop_machine_data
{
        /* Function do_stop() calls once stop_machine() has succeeded. */
        int (*fn)(void *);
        /* Argument passed to fn. */
        void *data;
        /* Completed by do_stop() when it has finished its work. */
        struct completion done;
};
146
147 static int do_stop(void *_smdata)
148 {
149         struct stop_machine_data *smdata = _smdata;
150         int ret;
151
152         ret = stop_machine();
153         if (ret == 0) {
154                 ret = smdata->fn(smdata->data);
155                 restart_machine();
156         }
157
158         /* We're done: you can kthread_stop us now */
159         complete(&smdata->done);
160
161         /* Wait for kthread_stop */
162         set_current_state(TASK_INTERRUPTIBLE);
163         while (!kthread_should_stop()) {
164                 schedule();
165                 set_current_state(TASK_INTERRUPTIBLE);
166         }
167         __set_current_state(TASK_RUNNING);
168         return ret;
169 }
170
171 struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
172                                        unsigned int cpu)
173 {
174         struct stop_machine_data smdata;
175         struct task_struct *p;
176
177         smdata.fn = fn;
178         smdata.data = data;
179         init_completion(&smdata.done);
180
181         down(&stopmachine_mutex);
182
183         /* If they don't care which CPU fn runs on, bind to any online one. */
184         if (cpu == NR_CPUS)
185                 cpu = raw_smp_processor_id();
186
187         p = kthread_create(do_stop, &smdata, "kstopmachine");
188         if (!IS_ERR(p)) {
189                 kthread_bind(p, cpu);
190                 wake_up_process(p);
191                 wait_for_completion(&smdata.done);
192         }
193         up(&stopmachine_mutex);
194         return p;
195 }
196
/*
 * Run fn(data) with the machine stopped and collect its result.
 *
 * @fn:   function to call while the machine is stopped.
 * @data: argument passed to @fn.
 * @cpu:  CPU to run @fn on, or NR_CPUS for the current one.
 *
 * CPU hotplug is locked for the duration so that no CPU can come up or
 * go down.
 *
 * Returns the value kthread_stop() reports for the worker thread, or
 * the error encoded in the pointer when the thread could not be created.
 */
int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{
        struct task_struct *worker;
        int result;

        lock_cpu_hotplug();
        worker = __stop_machine_run(fn, data, cpu);
        result = IS_ERR(worker) ? PTR_ERR(worker) : kthread_stop(worker);
        unlock_cpu_hotplug();

        return result;
}