IB/ehca: Improve latency by unlocking after triggering the hardware
[linux-2.6] / kernel / stop_machine.c
1 /* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
2  * GPL v2 and any later version.
3  */
4 #include <linux/cpu.h>
5 #include <linux/err.h>
6 #include <linux/kthread.h>
7 #include <linux/module.h>
8 #include <linux/sched.h>
9 #include <linux/stop_machine.h>
10 #include <linux/syscalls.h>
11 #include <linux/interrupt.h>
12
13 #include <asm/atomic.h>
14 #include <asm/semaphore.h>
15 #include <asm/uaccess.h>
16
17 /* Since we affect priority and affinity (both of which are visible
18  * to, and settable by outside processes) we do indirection via a
19  * kthread. */
20
21 /* Thread to stop each CPU in user context. */
/*
 * Phases the per-CPU stopper threads are driven through.  The controller
 * publishes a phase and every stopper thread acknowledges it.
 */
enum stopmachine_state {
	STOPMACHINE_WAIT = 0,		/* initial phase: nothing requested yet */
	STOPMACHINE_PREPARE = 1,	/* disable preemption, hold the CPU */
	STOPMACHINE_DISABLE_IRQ = 2,	/* disable interrupts on the CPU */
	STOPMACHINE_EXIT = 3,		/* leave the state loop and clean up */
};
28
29 static enum stopmachine_state stopmachine_state;
30 static unsigned int stopmachine_num_threads;
31 static atomic_t stopmachine_thread_ack;
32 static DECLARE_MUTEX(stopmachine_mutex);
33
34 static int stopmachine(void *cpu)
35 {
36         int irqs_disabled = 0;
37         int prepared = 0;
38
39         set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
40
41         /* Ack: we are alive */
42         smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
43         atomic_inc(&stopmachine_thread_ack);
44
45         /* Simple state machine */
46         while (stopmachine_state != STOPMACHINE_EXIT) {
47                 if (stopmachine_state == STOPMACHINE_DISABLE_IRQ 
48                     && !irqs_disabled) {
49                         local_irq_disable();
50                         hard_irq_disable();
51                         irqs_disabled = 1;
52                         /* Ack: irqs disabled. */
53                         smp_mb(); /* Must read state first. */
54                         atomic_inc(&stopmachine_thread_ack);
55                 } else if (stopmachine_state == STOPMACHINE_PREPARE
56                            && !prepared) {
57                         /* Everyone is in place, hold CPU. */
58                         preempt_disable();
59                         prepared = 1;
60                         smp_mb(); /* Must read state first. */
61                         atomic_inc(&stopmachine_thread_ack);
62                 }
63                 /* Yield in first stage: migration threads need to
64                  * help our sisters onto their CPUs. */
65                 if (!prepared && !irqs_disabled)
66                         yield();
67                 else
68                         cpu_relax();
69         }
70
71         /* Ack: we are exiting. */
72         smp_mb(); /* Must read state first. */
73         atomic_inc(&stopmachine_thread_ack);
74
75         if (irqs_disabled)
76                 local_irq_enable();
77         if (prepared)
78                 preempt_enable();
79
80         return 0;
81 }
82
83 /* Change the thread state */
84 static void stopmachine_set_state(enum stopmachine_state state)
85 {
86         atomic_set(&stopmachine_thread_ack, 0);
87         smp_wmb();
88         stopmachine_state = state;
89         while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
90                 cpu_relax();
91 }
92
93 static int stop_machine(void)
94 {
95         int i, ret = 0;
96         struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
97
98         /* One high-prio thread per cpu.  We'll do this one. */
99         sched_setscheduler(current, SCHED_FIFO, &param);
100
101         atomic_set(&stopmachine_thread_ack, 0);
102         stopmachine_num_threads = 0;
103         stopmachine_state = STOPMACHINE_WAIT;
104
105         for_each_online_cpu(i) {
106                 if (i == raw_smp_processor_id())
107                         continue;
108                 ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
109                 if (ret < 0)
110                         break;
111                 stopmachine_num_threads++;
112         }
113
114         /* Wait for them all to come to life. */
115         while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
116                 yield();
117
118         /* If some failed, kill them all. */
119         if (ret < 0) {
120                 stopmachine_set_state(STOPMACHINE_EXIT);
121                 return ret;
122         }
123
124         /* Now they are all started, make them hold the CPUs, ready. */
125         preempt_disable();
126         stopmachine_set_state(STOPMACHINE_PREPARE);
127
128         /* Make them disable irqs. */
129         local_irq_disable();
130         hard_irq_disable();
131         stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
132
133         return 0;
134 }
135
136 static void restart_machine(void)
137 {
138         stopmachine_set_state(STOPMACHINE_EXIT);
139         local_irq_enable();
140         preempt_enable_no_resched();
141 }
142
143 struct stop_machine_data
144 {
145         int (*fn)(void *);
146         void *data;
147         struct completion done;
148 };
149
150 static int do_stop(void *_smdata)
151 {
152         struct stop_machine_data *smdata = _smdata;
153         int ret;
154
155         ret = stop_machine();
156         if (ret == 0) {
157                 ret = smdata->fn(smdata->data);
158                 restart_machine();
159         }
160
161         /* We're done: you can kthread_stop us now */
162         complete(&smdata->done);
163
164         /* Wait for kthread_stop */
165         set_current_state(TASK_INTERRUPTIBLE);
166         while (!kthread_should_stop()) {
167                 schedule();
168                 set_current_state(TASK_INTERRUPTIBLE);
169         }
170         __set_current_state(TASK_RUNNING);
171         return ret;
172 }
173
174 struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
175                                        unsigned int cpu)
176 {
177         struct stop_machine_data smdata;
178         struct task_struct *p;
179
180         smdata.fn = fn;
181         smdata.data = data;
182         init_completion(&smdata.done);
183
184         down(&stopmachine_mutex);
185
186         /* If they don't care which CPU fn runs on, bind to any online one. */
187         if (cpu == NR_CPUS)
188                 cpu = raw_smp_processor_id();
189
190         p = kthread_create(do_stop, &smdata, "kstopmachine");
191         if (!IS_ERR(p)) {
192                 kthread_bind(p, cpu);
193                 wake_up_process(p);
194                 wait_for_completion(&smdata.done);
195         }
196         up(&stopmachine_mutex);
197         return p;
198 }
199
/*
 * Run fn(data) with the machine stopped and collect the result.
 *
 * @fn:   callback to run while the machine is stopped.
 * @data: argument passed to fn.
 * @cpu:  CPU to run on, or NR_CPUS if the caller does not care.
 *
 * Returns whatever kthread_stop() reports for the kstopmachine thread, or
 * the error encoded in the pointer if that thread could not be created.
 */
int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{
	struct task_struct *stopper;
	int ret;

	/* Keep CPUs from coming up or going down while we work. */
	lock_cpu_hotplug();

	stopper = __stop_machine_run(fn, data, cpu);
	if (IS_ERR(stopper))
		ret = PTR_ERR(stopper);
	else
		ret = kthread_stop(stopper);

	unlock_cpu_hotplug();

	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine_run);