lib/rwsem.c
/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>

struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        unsigned int flags;
#define RWSEM_WAITING_FOR_READ  0x00000001
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};
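
/* Note on the count encoding: the bias values live in the per-architecture
 * <asm/rwsem.h>; the i386 values are shown here purely as an illustration
 * (other architectures may differ):
 *
 *      RWSEM_ACTIVE_BIAS        0x00000001   one active reader or writer
 *      RWSEM_ACTIVE_MASK        0x0000ffff   the 'active part' of the count
 *      RWSEM_WAITING_BIAS      -0x00010000   one task queued on the semaphore
 *      RWSEM_ACTIVE_WRITE_BIAS  RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS
 *
 * so count == 0 means the semaphore is free, a positive count means readers
 * hold it, and a negative count means a writer holds it and/or tasks are
 * queued waiting for it.
 */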

#if RWSEM_DEBUG
#undef rwsemtrace
void rwsemtrace(struct rw_semaphore *sem, const char *str)
{
        printk("sem=%p\n", sem);
        printk("(sem)=%08lx\n", sem->count);
        if (sem->debug)
                printk("[%d] %s({%08lx})\n", current->pid, str, sem->count);
}
#endif

/*
 * handle the lock release and wake up processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 *   - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if downgrading is false
 */
static inline struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
{
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
        struct list_head *next;
        signed long oldcount, woken, loop;

        rwsemtrace(sem, "Entering __rwsem_do_wake");

        if (downgrading)
                goto dont_wake_writers;

        /* if we came through an up_xxxx() call, we only wake someone up
         * if we can transition the active part of the count from 0 -> 1
         */
 try_again:
        oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem)
                                                - RWSEM_ACTIVE_BIAS;
        if (oldcount & RWSEM_ACTIVE_MASK)
                goto undo;

        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);

        /* try to grant a single write lock if there's a writer at the front
         * of the queue - note we leave the 'active part' of the count
         * incremented by 1 and the waiting part incremented by 0x00010000
         */
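        /* (Illustration only, using the i386 bias values mentioned above:
         *  the woken writer's own RWSEM_WAITING_BIAS is deliberately left in
         *  the count, so together with the active bias taken by the 0 -> 1
         *  transition above its contribution equals RWSEM_ACTIVE_WRITE_BIAS,
         *  exactly as if it had taken the lock on the fast path.)
         */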
        if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
                goto readers_only;

        /* We must be careful not to touch 'waiter' after we set ->task = NULL.
         * It is allocated on the waiter's stack and may become invalid at
         * any time after that point (due to a wakeup from another source).
         */
        list_del(&waiter->list);
        tsk = waiter->task;
        smp_mb();
        waiter->task = NULL;
        wake_up_process(tsk);
        put_task_struct(tsk);
        goto out;

        /* don't want to wake any writers */
 dont_wake_writers:
        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
        if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
                goto out;

        /* grant an infinite number of read locks to the readers at the front
         * of the queue
         * - note we increment the 'active part' of the count by the number of
         *   readers before waking any processes up
         */
 readers_only:
        woken = 0;
        do {
                woken++;

                if (waiter->list.next == &sem->wait_list)
                        break;

                waiter = list_entry(waiter->list.next,
                                        struct rwsem_waiter, list);

        } while (waiter->flags & RWSEM_WAITING_FOR_READ);

        loop = woken;
        woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS;
        if (!downgrading)
                /* we'd already done one increment earlier */
                woken -= RWSEM_ACTIVE_BIAS;
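
        /* Worked example (using the illustrative i386 bias values, not a
         * definitive statement for every architecture): waking two readers
         * from up_write() gives woken = 2 * (0x00000001 + 0x00010000) - 1,
         * i.e. the two waiting biases are cancelled, two active biases are
         * added, and the single active bias already taken by the 0 -> 1
         * transition above is subtracted again.
         */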

        rwsem_atomic_add(woken, sem);

        next = sem->wait_list.next;
        for (; loop > 0; loop--) {
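                /* (as in the single-writer case above: grab the task pointer
                 *  and the list linkage before clearing ->task, after which
                 *  the on-stack 'waiter' may vanish at any moment)
                 */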
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);
        }

        sem->wait_list.next = next;
        next->prev = &sem->wait_list;

 out:
        rwsemtrace(sem, "Leaving __rwsem_do_wake");
        return sem;

        /* undo the change to the active part of the count, but check the
         * active part for a transition 1->0: if no lockers remain active,
         * nobody else will wake the queued tasks, so retry the grant
         */
 undo:
        if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK)
                goto out;
        goto try_again;
}

/*
 * wait for a lock to be granted
 */
static inline struct rw_semaphore *
rwsem_down_failed_common(struct rw_semaphore *sem,
                        struct rwsem_waiter *waiter, signed long adjustment)
{
        struct task_struct *tsk = current;
        signed long count;

        set_task_state(tsk, TASK_UNINTERRUPTIBLE);

        /* set up my own style of waitqueue */
        spin_lock_irq(&sem->wait_lock);
        waiter->task = tsk;
        get_task_struct(tsk);

        list_add_tail(&waiter->list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        count = rwsem_atomic_update(adjustment, sem);

        /* if there are no active locks, wake the front queued process(es) up */
        if (!(count & RWSEM_ACTIVE_MASK))
                sem = __rwsem_do_wake(sem, 0);

        spin_unlock_irq(&sem->wait_lock);

        /* wait to be given the lock */
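        /* (the waker clears waiter->task only after it has adjusted the count
         *  on our behalf and issued smp_mb(), so seeing NULL here means the
         *  lock has been handed to us and no further atomic op is needed)
         */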
        for (;;) {
                if (!waiter->task)
                        break;
                schedule();
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
        }

        tsk->state = TASK_RUNNING;

        return sem;
}

/*
 * wait for the read lock to be granted
 */
struct rw_semaphore fastcall __sched *
rwsem_down_read_failed(struct rw_semaphore *sem)
{
        struct rwsem_waiter waiter;

        rwsemtrace(sem, "Entering rwsem_down_read_failed");

        waiter.flags = RWSEM_WAITING_FOR_READ;
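        /* (down_read()'s fast path has already added RWSEM_ACTIVE_BIAS for
         *  us, so - assuming the illustrative i386 biases - this adjustment
         *  leaves a net contribution of one RWSEM_WAITING_BIAS while we sit
         *  on the queue)
         */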
        rwsem_down_failed_common(sem, &waiter,
                                RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);

        rwsemtrace(sem, "Leaving rwsem_down_read_failed");
        return sem;
}

/*
 * wait for the write lock to be granted
 */
struct rw_semaphore fastcall __sched *
rwsem_down_write_failed(struct rw_semaphore *sem)
{
        struct rwsem_waiter waiter;

        rwsemtrace(sem, "Entering rwsem_down_write_failed");

        waiter.flags = RWSEM_WAITING_FOR_WRITE;
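        /* (the write-lock fast path has already added RWSEM_ACTIVE_WRITE_BIAS,
         *  i.e. RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS on the illustrative
         *  i386 layout, so subtracting one active bias again leaves the same
         *  net RWSEM_WAITING_BIAS contribution as a queued reader)
         */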
        rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);

        rwsemtrace(sem, "Leaving rwsem_down_write_failed");
        return sem;
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        rwsemtrace(sem, "Entering rwsem_wake");

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, 0);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        rwsemtrace(sem, "Leaving rwsem_wake");

        return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        rwsemtrace(sem, "Entering rwsem_downgrade_wake");

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, 1);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        rwsemtrace(sem, "Leaving rwsem_downgrade_wake");
        return sem;
}

EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);
#if RWSEM_DEBUG
EXPORT_SYMBOL(rwsemtrace);
#endif