1 /*
2  *  linux/kernel/signal.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  *
6  *  1997-11-02  Modified for POSIX.1b signals by Richard Henderson
7  *
8  *  2003-06-02  Jim Houston - Concurrent Computer Corp.
9  *              Changes to use preallocated sigqueue structures
10  *              to allow signals to be sent reliably.
11  */
12
13 #include <linux/slab.h>
14 #include <linux/module.h>
15 #include <linux/init.h>
16 #include <linux/sched.h>
17 #include <linux/fs.h>
18 #include <linux/tty.h>
19 #include <linux/binfmts.h>
20 #include <linux/security.h>
21 #include <linux/syscalls.h>
22 #include <linux/ptrace.h>
23 #include <linux/signal.h>
24 #include <linux/signalfd.h>
25 #include <linux/tracehook.h>
26 #include <linux/capability.h>
27 #include <linux/freezer.h>
28 #include <linux/pid_namespace.h>
29 #include <linux/nsproxy.h>
30 #include <trace/sched.h>
31
32 #include <asm/param.h>
33 #include <asm/uaccess.h>
34 #include <asm/unistd.h>
35 #include <asm/siginfo.h>
36 #include "audit.h"      /* audit_signal_info() */
37
38 /*
39  * SLAB caches for signal bits.
40  */
41
42 static struct kmem_cache *sigqueue_cachep;
43
44 DEFINE_TRACE(sched_signal_send);
45
46 static void __user *sig_handler(struct task_struct *t, int sig)
47 {
48         return t->sighand->action[sig - 1].sa.sa_handler;
49 }
50
51 static int sig_handler_ignored(void __user *handler, int sig)
52 {
53         /* Is it explicitly or implicitly ignored? */
54         return handler == SIG_IGN ||
55                 (handler == SIG_DFL && sig_kernel_ignore(sig));
56 }
57
58 static int sig_task_ignored(struct task_struct *t, int sig,
59                 int from_ancestor_ns)
60 {
61         void __user *handler;
62
63         handler = sig_handler(t, sig);
64
65         if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
66                         handler == SIG_DFL && !from_ancestor_ns)
67                 return 1;
68
69         return sig_handler_ignored(handler, sig);
70 }
71
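/*
 * Return non-zero if @sig would be discarded at generation time: it is not
 * blocked, the task's disposition ignores it, and no tracer wants to see it.
 */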
72 static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns)
73 {
74         /*
75          * Blocked signals are never ignored, since the
76          * signal handler may change by the time it is
77          * unblocked.
78          */
79         if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
80                 return 0;
81
82         if (!sig_task_ignored(t, sig, from_ancestor_ns))
83                 return 0;
84
85         /*
86          * Tracers may want to know about even ignored signals.
87          */
88         return !tracehook_consider_ignored_signal(t, sig);
89 }
90
91 /*
92  * Re-calculate pending state from the set of locally pending
93  * signals, globally pending signals, and blocked signals.
94  */
95 static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
96 {
97         unsigned long ready;
98         long i;
99
100         switch (_NSIG_WORDS) {
101         default:
102                 for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
103                         ready |= signal->sig[i] &~ blocked->sig[i];
104                 break;
105
106         case 4: ready  = signal->sig[3] &~ blocked->sig[3];
107                 ready |= signal->sig[2] &~ blocked->sig[2];
108                 ready |= signal->sig[1] &~ blocked->sig[1];
109                 ready |= signal->sig[0] &~ blocked->sig[0];
110                 break;
111
112         case 2: ready  = signal->sig[1] &~ blocked->sig[1];
113                 ready |= signal->sig[0] &~ blocked->sig[0];
114                 break;
115
116         case 1: ready  = signal->sig[0] &~ blocked->sig[0];
117         }
118         return ready != 0;
119 }
120
121 #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
122
123 static int recalc_sigpending_tsk(struct task_struct *t)
124 {
125         if (t->signal->group_stop_count > 0 ||
126             PENDING(&t->pending, &t->blocked) ||
127             PENDING(&t->signal->shared_pending, &t->blocked)) {
128                 set_tsk_thread_flag(t, TIF_SIGPENDING);
129                 return 1;
130         }
131         /*
132          * We must never clear the flag in another thread, or in current
133          * when it's possible the current syscall is returning -ERESTART*.
134          * So we don't clear it here; only callers that know it is safe clear it.
135          */
136         return 0;
137 }
138
139 /*
140  * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
141  * This is superfluous when called on current, the wakeup is a harmless no-op.
142  */
143 void recalc_sigpending_and_wake(struct task_struct *t)
144 {
145         if (recalc_sigpending_tsk(t))
146                 signal_wake_up(t, 0);
147 }
148
149 void recalc_sigpending(void)
150 {
151         if (unlikely(tracehook_force_sigpending()))
152                 set_thread_flag(TIF_SIGPENDING);
153         else if (!recalc_sigpending_tsk(current) && !freezing(current))
154                 clear_thread_flag(TIF_SIGPENDING);
155
156 }
157
158 /* Given the mask, find the first available signal that should be serviced. */
159
160 int next_signal(struct sigpending *pending, sigset_t *mask)
161 {
162         unsigned long i, *s, *m, x;
163         int sig = 0;
164         
165         s = pending->signal.sig;
166         m = mask->sig;
167         switch (_NSIG_WORDS) {
168         default:
169                 for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m)
170                         if ((x = *s &~ *m) != 0) {
171                                 sig = ffz(~x) + i*_NSIG_BPW + 1;
172                                 break;
173                         }
174                 break;
175
176         case 2: if ((x = s[0] &~ m[0]) != 0)
177                         sig = 1;
178                 else if ((x = s[1] &~ m[1]) != 0)
179                         sig = _NSIG_BPW + 1;
180                 else
181                         break;
182                 sig += ffz(~x);
183                 break;
184
185         case 1: if ((x = *s &~ *m) != 0)
186                         sig = ffz(~x) + 1;
187                 break;
188         }
189         
190         return sig;
191 }
192
193 /*
194  * allocate a new signal queue record
195  * - this may be called without locks if and only if t == current, otherwise an
196  *   appropriate lock must be held to stop the target task from exiting
197  */
198 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
199                                          int override_rlimit)
200 {
201         struct sigqueue *q = NULL;
202         struct user_struct *user;
203
204         /*
205          * We won't get problems with the target's UID changing under us
206          * because changing it requires RCU be used, and if t != current, the
207          * caller must be holding the RCU readlock (by way of a spinlock) and
208          * we use RCU protection here
209          */
210         user = get_uid(__task_cred(t)->user);
211         atomic_inc(&user->sigpending);
212         if (override_rlimit ||
213             atomic_read(&user->sigpending) <=
214                         t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
215                 q = kmem_cache_alloc(sigqueue_cachep, flags);
216         if (unlikely(q == NULL)) {
217                 atomic_dec(&user->sigpending);
218                 free_uid(user);
219         } else {
220                 INIT_LIST_HEAD(&q->list);
221                 q->flags = 0;
222                 q->user = user;
223         }
224
225         return q;
226 }
227
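/*
 * Free one sigqueue entry and drop the per-user pending-signal count,
 * unless it is a preallocated entry owned by a posix timer.
 */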
228 static void __sigqueue_free(struct sigqueue *q)
229 {
230         if (q->flags & SIGQUEUE_PREALLOC)
231                 return;
232         atomic_dec(&q->user->sigpending);
233         free_uid(q->user);
234         kmem_cache_free(sigqueue_cachep, q);
235 }
236
237 void flush_sigqueue(struct sigpending *queue)
238 {
239         struct sigqueue *q;
240
241         sigemptyset(&queue->signal);
242         while (!list_empty(&queue->list)) {
243                 q = list_entry(queue->list.next, struct sigqueue , list);
244                 list_del_init(&q->list);
245                 __sigqueue_free(q);
246         }
247 }
248
249 /*
250  * Flush all pending signals for a task.
251  */
252 void flush_signals(struct task_struct *t)
253 {
254         unsigned long flags;
255
256         spin_lock_irqsave(&t->sighand->siglock, flags);
257         clear_tsk_thread_flag(t, TIF_SIGPENDING);
258         flush_sigqueue(&t->pending);
259         flush_sigqueue(&t->signal->shared_pending);
260         spin_unlock_irqrestore(&t->sighand->siglock, flags);
261 }
262
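/*
 * Remove only the SI_TIMER-generated entries from @pending, leaving all
 * other queued signals in place.
 */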
263 static void __flush_itimer_signals(struct sigpending *pending)
264 {
265         sigset_t signal, retain;
266         struct sigqueue *q, *n;
267
268         signal = pending->signal;
269         sigemptyset(&retain);
270
271         list_for_each_entry_safe(q, n, &pending->list, list) {
272                 int sig = q->info.si_signo;
273
274                 if (likely(q->info.si_code != SI_TIMER)) {
275                         sigaddset(&retain, sig);
276                 } else {
277                         sigdelset(&signal, sig);
278                         list_del_init(&q->list);
279                         __sigqueue_free(q);
280                 }
281         }
282
283         sigorsets(&pending->signal, &signal, &retain);
284 }
285
286 void flush_itimer_signals(void)
287 {
288         struct task_struct *tsk = current;
289         unsigned long flags;
290
291         spin_lock_irqsave(&tsk->sighand->siglock, flags);
292         __flush_itimer_signals(&tsk->pending);
293         __flush_itimer_signals(&tsk->signal->shared_pending);
294         spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
295 }
296
297 void ignore_signals(struct task_struct *t)
298 {
299         int i;
300
301         for (i = 0; i < _NSIG; ++i)
302                 t->sighand->action[i].sa.sa_handler = SIG_IGN;
303
304         flush_signals(t);
305 }
306
307 /*
308  * Flush all handlers for a task.
309  */
310
311 void
312 flush_signal_handlers(struct task_struct *t, int force_default)
313 {
314         int i;
315         struct k_sigaction *ka = &t->sighand->action[0];
316         for (i = _NSIG ; i != 0 ; i--) {
317                 if (force_default || ka->sa.sa_handler != SIG_IGN)
318                         ka->sa.sa_handler = SIG_DFL;
319                 ka->sa.sa_flags = 0;
320                 sigemptyset(&ka->sa.sa_mask);
321                 ka++;
322         }
323 }
324
325 int unhandled_signal(struct task_struct *tsk, int sig)
326 {
327         void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
328         if (is_global_init(tsk))
329                 return 1;
330         if (handler != SIG_IGN && handler != SIG_DFL)
331                 return 0;
332         return !tracehook_consider_fatal_signal(tsk, sig);
333 }
334
335
336 /* Notify the system that a driver wants to block all signals for this
337  * process, and wants to be notified if any signals at all were to be
338  * sent/acted upon.  If the notifier routine returns non-zero, then the
339  * signal will be acted upon after all.  If the notifier routine returns 0,
340  * then the signal will be blocked.  Only one block per process is
341  * allowed.  priv is a pointer to private data that the notifier routine
342  * can use to determine if the signal should be blocked or not.  */
343
344 void
345 block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
346 {
347         unsigned long flags;
348
349         spin_lock_irqsave(&current->sighand->siglock, flags);
350         current->notifier_mask = mask;
351         current->notifier_data = priv;
352         current->notifier = notifier;
353         spin_unlock_irqrestore(&current->sighand->siglock, flags);
354 }
355
356 /* Notify the system that blocking has ended. */
357
358 void
359 unblock_all_signals(void)
360 {
361         unsigned long flags;
362
363         spin_lock_irqsave(&current->sighand->siglock, flags);
364         current->notifier = NULL;
365         current->notifier_data = NULL;
366         recalc_sigpending();
367         spin_unlock_irqrestore(&current->sighand->siglock, flags);
368 }
369
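/*
 * Take one queued instance of @sig off @list and copy its siginfo into
 * @info.  The pending bit is cleared only if no further instance remains.
 */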
370 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
371 {
372         struct sigqueue *q, *first = NULL;
373
374         /*
375          * Collect the siginfo appropriate to this signal.  Check if
376          * there is another siginfo for the same signal.
377         */
378         list_for_each_entry(q, &list->list, list) {
379                 if (q->info.si_signo == sig) {
380                         if (first)
381                                 goto still_pending;
382                         first = q;
383                 }
384         }
385
386         sigdelset(&list->signal, sig);
387
388         if (first) {
389 still_pending:
390                 list_del_init(&first->list);
391                 copy_siginfo(info, &first->info);
392                 __sigqueue_free(first);
393         } else {
394                 /* Ok, it wasn't in the queue.  This must be
395                    a fast-pathed signal or we must have been
396                    out of queue space.  So zero out the info.
397                  */
398                 info->si_signo = sig;
399                 info->si_errno = 0;
400                 info->si_code = 0;
401                 info->si_pid = 0;
402                 info->si_uid = 0;
403         }
404 }
405
406 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
407                         siginfo_t *info)
408 {
409         int sig = next_signal(pending, mask);
410
411         if (sig) {
412                 if (current->notifier) {
413                         if (sigismember(current->notifier_mask, sig)) {
414                                 if (!(current->notifier)(current->notifier_data)) {
415                                         clear_thread_flag(TIF_SIGPENDING);
416                                         return 0;
417                                 }
418                         }
419                 }
420
421                 collect_signal(sig, pending, info);
422         }
423
424         return sig;
425 }
426
427 /*
428  * Dequeue a signal and return the element to the caller, which is 
429  * expected to free it.
430  *
431  * All callers have to hold the siglock.
432  */
433 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
434 {
435         int signr;
436
437         /* We only dequeue private signals from ourselves, we don't let
438          * signalfd steal them
439          */
440         signr = __dequeue_signal(&tsk->pending, mask, info);
441         if (!signr) {
442                 signr = __dequeue_signal(&tsk->signal->shared_pending,
443                                          mask, info);
444                 /*
445                  * itimer signal ?
446                  *
447                  * itimers are process shared and we restart periodic
448                  * itimers in the signal delivery path to prevent DoS
449                  * attacks in the high resolution timer case. This is
450                  * compliant with the old way of self restarting
451                  * itimers, as the SIGALRM is a legacy signal and only
452                  * queued once. Changing the restart behaviour to
453                  * restart the timer in the signal dequeue path is
454                  * reducing the timer noise on heavy loaded !highres
455                  * systems too.
456                  */
457                 if (unlikely(signr == SIGALRM)) {
458                         struct hrtimer *tmr = &tsk->signal->real_timer;
459
460                         if (!hrtimer_is_queued(tmr) &&
461                             tsk->signal->it_real_incr.tv64 != 0) {
462                                 hrtimer_forward(tmr, tmr->base->get_time(),
463                                                 tsk->signal->it_real_incr);
464                                 hrtimer_restart(tmr);
465                         }
466                 }
467         }
468
469         recalc_sigpending();
470         if (!signr)
471                 return 0;
472
473         if (unlikely(sig_kernel_stop(signr))) {
474                 /*
475                  * Set a marker that we have dequeued a stop signal.  Our
476                  * caller might release the siglock and then the pending
477                  * stop signal it is about to process is no longer in the
478                  * pending bitmasks, but must still be cleared by a SIGCONT
479                  * (and overruled by a SIGKILL).  So those cases clear this
480                  * shared flag after we've set it.  Note that this flag may
481                  * remain set after the signal we return is ignored or
482                  * handled.  That doesn't matter because its only purpose
483                  * is to alert stop-signal processing code when another
484                  * processor has come along and cleared the flag.
485                  */
486                 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
487         }
488         if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
489                 /*
490                  * Release the siglock to ensure proper locking order
491                  * of timer locks outside of siglocks.  Note, we leave
492                  * irqs disabled here, since the posix-timers code is
493                  * about to disable them again anyway.
494                  */
495                 spin_unlock(&tsk->sighand->siglock);
496                 do_schedule_next_timer(info);
497                 spin_lock(&tsk->sighand->siglock);
498         }
499         return signr;
500 }
501
502 /*
503  * Tell a process that it has a new active signal.
504  *
505  * NOTE! we rely on the previous spin_lock to
506  * lock interrupts for us! We can only be called with
507  * "siglock" held, and the local interrupt must
508  * have been disabled when that got acquired!
509  *
510  * No need to set need_resched since signal event passing
511  * goes through ->blocked
512  */
513 void signal_wake_up(struct task_struct *t, int resume)
514 {
515         unsigned int mask;
516
517         set_tsk_thread_flag(t, TIF_SIGPENDING);
518
519         /*
520          * For SIGKILL, we want to wake it up in the stopped/traced/killable
521          * case. We don't check t->state here because there is a race with it
522          * executing on another processor and just now entering stopped state.
523          * By using wake_up_state, we ensure the process will wake up and
524          * handle its death signal.
525          */
526         mask = TASK_INTERRUPTIBLE;
527         if (resume)
528                 mask |= TASK_WAKEKILL;
529         if (!wake_up_state(t, mask))
530                 kick_process(t);
531 }
532
533 /*
534  * Remove signals in mask from the pending set and queue.
535  * Returns 1 if any signals were found.
536  *
537  * All callers must be holding the siglock.
538  *
539  * This version takes a sigset mask and looks at all signals,
540  * not just those in the first mask word.
541  */
542 static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
543 {
544         struct sigqueue *q, *n;
545         sigset_t m;
546
547         sigandsets(&m, mask, &s->signal);
548         if (sigisemptyset(&m))
549                 return 0;
550
551         signandsets(&s->signal, &s->signal, mask);
552         list_for_each_entry_safe(q, n, &s->list, list) {
553                 if (sigismember(mask, q->info.si_signo)) {
554                         list_del_init(&q->list);
555                         __sigqueue_free(q);
556                 }
557         }
558         return 1;
559 }
560 /*
561  * Remove signals in mask from the pending set and queue.
562  * Returns 1 if any signals were found.
563  *
564  * All callers must be holding the siglock.
565  */
566 static int rm_from_queue(unsigned long mask, struct sigpending *s)
567 {
568         struct sigqueue *q, *n;
569
570         if (!sigtestsetmask(&s->signal, mask))
571                 return 0;
572
573         sigdelsetmask(&s->signal, mask);
574         list_for_each_entry_safe(q, n, &s->list, list) {
575                 if (q->info.si_signo < SIGRTMIN &&
576                     (mask & sigmask(q->info.si_signo))) {
577                         list_del_init(&q->list);
578                         __sigqueue_free(q);
579                 }
580         }
581         return 1;
582 }
583
584 /*
585  * Bad permissions for sending the signal
586  * - the caller must hold at least the RCU read lock
587  */
588 static int check_kill_permission(int sig, struct siginfo *info,
589                                  struct task_struct *t)
590 {
591         const struct cred *cred = current_cred(), *tcred;
592         struct pid *sid;
593         int error;
594
595         if (!valid_signal(sig))
596                 return -EINVAL;
597
598         if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
599                 return 0;
600
601         error = audit_signal_info(sig, t); /* Let audit system see the signal */
602         if (error)
603                 return error;
604
605         tcred = __task_cred(t);
606         if ((cred->euid ^ tcred->suid) &&
607             (cred->euid ^ tcred->uid) &&
608             (cred->uid  ^ tcred->suid) &&
609             (cred->uid  ^ tcred->uid) &&
610             !capable(CAP_KILL)) {
611                 switch (sig) {
612                 case SIGCONT:
613                         sid = task_session(t);
614                         /*
615                          * We don't return the error if sid == NULL. The
616                          * task was unhashed, the caller must notice this.
617                          */
618                         if (!sid || sid == task_session(current))
619                                 break;
620                 default:
621                         return -EPERM;
622                 }
623         }
624
625         return security_task_kill(t, info, sig, 0);
626 }
627
628 /*
629  * Handle magic process-wide effects of stop/continue signals. Unlike
630  * the signal actions, these happen immediately at signal-generation
631  * time regardless of blocking, ignoring, or handling.  This does the
632  * actual continuing for SIGCONT, but not the actual stopping for stop
633  * signals. The process stop is done as a signal action for SIG_DFL.
634  *
635  * Returns true if the signal should be actually delivered, otherwise
636  * it should be dropped.
637  */
638 static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
639 {
640         struct signal_struct *signal = p->signal;
641         struct task_struct *t;
642
643         if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) {
644                 /*
645                  * The process is in the middle of dying, nothing to do.
646                  */
647         } else if (sig_kernel_stop(sig)) {
648                 /*
649                  * This is a stop signal.  Remove SIGCONT from all queues.
650                  */
651                 rm_from_queue(sigmask(SIGCONT), &signal->shared_pending);
652                 t = p;
653                 do {
654                         rm_from_queue(sigmask(SIGCONT), &t->pending);
655                 } while_each_thread(p, t);
656         } else if (sig == SIGCONT) {
657                 unsigned int why;
658                 /*
659                  * Remove all stop signals from all queues,
660                  * and wake all threads.
661                  */
662                 rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
663                 t = p;
664                 do {
665                         unsigned int state;
666                         rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
667                         /*
668                          * If there is a handler for SIGCONT, we must make
669                          * sure that no thread returns to user mode before
670                          * we post the signal, in case it was the only
671                          * thread eligible to run the signal handler--then
672                          * it must not do anything between resuming and
673                          * running the handler.  With the TIF_SIGPENDING
674                          * flag set, the thread will pause and acquire the
675                          * siglock that we hold now and until we've queued
676                          * the pending signal.
677                          *
678                          * Wake up the stopped thread _after_ setting
679                          * TIF_SIGPENDING
680                          */
681                         state = __TASK_STOPPED;
682                         if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
683                                 set_tsk_thread_flag(t, TIF_SIGPENDING);
684                                 state |= TASK_INTERRUPTIBLE;
685                         }
686                         wake_up_state(t, state);
687                 } while_each_thread(p, t);
688
689                 /*
690                  * Notify the parent with CLD_CONTINUED if we were stopped.
691                  *
692                  * If we were in the middle of a group stop, we pretend it
693                  * was already finished, and then continued. Since SIGCHLD
694                  * doesn't queue we report only CLD_STOPPED, as if the next
695                  * CLD_CONTINUED was dropped.
696                  */
697                 why = 0;
698                 if (signal->flags & SIGNAL_STOP_STOPPED)
699                         why |= SIGNAL_CLD_CONTINUED;
700                 else if (signal->group_stop_count)
701                         why |= SIGNAL_CLD_STOPPED;
702
703                 if (why) {
704                         /*
705                          * The first thread which returns from finish_stop()
706                          * will take ->siglock, notice SIGNAL_CLD_MASK, and
707                          * notify its parent. See get_signal_to_deliver().
708                          */
709                         signal->flags = why | SIGNAL_STOP_CONTINUED;
710                         signal->group_stop_count = 0;
711                         signal->group_exit_code = 0;
712                 } else {
713                         /*
714                          * We are not stopped, but there could be a stop
715                          * signal in the middle of being processed after
716                          * being removed from the queue.  Clear that too.
717                          */
718                         signal->flags &= ~SIGNAL_STOP_DEQUEUED;
719                 }
720         }
721
722         return !sig_ignored(p, sig, from_ancestor_ns);
723 }
724
725 /*
726  * Test if P wants to take SIG.  After we've checked all threads with this,
727  * it's equivalent to finding no threads not blocking SIG.  Any threads not
728  * blocking SIG were ruled out because they are not running and already
729  * have pending signals.  Such threads will dequeue from the shared queue
730  * as soon as they're available, so putting the signal on the shared queue
731  * will be equivalent to sending it to one such thread.
732  */
733 static inline int wants_signal(int sig, struct task_struct *p)
734 {
735         if (sigismember(&p->blocked, sig))
736                 return 0;
737         if (p->flags & PF_EXITING)
738                 return 0;
739         if (sig == SIGKILL)
740                 return 1;
741         if (task_is_stopped_or_traced(p))
742                 return 0;
743         return task_curr(p) || !signal_pending(p);
744 }
745
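/*
 * A signal has just been queued for @p.  Decide which thread should take
 * it and wake that thread; if the signal is fatal and unhandled, start
 * tearing the whole thread group down immediately.
 */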
746 static void complete_signal(int sig, struct task_struct *p, int group)
747 {
748         struct signal_struct *signal = p->signal;
749         struct task_struct *t;
750
751         /*
752          * Now find a thread we can wake up to take the signal off the queue.
753          *
754          * If the main thread wants the signal, it gets first crack.
755          * Probably the least surprising to the average bear.
756          */
757         if (wants_signal(sig, p))
758                 t = p;
759         else if (!group || thread_group_empty(p))
760                 /*
761                  * There is just one thread and it does not need to be woken.
762                  * It will dequeue unblocked signals before it runs again.
763                  */
764                 return;
765         else {
766                 /*
767                  * Otherwise try to find a suitable thread.
768                  */
769                 t = signal->curr_target;
770                 while (!wants_signal(sig, t)) {
771                         t = next_thread(t);
772                         if (t == signal->curr_target)
773                                 /*
774                                  * No thread needs to be woken.
775                                  * Any eligible threads will see
776                                  * the signal in the queue soon.
777                                  */
778                                 return;
779                 }
780                 signal->curr_target = t;
781         }
782
783         /*
784          * Found a killable thread.  If the signal will be fatal,
785          * then start taking the whole group down immediately.
786          */
787         if (sig_fatal(p, sig) &&
788             !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&
789             !sigismember(&t->real_blocked, sig) &&
790             (sig == SIGKILL ||
791              !tracehook_consider_fatal_signal(t, sig))) {
792                 /*
793                  * This signal will be fatal to the whole group.
794                  */
795                 if (!sig_kernel_coredump(sig)) {
796                         /*
797                          * Start a group exit and wake everybody up.
798                          * This way we don't have other threads
799                          * running and doing things after a slower
800                          * thread has the fatal signal pending.
801                          */
802                         signal->flags = SIGNAL_GROUP_EXIT;
803                         signal->group_exit_code = sig;
804                         signal->group_stop_count = 0;
805                         t = p;
806                         do {
807                                 sigaddset(&t->pending.signal, SIGKILL);
808                                 signal_wake_up(t, 1);
809                         } while_each_thread(p, t);
810                         return;
811                 }
812         }
813
814         /*
815          * The signal is already in the shared-pending queue.
816          * Tell the chosen thread to wake up and dequeue it.
817          */
818         signal_wake_up(t, sig == SIGKILL);
819         return;
820 }
821
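/*
 * Legacy (non-realtime) signals are never queued twice: a second instance
 * arriving while the first is still pending is simply dropped.
 */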
822 static inline int legacy_queue(struct sigpending *signals, int sig)
823 {
824         return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
825 }
826
827 static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
828                         int group, int from_ancestor_ns)
829 {
830         struct sigpending *pending;
831         struct sigqueue *q;
832
833         trace_sched_signal_send(sig, t);
834
835         assert_spin_locked(&t->sighand->siglock);
836
837         if (!prepare_signal(sig, t, from_ancestor_ns))
838                 return 0;
839
840         pending = group ? &t->signal->shared_pending : &t->pending;
841         /*
842          * Short-circuit ignored signals and support queuing
843          * exactly one non-rt signal, so that we can get more
844          * detailed information about the cause of the signal.
845          */
846         if (legacy_queue(pending, sig))
847                 return 0;
848         /*
849          * fast-pathed signals for kernel-internal things like SIGSTOP
850          * or SIGKILL.
851          */
852         if (info == SEND_SIG_FORCED)
853                 goto out_set;
854
855         /* Real-time signals must be queued if sent by sigqueue, or
856            some other real-time mechanism.  It is implementation
857            defined whether kill() does so.  We attempt to do so, on
858            the principle of least surprise, but since kill is not
859            allowed to fail with EAGAIN when low on memory we just
860            make sure at least one signal gets delivered and don't
861            pass on the info struct.  */
862
863         q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
864                                              (is_si_special(info) ||
865                                               info->si_code >= 0)));
866         if (q) {
867                 list_add_tail(&q->list, &pending->list);
868                 switch ((unsigned long) info) {
869                 case (unsigned long) SEND_SIG_NOINFO:
870                         q->info.si_signo = sig;
871                         q->info.si_errno = 0;
872                         q->info.si_code = SI_USER;
873                         q->info.si_pid = task_tgid_nr_ns(current,
874                                                         task_active_pid_ns(t));
875                         q->info.si_uid = current_uid();
876                         break;
877                 case (unsigned long) SEND_SIG_PRIV:
878                         q->info.si_signo = sig;
879                         q->info.si_errno = 0;
880                         q->info.si_code = SI_KERNEL;
881                         q->info.si_pid = 0;
882                         q->info.si_uid = 0;
883                         break;
884                 default:
885                         copy_siginfo(&q->info, info);
886                         if (from_ancestor_ns)
887                                 q->info.si_pid = 0;
888                         break;
889                 }
890         } else if (!is_si_special(info)) {
891                 if (sig >= SIGRTMIN && info->si_code != SI_USER)
892                 /*
893                  * Queue overflow, abort.  We only abort if the signal is an rt
894                  * signal sent from user space by something other than kill().
895                  */
896                         return -EAGAIN;
897         }
898
899 out_set:
900         signalfd_notify(t, sig);
901         sigaddset(&pending->signal, sig);
902         complete_signal(sig, t, group);
903         return 0;
904 }
905
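/*
 * Wrapper around __send_signal() that works out whether the sender is
 * running in an ancestor pid namespace of the target @t.
 */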
906 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
907                         int group)
908 {
909         int from_ancestor_ns = 0;
910
911 #ifdef CONFIG_PID_NS
912         if (!is_si_special(info) && SI_FROMUSER(info) &&
913                         task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0)
914                 from_ancestor_ns = 1;
915 #endif
916
917         return __send_signal(sig, info, t, group, from_ancestor_ns);
918 }
919
920 int print_fatal_signals;
921
922 static void print_fatal_signal(struct pt_regs *regs, int signr)
923 {
924         printk("%s/%d: potentially unexpected fatal signal %d.\n",
925                 current->comm, task_pid_nr(current), signr);
926
927 #if defined(__i386__) && !defined(__arch_um__)
928         printk("code at %08lx: ", regs->ip);
929         {
930                 int i;
931                 for (i = 0; i < 16; i++) {
932                         unsigned char insn;
933
934                         __get_user(insn, (unsigned char *)(regs->ip + i));
935                         printk("%02x ", insn);
936                 }
937         }
938 #endif
939         printk("\n");
940         preempt_disable();
941         show_regs(regs);
942         preempt_enable();
943 }
944
945 static int __init setup_print_fatal_signals(char *str)
946 {
947         get_option (&str, &print_fatal_signals);
948
949         return 1;
950 }
951
952 __setup("print-fatal-signals=", setup_print_fatal_signals);
953
954 int
955 __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
956 {
957         return send_signal(sig, info, p, 1);
958 }
959
960 static int
961 specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
962 {
963         return send_signal(sig, info, t, 0);
964 }
965
966 /*
967  * Force a signal that the process can't ignore: if necessary
968  * we unblock the signal and change any SIG_IGN to SIG_DFL.
969  *
970  * Note: If we unblock the signal, we always reset it to SIG_DFL,
971  * since we do not want to have a signal handler that was blocked
972  * be invoked when user space had explicitly blocked it.
973  *
974  * We don't want to have recursive SIGSEGV's etc, for example,
975  * that is why we also clear SIGNAL_UNKILLABLE.
976  */
977 int
978 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
979 {
980         unsigned long int flags;
981         int ret, blocked, ignored;
982         struct k_sigaction *action;
983
984         spin_lock_irqsave(&t->sighand->siglock, flags);
985         action = &t->sighand->action[sig-1];
986         ignored = action->sa.sa_handler == SIG_IGN;
987         blocked = sigismember(&t->blocked, sig);
988         if (blocked || ignored) {
989                 action->sa.sa_handler = SIG_DFL;
990                 if (blocked) {
991                         sigdelset(&t->blocked, sig);
992                         recalc_sigpending_and_wake(t);
993                 }
994         }
995         if (action->sa.sa_handler == SIG_DFL)
996                 t->signal->flags &= ~SIGNAL_UNKILLABLE;
997         ret = specific_send_sig_info(sig, info, t);
998         spin_unlock_irqrestore(&t->sighand->siglock, flags);
999
1000         return ret;
1001 }
1002
1003 void
1004 force_sig_specific(int sig, struct task_struct *t)
1005 {
1006         force_sig_info(sig, SEND_SIG_FORCED, t);
1007 }
1008
1009 /*
1010  * Nuke all other threads in the group.
1011  */
1012 void zap_other_threads(struct task_struct *p)
1013 {
1014         struct task_struct *t;
1015
1016         p->signal->group_stop_count = 0;
1017
1018         for (t = next_thread(p); t != p; t = next_thread(t)) {
1019                 /*
1020                  * Don't bother with already dead threads
1021                  */
1022                 if (t->exit_state)
1023                         continue;
1024
1025                 /* SIGKILL will be handled before any pending SIGSTOP */
1026                 sigaddset(&t->pending.signal, SIGKILL);
1027                 signal_wake_up(t, 1);
1028         }
1029 }
1030
1031 int __fatal_signal_pending(struct task_struct *tsk)
1032 {
1033         return sigismember(&tsk->pending.signal, SIGKILL);
1034 }
1035 EXPORT_SYMBOL(__fatal_signal_pending);
1036
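/*
 * Take @tsk->sighand->siglock, re-checking under RCU that ->sighand has not
 * been changed in the meantime.  Returns NULL if the task has no sighand
 * left (it is already dead).
 */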
1037 struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
1038 {
1039         struct sighand_struct *sighand;
1040
1041         rcu_read_lock();
1042         for (;;) {
1043                 sighand = rcu_dereference(tsk->sighand);
1044                 if (unlikely(sighand == NULL))
1045                         break;
1046
1047                 spin_lock_irqsave(&sighand->siglock, *flags);
1048                 if (likely(sighand == tsk->sighand))
1049                         break;
1050                 spin_unlock_irqrestore(&sighand->siglock, *flags);
1051         }
1052         rcu_read_unlock();
1053
1054         return sighand;
1055 }
1056
1057 /*
1058  * send signal info to all the members of a group
1059  * - the caller must hold the RCU read lock at least
1060  */
1061 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1062 {
1063         unsigned long flags;
1064         int ret;
1065
1066         ret = check_kill_permission(sig, info, p);
1067
1068         if (!ret && sig) {
1069                 ret = -ESRCH;
1070                 if (lock_task_sighand(p, &flags)) {
1071                         ret = __group_send_sig_info(sig, info, p);
1072                         unlock_task_sighand(p, &flags);
1073                 }
1074         }
1075
1076         return ret;
1077 }
1078
1079 /*
1080  * __kill_pgrp_info() sends a signal to a process group: this is what the tty
1081  * control characters do (^C, ^Z etc)
1082  * - the caller must hold at least a readlock on tasklist_lock
1083  */
1084 int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
1085 {
1086         struct task_struct *p = NULL;
1087         int retval, success;
1088
1089         success = 0;
1090         retval = -ESRCH;
1091         do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
1092                 int err = group_send_sig_info(sig, info, p);
1093                 success |= !err;
1094                 retval = err;
1095         } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
1096         return success ? 0 : retval;
1097 }
1098
1099 int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
1100 {
1101         int error = -ESRCH;
1102         struct task_struct *p;
1103
1104         rcu_read_lock();
1105 retry:
1106         p = pid_task(pid, PIDTYPE_PID);
1107         if (p) {
1108                 error = group_send_sig_info(sig, info, p);
1109                 if (unlikely(error == -ESRCH))
1110                         /*
1111                          * The task was unhashed in between, try again.
1112                          * If it is dead, pid_task() will return NULL,
1113                          * if we race with de_thread() it will find the
1114                          * new leader.
1115                          */
1116                         goto retry;
1117         }
1118         rcu_read_unlock();
1119
1120         return error;
1121 }
1122
1123 int
1124 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1125 {
1126         int error;
1127         rcu_read_lock();
1128         error = kill_pid_info(sig, info, find_vpid(pid));
1129         rcu_read_unlock();
1130         return error;
1131 }
1132
1133 /* like kill_pid_info(), but doesn't use uid/euid of "current" */
1134 int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1135                       uid_t uid, uid_t euid, u32 secid)
1136 {
1137         int ret = -EINVAL;
1138         struct task_struct *p;
1139         const struct cred *pcred;
1140
1141         if (!valid_signal(sig))
1142                 return ret;
1143
1144         read_lock(&tasklist_lock);
1145         p = pid_task(pid, PIDTYPE_PID);
1146         if (!p) {
1147                 ret = -ESRCH;
1148                 goto out_unlock;
1149         }
1150         pcred = __task_cred(p);
1151         if ((info == SEND_SIG_NOINFO ||
1152              (!is_si_special(info) && SI_FROMUSER(info))) &&
1153             euid != pcred->suid && euid != pcred->uid &&
1154             uid  != pcred->suid && uid  != pcred->uid) {
1155                 ret = -EPERM;
1156                 goto out_unlock;
1157         }
1158         ret = security_task_kill(p, info, sig, secid);
1159         if (ret)
1160                 goto out_unlock;
1161         if (sig && p->sighand) {
1162                 unsigned long flags;
1163                 spin_lock_irqsave(&p->sighand->siglock, flags);
1164                 ret = __send_signal(sig, info, p, 1, 0);
1165                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1166         }
1167 out_unlock:
1168         read_unlock(&tasklist_lock);
1169         return ret;
1170 }
1171 EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
1172
1173 /*
1174  * kill_something_info() interprets pid in interesting ways just like kill(2).
1175  *
1176  * POSIX specifies that kill(-1,sig) is unspecified, but what we have
1177  * is probably wrong.  Should make it like BSD or SYSV.
1178  */
1179
1180 static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1181 {
1182         int ret;
1183
1184         if (pid > 0) {
1185                 rcu_read_lock();
1186                 ret = kill_pid_info(sig, info, find_vpid(pid));
1187                 rcu_read_unlock();
1188                 return ret;
1189         }
1190
1191         read_lock(&tasklist_lock);
1192         if (pid != -1) {
1193                 ret = __kill_pgrp_info(sig, info,
1194                                 pid ? find_vpid(-pid) : task_pgrp(current));
1195         } else {
1196                 int retval = 0, count = 0;
1197                 struct task_struct * p;
1198
1199                 for_each_process(p) {
1200                         if (task_pid_vnr(p) > 1 &&
1201                                         !same_thread_group(p, current)) {
1202                                 int err = group_send_sig_info(sig, info, p);
1203                                 ++count;
1204                                 if (err != -EPERM)
1205                                         retval = err;
1206                         }
1207                 }
1208                 ret = count ? retval : -ESRCH;
1209         }
1210         read_unlock(&tasklist_lock);
1211
1212         return ret;
1213 }
1214
1215 /*
1216  * These are for backward compatibility with the rest of the kernel source.
1217  */
1218
1219 /*
1220  * The caller must ensure the task can't exit.
1221  */
1222 int
1223 send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1224 {
1225         int ret;
1226         unsigned long flags;
1227
1228         /*
1229          * Make sure legacy kernel users don't send in bad values
1230          * (normal paths check this in check_kill_permission).
1231          */
1232         if (!valid_signal(sig))
1233                 return -EINVAL;
1234
1235         spin_lock_irqsave(&p->sighand->siglock, flags);
1236         ret = specific_send_sig_info(sig, info, p);
1237         spin_unlock_irqrestore(&p->sighand->siglock, flags);
1238         return ret;
1239 }
1240
1241 #define __si_special(priv) \
1242         ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO)
1243
1244 int
1245 send_sig(int sig, struct task_struct *p, int priv)
1246 {
1247         return send_sig_info(sig, __si_special(priv), p);
1248 }
1249
1250 void
1251 force_sig(int sig, struct task_struct *p)
1252 {
1253         force_sig_info(sig, SEND_SIG_PRIV, p);
1254 }
1255
1256 /*
1257  * When things go south during signal handling, we
1258  * will force a SIGSEGV. And if the signal that caused
1259  * the problem was already a SIGSEGV, we'll want to
1260  * make sure we don't even try to deliver the signal..
1261  */
1262 int
1263 force_sigsegv(int sig, struct task_struct *p)
1264 {
1265         if (sig == SIGSEGV) {
1266                 unsigned long flags;
1267                 spin_lock_irqsave(&p->sighand->siglock, flags);
1268                 p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
1269                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1270         }
1271         force_sig(SIGSEGV, p);
1272         return 0;
1273 }
1274
1275 int kill_pgrp(struct pid *pid, int sig, int priv)
1276 {
1277         int ret;
1278
1279         read_lock(&tasklist_lock);
1280         ret = __kill_pgrp_info(sig, __si_special(priv), pid);
1281         read_unlock(&tasklist_lock);
1282
1283         return ret;
1284 }
1285 EXPORT_SYMBOL(kill_pgrp);
1286
1287 int kill_pid(struct pid *pid, int sig, int priv)
1288 {
1289         return kill_pid_info(sig, __si_special(priv), pid);
1290 }
1291 EXPORT_SYMBOL(kill_pid);
1292
1293 /*
1294  * These functions support sending signals using preallocated sigqueue
1295  * structures.  This is needed "because realtime applications cannot
1296  * afford to lose notifications of asynchronous events, like timer
1297  * expirations or I/O completions".  In the case of POSIX timers
1298  * we allocate the sigqueue structure from timer_create().  If this
1299  * allocation fails we are able to report the failure to the application
1300  * with an EAGAIN error.
1301  */
1302  
1303 struct sigqueue *sigqueue_alloc(void)
1304 {
1305         struct sigqueue *q;
1306
1307         if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0)))
1308                 q->flags |= SIGQUEUE_PREALLOC;
1309         return(q);
1310 }
1311
1312 void sigqueue_free(struct sigqueue *q)
1313 {
1314         unsigned long flags;
1315         spinlock_t *lock = &current->sighand->siglock;
1316
1317         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1318         /*
1319          * We must hold ->siglock while testing q->list
1320          * to serialize with collect_signal() or with
1321          * __exit_signal()->flush_sigqueue().
1322          */
1323         spin_lock_irqsave(lock, flags);
1324         q->flags &= ~SIGQUEUE_PREALLOC;
1325         /*
1326          * If it is queued it will be freed when dequeued,
1327          * like the "regular" sigqueue.
1328          */
1329         if (!list_empty(&q->list))
1330                 q = NULL;
1331         spin_unlock_irqrestore(lock, flags);
1332
1333         if (q)
1334                 __sigqueue_free(q);
1335 }
1336
1337 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1338 {
1339         int sig = q->info.si_signo;
1340         struct sigpending *pending;
1341         unsigned long flags;
1342         int ret;
1343
1344         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1345
1346         ret = -1;
1347         if (!likely(lock_task_sighand(t, &flags)))
1348                 goto ret;
1349
1350         ret = 1; /* the signal is ignored */
1351         if (!prepare_signal(sig, t, 0))
1352                 goto out;
1353
1354         ret = 0;
1355         if (unlikely(!list_empty(&q->list))) {
1356                 /*
1357                  * If an SI_TIMER entry is already queued, just increment
1358                  * the overrun count.
1359                  */
1360                 BUG_ON(q->info.si_code != SI_TIMER);
1361                 q->info.si_overrun++;
1362                 goto out;
1363         }
1364         q->info.si_overrun = 0;
1365
1366         signalfd_notify(t, sig);
1367         pending = group ? &t->signal->shared_pending : &t->pending;
1368         list_add_tail(&q->list, &pending->list);
1369         sigaddset(&pending->signal, sig);
1370         complete_signal(sig, t, group);
1371 out:
1372         unlock_task_sighand(t, &flags);
1373 ret:
1374         return ret;
1375 }
1376
1377 /*
1378  * Wake up any threads in the parent blocked in wait* syscalls.
1379  */
1380 static inline void __wake_up_parent(struct task_struct *p,
1381                                     struct task_struct *parent)
1382 {
1383         wake_up_interruptible_sync(&parent->signal->wait_chldexit);
1384 }
1385
1386 /*
1387  * Let a parent know about the death of a child.
1388  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
1389  *
1390  * Returns -1 if our parent ignored us and so we've switched to
1391  * self-reaping, or else @sig.
1392  */
1393 int do_notify_parent(struct task_struct *tsk, int sig)
1394 {
1395         struct siginfo info;
1396         unsigned long flags;
1397         struct sighand_struct *psig;
1398         int ret = sig;
1399
1400         BUG_ON(sig == -1);
1401
1402         /* do_notify_parent_cldstop should have been called instead.  */
1403         BUG_ON(task_is_stopped_or_traced(tsk));
1404
1405         BUG_ON(!tsk->ptrace &&
1406                (tsk->group_leader != tsk || !thread_group_empty(tsk)));
1407
1408         info.si_signo = sig;
1409         info.si_errno = 0;
1410         /*
1411          * we are under tasklist_lock here so our parent is tied to
1412          * us and cannot exit and release its namespace.
1413          *
1414          * the only thing it can do is switch its nsproxy with sys_unshare,
1415          * but unsharing pid namespaces is not allowed, so we'll always
1416          * see the relevant namespace.
1417          *
1418          * write_lock() currently calls preempt_disable() which is the
1419          * same as rcu_read_lock(), but according to Oleg it is not
1420          * correct to rely on this.
1421          */
1422         rcu_read_lock();
1423         info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1424         info.si_uid = __task_cred(tsk)->uid;
1425         rcu_read_unlock();
1426
1427         info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
1428                                 tsk->signal->utime));
1429         info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
1430                                 tsk->signal->stime));
1431
1432         info.si_status = tsk->exit_code & 0x7f;
1433         if (tsk->exit_code & 0x80)
1434                 info.si_code = CLD_DUMPED;
1435         else if (tsk->exit_code & 0x7f)
1436                 info.si_code = CLD_KILLED;
1437         else {
1438                 info.si_code = CLD_EXITED;
1439                 info.si_status = tsk->exit_code >> 8;
1440         }
1441
1442         psig = tsk->parent->sighand;
1443         spin_lock_irqsave(&psig->siglock, flags);
1444         if (!tsk->ptrace && sig == SIGCHLD &&
1445             (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
1446              (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
1447                 /*
1448                  * We are exiting and our parent doesn't care.  POSIX.1
1449                  * defines special semantics for setting SIGCHLD to SIG_IGN
1450                  * or setting the SA_NOCLDWAIT flag: we should be reaped
1451                  * automatically and not left for our parent's wait4 call.
1452                  * Rather than having the parent do it as a magic kind of
1453                  * signal handler, we just set this to tell do_exit that we
1454                  * can be cleaned up without becoming a zombie.  Note that
1455                  * we still call __wake_up_parent in this case, because a
1456                  * blocked sys_wait4 might now return -ECHILD.
1457                  *
1458                  * Whether we send SIGCHLD or not for SA_NOCLDWAIT
1459                  * is implementation-defined: we do (if you don't want
1460                  * it, just use SIG_IGN instead).
1461                  */
1462                 ret = tsk->exit_signal = -1;
1463                 if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
1464                         sig = -1;
1465         }
1466         if (valid_signal(sig) && sig > 0)
1467                 __group_send_sig_info(sig, &info, tsk->parent);
1468         __wake_up_parent(tsk, tsk->parent);
1469         spin_unlock_irqrestore(&psig->siglock, flags);
1470
1471         return ret;
1472 }
1473
1474 static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1475 {
1476         struct siginfo info;
1477         unsigned long flags;
1478         struct task_struct *parent;
1479         struct sighand_struct *sighand;
1480
1481         if (tsk->ptrace & PT_PTRACED)
1482                 parent = tsk->parent;
1483         else {
1484                 tsk = tsk->group_leader;
1485                 parent = tsk->real_parent;
1486         }
1487
1488         info.si_signo = SIGCHLD;
1489         info.si_errno = 0;
1490         /*
1491          * see comment in do_notify_parent() about the following 3 lines
1492          */
1493         rcu_read_lock();
1494         info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1495         info.si_uid = __task_cred(tsk)->uid;
1496         rcu_read_unlock();
1497
1498         info.si_utime = cputime_to_clock_t(tsk->utime);
1499         info.si_stime = cputime_to_clock_t(tsk->stime);
1500
1501         info.si_code = why;
1502         switch (why) {
1503         case CLD_CONTINUED:
1504                 info.si_status = SIGCONT;
1505                 break;
1506         case CLD_STOPPED:
1507                 info.si_status = tsk->signal->group_exit_code & 0x7f;
1508                 break;
1509         case CLD_TRAPPED:
1510                 info.si_status = tsk->exit_code & 0x7f;
1511                 break;
1512         default:
1513                 BUG();
1514         }
1515
1516         sighand = parent->sighand;
1517         spin_lock_irqsave(&sighand->siglock, flags);
1518         if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
1519             !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
1520                 __group_send_sig_info(SIGCHLD, &info, parent);
1521         /*
1522          * Even if SIGCHLD is not generated, we must wake up wait4 calls.
1523          */
1524         __wake_up_parent(tsk, parent);
1525         spin_unlock_irqrestore(&sighand->siglock, flags);
1526 }
1527
1528 static inline int may_ptrace_stop(void)
1529 {
1530         if (!likely(current->ptrace & PT_PTRACED))
1531                 return 0;
1532         /*
1533          * Are we in the middle of do_coredump?
1534          * If so, and our tracer is also part of the coredump, stopping
1535          * is a deadlock situation, and pointless because our tracer
1536          * is dead, so don't allow us to stop.
1537          * If SIGKILL was already sent before the caller unlocked
1538          * ->siglock we must see ->core_state != NULL. Otherwise it
1539          * is safe to enter schedule().
1540          */
1541         if (unlikely(current->mm->core_state) &&
1542             unlikely(current->mm == current->parent->mm))
1543                 return 0;
1544
1545         return 1;
1546 }
1547
1548 /*
1549  * Return nonzero if there is a SIGKILL that should be waking us up.
1550  * Called with the siglock held.
1551  */
1552 static int sigkill_pending(struct task_struct *tsk)
1553 {
1554         return  sigismember(&tsk->pending.signal, SIGKILL) ||
1555                 sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
1556 }
1557
1558 /*
1559  * This must be called with current->sighand->siglock held.
1560  *
1561  * This should be the path for all ptrace stops.
1562  * We always set current->last_siginfo while stopped here.
1563  * That makes it a way to test a stopped process for
1564  * being ptrace-stopped vs being job-control-stopped.
1565  *
1566  * If we actually decide not to stop at all because the tracer
1567  * is gone, we keep current->exit_code unless clear_code.
1568  */
1569 static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1570 {
1571         if (arch_ptrace_stop_needed(exit_code, info)) {
1572                 /*
1573                  * The arch code has something special to do before a
1574                  * ptrace stop.  This is allowed to block, e.g. for faults
1575                  * on user stack pages.  We can't keep the siglock while
1576                  * calling arch_ptrace_stop, so we must release it now.
1577                  * To preserve proper semantics, we must do this before
1578                  * any signal bookkeeping like checking group_stop_count.
1579                  * Meanwhile, a SIGKILL could come in before we retake the
1580                  * siglock.  That must prevent us from sleeping in TASK_TRACED.
1581                  * So after regaining the lock, we must check for SIGKILL.
1582                  */
1583                 spin_unlock_irq(&current->sighand->siglock);
1584                 arch_ptrace_stop(exit_code, info);
1585                 spin_lock_irq(&current->sighand->siglock);
1586                 if (sigkill_pending(current))
1587                         return;
1588         }
1589
1590         /*
1591          * If there is a group stop in progress,
1592          * we must participate in the bookkeeping.
1593          */
1594         if (current->signal->group_stop_count > 0)
1595                 --current->signal->group_stop_count;
1596
1597         current->last_siginfo = info;
1598         current->exit_code = exit_code;
1599
1600         /* Let the debugger run.  */
1601         __set_current_state(TASK_TRACED);
1602         spin_unlock_irq(&current->sighand->siglock);
1603         read_lock(&tasklist_lock);
1604         if (may_ptrace_stop()) {
1605                 do_notify_parent_cldstop(current, CLD_TRAPPED);
1606                 /*
1607                  * Don't want to allow preemption here, because
1608                  * sys_ptrace() needs this task to be inactive.
1609                  *
1610                  * XXX: implement read_unlock_no_resched().
1611                  */
1612                 preempt_disable();
1613                 read_unlock(&tasklist_lock);
1614                 preempt_enable_no_resched();
1615                 schedule();
1616         } else {
1617                 /*
1618                  * By the time we got the lock, our tracer went away.
1619                  * Don't drop the lock yet, another tracer may come.
1620                  */
1621                 __set_current_state(TASK_RUNNING);
1622                 if (clear_code)
1623                         current->exit_code = 0;
1624                 read_unlock(&tasklist_lock);
1625         }
1626
1627         /*
1628          * While in TASK_TRACED, we were considered "frozen enough".
1629          * Now that we woke up, it's crucial if we're supposed to be
1630          * frozen that we freeze now before running anything substantial.
1631          */
1632         try_to_freeze();
1633
1634         /*
1635          * We are back.  Now reacquire the siglock before touching
1636          * last_siginfo, so that we are sure to have synchronized with
1637          * any signal-sending on another CPU that wants to examine it.
1638          */
1639         spin_lock_irq(&current->sighand->siglock);
1640         current->last_siginfo = NULL;
1641
1642         /*
1643          * Queued signals ignored us while we were stopped for tracing.
1644          * So check for any that we should take before resuming user mode.
1645          * This sets TIF_SIGPENDING, but never clears it.
1646          */
1647         recalc_sigpending_tsk(current);
1648 }
1649
1650 void ptrace_notify(int exit_code)
1651 {
1652         siginfo_t info;
1653
1654         BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
1655
1656         memset(&info, 0, sizeof info);
1657         info.si_signo = SIGTRAP;
1658         info.si_code = exit_code;
1659         info.si_pid = task_pid_vnr(current);
1660         info.si_uid = current_uid();
1661
1662         /* Let the debugger run.  */
1663         spin_lock_irq(&current->sighand->siglock);
1664         ptrace_stop(exit_code, 1, &info);
1665         spin_unlock_irq(&current->sighand->siglock);
1666 }
1667
1668 static void
1669 finish_stop(int stop_count)
1670 {
1671         /*
1672          * If there are no other threads in the group, or if there is
1673          * a group stop in progress and we are the last to stop,
1674          * report to the parent.  When ptraced, every thread reports itself.
1675          */
1676         if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) {
1677                 read_lock(&tasklist_lock);
1678                 do_notify_parent_cldstop(current, CLD_STOPPED);
1679                 read_unlock(&tasklist_lock);
1680         }
1681
1682         do {
1683                 schedule();
1684         } while (try_to_freeze());
1685         /*
1686          * Now we don't run again until continued.
1687          */
1688         current->exit_code = 0;
1689 }
1690
1691 /*
1692  * This performs the stopping for SIGSTOP and other stop signals.
1693  * We have to stop all threads in the thread group.
1694  * Returns nonzero if we've actually stopped and released the siglock.
1695  * Returns zero if we didn't stop and still hold the siglock.
1696  */
1697 static int do_signal_stop(int signr)
1698 {
1699         struct signal_struct *sig = current->signal;
1700         int stop_count;
1701
1702         if (sig->group_stop_count > 0) {
1703                 /*
1704                  * There is a group stop in progress.  We don't need to
1705                  * start another one.
1706                  */
1707                 stop_count = --sig->group_stop_count;
1708         } else {
1709                 struct task_struct *t;
1710
1711                 if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
1712                     unlikely(signal_group_exit(sig)))
1713                         return 0;
1714                 /*
1715                  * There is no group stop already in progress.
1716                  * We must initiate one now.
1717                  */
1718                 sig->group_exit_code = signr;
1719
1720                 stop_count = 0;
1721                 for (t = next_thread(current); t != current; t = next_thread(t))
1722                         /*
1723                          * Setting state to TASK_STOPPED for a group
1724                          * stop is always done with the siglock held,
1725                          * so this check has no races.
1726                          */
1727                         if (!(t->flags & PF_EXITING) &&
1728                             !task_is_stopped_or_traced(t)) {
1729                                 stop_count++;
1730                                 signal_wake_up(t, 0);
1731                         }
1732                 sig->group_stop_count = stop_count;
1733         }
1734
1735         if (stop_count == 0)
1736                 sig->flags = SIGNAL_STOP_STOPPED;
1737         current->exit_code = sig->group_exit_code;
1738         __set_current_state(TASK_STOPPED);
1739
1740         spin_unlock_irq(&current->sighand->siglock);
1741         finish_stop(stop_count);
1742         return 1;
1743 }
1744
1745 static int ptrace_signal(int signr, siginfo_t *info,
1746                          struct pt_regs *regs, void *cookie)
1747 {
1748         if (!(current->ptrace & PT_PTRACED))
1749                 return signr;
1750
1751         ptrace_signal_deliver(regs, cookie);
1752
1753         /* Let the debugger run.  */
1754         ptrace_stop(signr, 0, info);
1755
1756         /* We're back.  Did the debugger cancel the sig?  */
1757         signr = current->exit_code;
1758         if (signr == 0)
1759                 return signr;
1760
1761         current->exit_code = 0;
1762
1763         /* Update the siginfo structure if the signal has
1764            changed.  If the debugger wanted something
1765            specific in the siginfo structure then it should
1766            have updated *info via PTRACE_SETSIGINFO.  */
1767         if (signr != info->si_signo) {
1768                 info->si_signo = signr;
1769                 info->si_errno = 0;
1770                 info->si_code = SI_USER;
1771                 info->si_pid = task_pid_vnr(current->parent);
1772                 info->si_uid = task_uid(current->parent);
1773         }
1774
1775         /* If the (new) signal is now blocked, requeue it.  */
1776         if (sigismember(&current->blocked, signr)) {
1777                 specific_send_sig_info(signr, info, current);
1778                 signr = 0;
1779         }
1780
1781         return signr;
1782 }
1783
1784 int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
1785                           struct pt_regs *regs, void *cookie)
1786 {
1787         struct sighand_struct *sighand = current->sighand;
1788         struct signal_struct *signal = current->signal;
1789         int signr;
1790
1791 relock:
1792         /*
1793          * We'll jump back here after any time we were stopped in TASK_STOPPED.
1794          * While in TASK_STOPPED, we were considered "frozen enough".
1795          * Now that we woke up, it's crucial if we're supposed to be
1796          * frozen that we freeze now before running anything substantial.
1797          */
1798         try_to_freeze();
1799
1800         spin_lock_irq(&sighand->siglock);
1801         /*
1802          * Every stopped thread goes here after wakeup. Check to see if
1803          * we should notify the parent, prepare_signal(SIGCONT) encodes
1804          * we should notify the parent; prepare_signal(SIGCONT) encodes
1805          */
1806         if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
1807                 int why = (signal->flags & SIGNAL_STOP_CONTINUED)
1808                                 ? CLD_CONTINUED : CLD_STOPPED;
1809                 signal->flags &= ~SIGNAL_CLD_MASK;
1810                 spin_unlock_irq(&sighand->siglock);
1811
1812                 if (unlikely(!tracehook_notify_jctl(1, why)))
1813                         goto relock;
1814
1815                 read_lock(&tasklist_lock);
1816                 do_notify_parent_cldstop(current->group_leader, why);
1817                 read_unlock(&tasklist_lock);
1818                 goto relock;
1819         }
1820
1821         for (;;) {
1822                 struct k_sigaction *ka;
1823
1824                 if (unlikely(signal->group_stop_count > 0) &&
1825                     do_signal_stop(0))
1826                         goto relock;
1827
1828                 /*
1829                  * Tracing can induce an artificial signal and choose sigaction.
1830                  * The return value in @signr determines the default action,
1831                  * but @info->si_signo is the signal number we will report.
1832                  */
1833                 signr = tracehook_get_signal(current, regs, info, return_ka);
1834                 if (unlikely(signr < 0))
1835                         goto relock;
1836                 if (unlikely(signr != 0))
1837                         ka = return_ka;
1838                 else {
1839                         signr = dequeue_signal(current, &current->blocked,
1840                                                info);
1841
1842                         if (!signr)
1843                                 break; /* will return 0 */
1844
1845                         if (signr != SIGKILL) {
1846                                 signr = ptrace_signal(signr, info,
1847                                                       regs, cookie);
1848                                 if (!signr)
1849                                         continue;
1850                         }
1851
1852                         ka = &sighand->action[signr-1];
1853                 }
1854
1855                 if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
1856                         continue;
1857                 if (ka->sa.sa_handler != SIG_DFL) {
1858                         /* Run the handler.  */
1859                         *return_ka = *ka;
1860
1861                         if (ka->sa.sa_flags & SA_ONESHOT)
1862                                 ka->sa.sa_handler = SIG_DFL;
1863
1864                         break; /* will return non-zero "signr" value */
1865                 }
1866
1867                 /*
1868                  * Now we are doing the default action for this signal.
1869                  */
1870                 if (sig_kernel_ignore(signr)) /* Default is nothing. */
1871                         continue;
1872
1873                 /*
1874                  * Global init gets no signals it doesn't want.
1875                  * Container-init gets no signals it doesn't want from same
1876                  * container.
1877                  *
1878                  * Note that if global/container-init sees a sig_kernel_only()
1879                  * signal here, the signal must have been generated internally
1880                  * or must have come from an ancestor namespace. In either
1881                  * case, the signal cannot be dropped.
1882                  */
1883                 if (unlikely(signal->flags & SIGNAL_UNKILLABLE) &&
1884                                 !sig_kernel_only(signr))
1885                         continue;
1886
1887                 if (sig_kernel_stop(signr)) {
1888                         /*
1889                          * The default action is to stop all threads in
1890                          * the thread group.  The job control signals
1891                          * do nothing in an orphaned pgrp, but SIGSTOP
1892                          * always works.  Note that siglock needs to be
1893                          * dropped during the call to is_orphaned_pgrp()
1894                          * because of lock ordering with tasklist_lock.
1895                          * This allows an intervening SIGCONT to be posted.
1896                          * We need to check for that and bail out if necessary.
1897                          */
1898                         if (signr != SIGSTOP) {
1899                                 spin_unlock_irq(&sighand->siglock);
1900
1901                                 /* signals can be posted during this window */
1902
1903                                 if (is_current_pgrp_orphaned())
1904                                         goto relock;
1905
1906                                 spin_lock_irq(&sighand->siglock);
1907                         }
1908
1909                         if (likely(do_signal_stop(info->si_signo))) {
1910                                 /* It released the siglock.  */
1911                                 goto relock;
1912                         }
1913
1914                         /*
1915                          * We didn't actually stop, due to a race
1916                          * with SIGCONT or something like that.
1917                          */
1918                         continue;
1919                 }
1920
1921                 spin_unlock_irq(&sighand->siglock);
1922
1923                 /*
1924                  * Anything else is fatal, maybe with a core dump.
1925                  */
1926                 current->flags |= PF_SIGNALED;
1927
1928                 if (sig_kernel_coredump(signr)) {
1929                         if (print_fatal_signals)
1930                                 print_fatal_signal(regs, info->si_signo);
1931                         /*
1932                          * If it was able to dump core, this kills all
1933                          * other threads in the group and synchronizes with
1934                          * their demise.  If we lost the race with another
1935                          * thread getting here, it set group_exit_code
1936                          * first and our do_group_exit call below will use
1937                          * that value and ignore the one we pass it.
1938                          */
1939                         do_coredump(info->si_signo, info->si_signo, regs);
1940                 }
1941
1942                 /*
1943                  * Death signals, no core dump.
1944                  */
1945                 do_group_exit(info->si_signo);
1946                 /* NOTREACHED */
1947         }
1948         spin_unlock_irq(&sighand->siglock);
1949         return signr;
1950 }
1951
1952 void exit_signals(struct task_struct *tsk)
1953 {
1954         int group_stop = 0;
1955         struct task_struct *t;
1956
1957         if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
1958                 tsk->flags |= PF_EXITING;
1959                 return;
1960         }
1961
1962         spin_lock_irq(&tsk->sighand->siglock);
1963         /*
1964          * From now this task is not visible for group-wide signals,
1965          * see wants_signal(), do_signal_stop().
1966          */
1967         tsk->flags |= PF_EXITING;
1968         if (!signal_pending(tsk))
1969                 goto out;
1970
1971         /* It could be that __group_complete_signal() chose us to
1972          * notify about a group-wide signal. Another thread should be
1973          * woken now to take the signal, since we will not.
1974          */
1975         for (t = tsk; (t = next_thread(t)) != tsk; )
1976                 if (!signal_pending(t) && !(t->flags & PF_EXITING))
1977                         recalc_sigpending_and_wake(t);
1978
1979         if (unlikely(tsk->signal->group_stop_count) &&
1980                         !--tsk->signal->group_stop_count) {
1981                 tsk->signal->flags = SIGNAL_STOP_STOPPED;
1982                 group_stop = 1;
1983         }
1984 out:
1985         spin_unlock_irq(&tsk->sighand->siglock);
1986
1987         if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) {
1988                 read_lock(&tasklist_lock);
1989                 do_notify_parent_cldstop(tsk, CLD_STOPPED);
1990                 read_unlock(&tasklist_lock);
1991         }
1992 }
1993
1994 EXPORT_SYMBOL(recalc_sigpending);
1995 EXPORT_SYMBOL_GPL(dequeue_signal);
1996 EXPORT_SYMBOL(flush_signals);
1997 EXPORT_SYMBOL(force_sig);
1998 EXPORT_SYMBOL(send_sig);
1999 EXPORT_SYMBOL(send_sig_info);
2000 EXPORT_SYMBOL(sigprocmask);
2001 EXPORT_SYMBOL(block_all_signals);
2002 EXPORT_SYMBOL(unblock_all_signals);
2003
2004
2005 /*
2006  * System call entry points.
2007  */
2008
2009 SYSCALL_DEFINE0(restart_syscall)
2010 {
2011         struct restart_block *restart = &current_thread_info()->restart_block;
2012         return restart->fn(restart);
2013 }
2014
2015 long do_no_restart_syscall(struct restart_block *param)
2016 {
2017         return -EINTR;
2018 }
2019
2020 /*
2021  * We don't need to get the kernel lock - this is all local to this
2022  * particular thread. (and that's good, because this is _heavily_
2023  * used by various programs)
2024  */
2025
2026 /*
2027  * This is also useful for kernel threads that want to temporarily
2028  * (or permanently) block certain signals.
2029  *
2030  * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel
2031  * interface happily blocks "unblockable" signals like SIGKILL
2032  * and friends.
2033  */
2034 int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
2035 {
2036         int error;
2037
2038         spin_lock_irq(&current->sighand->siglock);
2039         if (oldset)
2040                 *oldset = current->blocked;
2041
2042         error = 0;
2043         switch (how) {
2044         case SIG_BLOCK:
2045                 sigorsets(&current->blocked, &current->blocked, set);
2046                 break;
2047         case SIG_UNBLOCK:
2048                 signandsets(&current->blocked, &current->blocked, set);
2049                 break;
2050         case SIG_SETMASK:
2051                 current->blocked = *set;
2052                 break;
2053         default:
2054                 error = -EINVAL;
2055         }
2056         recalc_sigpending();
2057         spin_unlock_irq(&current->sighand->siglock);
2058
2059         return error;
2060 }
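
/*
 * Minimal in-kernel usage sketch (illustrative only): a kernel thread
 * that wants to block every signal, including SIGKILL, can do
 *
 *	sigset_t all;
 *
 *	sigfillset(&all);
 *	sigprocmask(SIG_BLOCK, &all, NULL);
 *
 * which is exactly the "unblockable signals" behaviour noted in the
 * comment above; the user-visible sys_rt_sigprocmask() path below
 * always strips SIGKILL and SIGSTOP from the requested set first.
 */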
2061
2062 SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
2063                 sigset_t __user *, oset, size_t, sigsetsize)
2064 {
2065         int error = -EINVAL;
2066         sigset_t old_set, new_set;
2067
2068         /* XXX: Don't preclude handling different sized sigset_t's.  */
2069         if (sigsetsize != sizeof(sigset_t))
2070                 goto out;
2071
2072         if (set) {
2073                 error = -EFAULT;
2074                 if (copy_from_user(&new_set, set, sizeof(*set)))
2075                         goto out;
2076                 sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
2077
2078                 error = sigprocmask(how, &new_set, &old_set);
2079                 if (error)
2080                         goto out;
2081                 if (oset)
2082                         goto set_old;
2083         } else if (oset) {
2084                 spin_lock_irq(&current->sighand->siglock);
2085                 old_set = current->blocked;
2086                 spin_unlock_irq(&current->sighand->siglock);
2087
2088         set_old:
2089                 error = -EFAULT;
2090                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2091                         goto out;
2092         }
2093         error = 0;
2094 out:
2095         return error;
2096 }
2097
2098 long do_sigpending(void __user *set, unsigned long sigsetsize)
2099 {
2100         long error = -EINVAL;
2101         sigset_t pending;
2102
2103         if (sigsetsize > sizeof(sigset_t))
2104                 goto out;
2105
2106         spin_lock_irq(&current->sighand->siglock);
2107         sigorsets(&pending, &current->pending.signal,
2108                   &current->signal->shared_pending.signal);
2109         spin_unlock_irq(&current->sighand->siglock);
2110
2111         /* Outside the lock because only this thread touches it.  */
2112         sigandsets(&pending, &current->blocked, &pending);
2113
2114         error = -EFAULT;
2115         if (!copy_to_user(set, &pending, sigsetsize))
2116                 error = 0;
2117
2118 out:
2119         return error;
2120 }
2121
2122 SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
2123 {
2124         return do_sigpending(set, sigsetsize);
2125 }
2126
2127 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
2128
2129 int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
2130 {
2131         int err;
2132
2133         if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
2134                 return -EFAULT;
2135         if (from->si_code < 0)
2136                 return __copy_to_user(to, from, sizeof(siginfo_t))
2137                         ? -EFAULT : 0;
2138         /*
2139          * If you change siginfo_t structure, please be sure
2140          * this code is fixed accordingly.
2141          * Please remember to update the signalfd_copyinfo() function
2142          * inside fs/signalfd.c too, in case siginfo_t changes.
2143          * It should never copy any pad contained in the structure
2144          * to avoid security leaks, but must copy the generic
2145          * 3 ints plus the relevant union member.
2146          */
2147         err = __put_user(from->si_signo, &to->si_signo);
2148         err |= __put_user(from->si_errno, &to->si_errno);
2149         err |= __put_user((short)from->si_code, &to->si_code);
2150         switch (from->si_code & __SI_MASK) {
2151         case __SI_KILL:
2152                 err |= __put_user(from->si_pid, &to->si_pid);
2153                 err |= __put_user(from->si_uid, &to->si_uid);
2154                 break;
2155         case __SI_TIMER:
2156                 err |= __put_user(from->si_tid, &to->si_tid);
2157                 err |= __put_user(from->si_overrun, &to->si_overrun);
2158                 err |= __put_user(from->si_ptr, &to->si_ptr);
2159                 break;
2160         case __SI_POLL:
2161                 err |= __put_user(from->si_band, &to->si_band);
2162                 err |= __put_user(from->si_fd, &to->si_fd);
2163                 break;
2164         case __SI_FAULT:
2165                 err |= __put_user(from->si_addr, &to->si_addr);
2166 #ifdef __ARCH_SI_TRAPNO
2167                 err |= __put_user(from->si_trapno, &to->si_trapno);
2168 #endif
2169                 break;
2170         case __SI_CHLD:
2171                 err |= __put_user(from->si_pid, &to->si_pid);
2172                 err |= __put_user(from->si_uid, &to->si_uid);
2173                 err |= __put_user(from->si_status, &to->si_status);
2174                 err |= __put_user(from->si_utime, &to->si_utime);
2175                 err |= __put_user(from->si_stime, &to->si_stime);
2176                 break;
2177         case __SI_RT: /* This is not generated by the kernel as of now. */
2178         case __SI_MESGQ: /* But this is */
2179                 err |= __put_user(from->si_pid, &to->si_pid);
2180                 err |= __put_user(from->si_uid, &to->si_uid);
2181                 err |= __put_user(from->si_ptr, &to->si_ptr);
2182                 break;
2183         default: /* this is just in case for now ... */
2184                 err |= __put_user(from->si_pid, &to->si_pid);
2185                 err |= __put_user(from->si_uid, &to->si_uid);
2186                 break;
2187         }
2188         return err;
2189 }
2190
2191 #endif
2192
2193 SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
2194                 siginfo_t __user *, uinfo, const struct timespec __user *, uts,
2195                 size_t, sigsetsize)
2196 {
2197         int ret, sig;
2198         sigset_t these;
2199         struct timespec ts;
2200         siginfo_t info;
2201         long timeout = 0;
2202
2203         /* XXX: Don't preclude handling different sized sigset_t's.  */
2204         if (sigsetsize != sizeof(sigset_t))
2205                 return -EINVAL;
2206
2207         if (copy_from_user(&these, uthese, sizeof(these)))
2208                 return -EFAULT;
2209                 
2210         /*
2211          * Invert the set of allowed signals to get those we
2212          * want to block.
2213          */
2214         sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
2215         signotset(&these);
2216
2217         if (uts) {
2218                 if (copy_from_user(&ts, uts, sizeof(ts)))
2219                         return -EFAULT;
2220                 if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
2221                     || ts.tv_sec < 0)
2222                         return -EINVAL;
2223         }
2224
2225         spin_lock_irq(&current->sighand->siglock);
2226         sig = dequeue_signal(current, &these, &info);
2227         if (!sig) {
2228                 timeout = MAX_SCHEDULE_TIMEOUT;
2229                 if (uts)
2230                         timeout = (timespec_to_jiffies(&ts)
2231                                    + (ts.tv_sec || ts.tv_nsec));
2232
2233                 if (timeout) {
2234                         /* None ready -- temporarily unblock those we're
2235                          * interested in while we are sleeping, so that we'll
2236                          * be awakened when they arrive.  */
2237                         current->real_blocked = current->blocked;
2238                         sigandsets(&current->blocked, &current->blocked, &these);
2239                         recalc_sigpending();
2240                         spin_unlock_irq(&current->sighand->siglock);
2241
2242                         timeout = schedule_timeout_interruptible(timeout);
2243
2244                         spin_lock_irq(&current->sighand->siglock);
2245                         sig = dequeue_signal(current, &these, &info);
2246                         current->blocked = current->real_blocked;
2247                         siginitset(&current->real_blocked, 0);
2248                         recalc_sigpending();
2249                 }
2250         }
2251         spin_unlock_irq(&current->sighand->siglock);
2252
2253         if (sig) {
2254                 ret = sig;
2255                 if (uinfo) {
2256                         if (copy_siginfo_to_user(uinfo, &info))
2257                                 ret = -EFAULT;
2258                 }
2259         } else {
2260                 ret = -EAGAIN;
2261                 if (timeout)
2262                         ret = -EINTR;
2263         }
2264
2265         return ret;
2266 }
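
/*
 * Illustrative userspace counterpart, not part of this file: the
 * syscall above backs sigtimedwait(2), which is typically used as
 *
 *	sigset_t set;
 *	siginfo_t si;
 *	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *
 *	sigemptyset(&set);
 *	sigaddset(&set, SIGUSR1);
 *	sigprocmask(SIG_BLOCK, &set, NULL);	(keep it pending, not delivered)
 *	if (sigtimedwait(&set, &si, &ts) < 0 && errno == EAGAIN)
 *		handle_timeout();
 *
 * The set inversion above turns the caller's "signals of interest"
 * into a blocked set for the sleep, so the signals being waited for
 * are temporarily unblocked and can wake the caller.
 */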
2267
2268 SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
2269 {
2270         struct siginfo info;
2271
2272         info.si_signo = sig;
2273         info.si_errno = 0;
2274         info.si_code = SI_USER;
2275         info.si_pid = task_tgid_vnr(current);
2276         info.si_uid = current_uid();
2277
2278         return kill_something_info(sig, &info, pid);
2279 }
2280
2281 static int do_tkill(pid_t tgid, pid_t pid, int sig)
2282 {
2283         int error;
2284         struct siginfo info;
2285         struct task_struct *p;
2286         unsigned long flags;
2287
2288         error = -ESRCH;
2289         info.si_signo = sig;
2290         info.si_errno = 0;
2291         info.si_code = SI_TKILL;
2292         info.si_pid = task_tgid_vnr(current);
2293         info.si_uid = current_uid();
2294
2295         rcu_read_lock();
2296         p = find_task_by_vpid(pid);
2297         if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
2298                 error = check_kill_permission(sig, &info, p);
2299                 /*
2300                  * The null signal is a permissions and process existence
2301                  * probe.  No signal is actually delivered.
2302                  *
2303                  * If lock_task_sighand() fails we pretend the task dies
2304                  * after receiving the signal. The window is tiny, and the
2305                  * signal is private anyway.
2306                  */
2307                 if (!error && sig && lock_task_sighand(p, &flags)) {
2308                         error = specific_send_sig_info(sig, &info, p);
2309                         unlock_task_sighand(p, &flags);
2310                 }
2311         }
2312         rcu_read_unlock();
2313
2314         return error;
2315 }
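
/*
 * Usage note (illustrative): the sig == 0 case described above is the
 * standard existence/permission probe, e.g. from user space:
 *
 *	if (kill(pid, 0) == 0 || errno == EPERM)
 *		process_exists = 1;
 *
 * tkill() and tgkill() honour the same convention for single threads.
 */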
2316
2317 /**
2318  *  sys_tgkill - send signal to one specific thread
2319  *  @tgid: the thread group ID of the thread
2320  *  @pid: the PID of the thread
2321  *  @sig: signal to be sent
2322  *
2323  *  This syscall also checks the @tgid and returns -ESRCH even if the PID
2324  *  exists but it's not belonging to the target process anymore. This
2325  *  method solves the problem of threads exiting and PIDs getting reused.
2326  */
2327 SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig)
2328 {
2329         /* This is only valid for single tasks */
2330         if (pid <= 0 || tgid <= 0)
2331                 return -EINVAL;
2332
2333         return do_tkill(tgid, pid, sig);
2334 }
2335
2336 /*
2337  *  Send a signal to only one task, even if it's a CLONE_THREAD task.
2338  */
2339 SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
2340 {
2341         /* This is only valid for single tasks */
2342         if (pid <= 0)
2343                 return -EINVAL;
2344
2345         return do_tkill(0, pid, sig);
2346 }
2347
2348 SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2349                 siginfo_t __user *, uinfo)
2350 {
2351         siginfo_t info;
2352
2353         if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
2354                 return -EFAULT;
2355
2356         /* Not even root can pretend to send signals from the kernel.
2357            Nor can they impersonate a kill(), which adds source info.  */
2358         if (info.si_code >= 0)
2359                 return -EPERM;
2360         info.si_signo = sig;
2361
2362         /* POSIX.1b doesn't mention process groups.  */
2363         return kill_proc_info(sig, &info, pid);
2364 }
2365
2366 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2367 {
2368         struct task_struct *t = current;
2369         struct k_sigaction *k;
2370         sigset_t mask;
2371
2372         if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
2373                 return -EINVAL;
2374
2375         k = &t->sighand->action[sig-1];
2376
2377         spin_lock_irq(&current->sighand->siglock);
2378         if (oact)
2379                 *oact = *k;
2380
2381         if (act) {
2382                 sigdelsetmask(&act->sa.sa_mask,
2383                               sigmask(SIGKILL) | sigmask(SIGSTOP));
2384                 *k = *act;
2385                 /*
2386                  * POSIX 3.3.1.3:
2387                  *  "Setting a signal action to SIG_IGN for a signal that is
2388                  *   pending shall cause the pending signal to be discarded,
2389                  *   whether or not it is blocked."
2390                  *
2391                  *  "Setting a signal action to SIG_DFL for a signal that is
2392                  *   pending and whose default action is to ignore the signal
2393                  *   (for example, SIGCHLD), shall cause the pending signal to
2394                  *   be discarded, whether or not it is blocked"
2395                  */
2396                 if (sig_handler_ignored(sig_handler(t, sig), sig)) {
2397                         sigemptyset(&mask);
2398                         sigaddset(&mask, sig);
2399                         rm_from_queue_full(&mask, &t->signal->shared_pending);
2400                         do {
2401                                 rm_from_queue_full(&mask, &t->pending);
2402                                 t = next_thread(t);
2403                         } while (t != current);
2404                 }
2405         }
2406
2407         spin_unlock_irq(&current->sighand->siglock);
2408         return 0;
2409 }
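
/*
 * Illustrative consequence of the POSIX rule quoted above (sketch, not
 * part of this file): even while SIGCHLD is blocked and pending,
 *
 *	signal(SIGCHLD, SIG_IGN);
 *
 * discards the pending instance, so a later sigpending() no longer
 * reports it. The rm_from_queue_full() calls above implement exactly
 * that flush, for both the shared queue and every thread in the group.
 */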
2410
2411 int 
2412 do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp)
2413 {
2414         stack_t oss;
2415         int error;
2416
2417         if (uoss) {
2418                 oss.ss_sp = (void __user *) current->sas_ss_sp;
2419                 oss.ss_size = current->sas_ss_size;
2420                 oss.ss_flags = sas_ss_flags(sp);
2421         }
2422
2423         if (uss) {
2424                 void __user *ss_sp;
2425                 size_t ss_size;
2426                 int ss_flags;
2427
2428                 error = -EFAULT;
2429                 if (!access_ok(VERIFY_READ, uss, sizeof(*uss))
2430                     || __get_user(ss_sp, &uss->ss_sp)
2431                     || __get_user(ss_flags, &uss->ss_flags)
2432                     || __get_user(ss_size, &uss->ss_size))
2433                         goto out;
2434
2435                 error = -EPERM;
2436                 if (on_sig_stack(sp))
2437                         goto out;
2438
2439                 error = -EINVAL;
2440                 /*
2441                  *
2442                  * Note - this code used to test ss_flags incorrectly:
2443                  *        old code may have been written using ss_flags==0
2444                  *        to mean ss_flags==SS_ONSTACK (as this was the only
2445                  *        way that worked), so this fix preserves that older
2446                  *        mechanism.
2447                  */
2448                 if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
2449                         goto out;
2450
2451                 if (ss_flags == SS_DISABLE) {
2452                         ss_size = 0;
2453                         ss_sp = NULL;
2454                 } else {
2455                         error = -ENOMEM;
2456                         if (ss_size < MINSIGSTKSZ)
2457                                 goto out;
2458                 }
2459
2460                 current->sas_ss_sp = (unsigned long) ss_sp;
2461                 current->sas_ss_size = ss_size;
2462         }
2463
2464         if (uoss) {
2465                 error = -EFAULT;
2466                 if (copy_to_user(uoss, &oss, sizeof(oss)))
2467                         goto out;
2468         }
2469
2470         error = 0;
2471 out:
2472         return error;
2473 }
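
/*
 * Illustrative userspace sketch, not part of this file: the checks
 * above back sigaltstack(2), which is typically set up as
 *
 *	stack_t ss = {
 *		.ss_sp    = malloc(SIGSTKSZ),
 *		.ss_size  = SIGSTKSZ,
 *		.ss_flags = 0,
 *	};
 *	sigaltstack(&ss, NULL);
 *
 * Passing ss_size < MINSIGSTKSZ fails with -ENOMEM, and changing the
 * stack while currently running on it fails with -EPERM, as coded
 * above.
 */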
2474
2475 #ifdef __ARCH_WANT_SYS_SIGPENDING
2476
2477 SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
2478 {
2479         return do_sigpending(set, sizeof(*set));
2480 }
2481
2482 #endif
2483
2484 #ifdef __ARCH_WANT_SYS_SIGPROCMASK
2485 /* Some platforms have their own version with special arguments; others
2486    support only sys_rt_sigprocmask.  */
2487
2488 SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
2489                 old_sigset_t __user *, oset)
2490 {
2491         int error;
2492         old_sigset_t old_set, new_set;
2493
2494         if (set) {
2495                 error = -EFAULT;
2496                 if (copy_from_user(&new_set, set, sizeof(*set)))
2497                         goto out;
2498                 new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
2499
2500                 spin_lock_irq(&current->sighand->siglock);
2501                 old_set = current->blocked.sig[0];
2502
2503                 error = 0;
2504                 switch (how) {
2505                 default:
2506                         error = -EINVAL;
2507                         break;
2508                 case SIG_BLOCK:
2509                         sigaddsetmask(&current->blocked, new_set);
2510                         break;
2511                 case SIG_UNBLOCK:
2512                         sigdelsetmask(&current->blocked, new_set);
2513                         break;
2514                 case SIG_SETMASK:
2515                         current->blocked.sig[0] = new_set;
2516                         break;
2517                 }
2518
2519                 recalc_sigpending();
2520                 spin_unlock_irq(&current->sighand->siglock);
2521                 if (error)
2522                         goto out;
2523                 if (oset)
2524                         goto set_old;
2525         } else if (oset) {
2526                 old_set = current->blocked.sig[0];
2527         set_old:
2528                 error = -EFAULT;
2529                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2530                         goto out;
2531         }
2532         error = 0;
2533 out:
2534         return error;
2535 }
2536 #endif /* __ARCH_WANT_SYS_SIGPROCMASK */
2537
2538 #ifdef __ARCH_WANT_SYS_RT_SIGACTION
2539 SYSCALL_DEFINE4(rt_sigaction, int, sig,
2540                 const struct sigaction __user *, act,
2541                 struct sigaction __user *, oact,
2542                 size_t, sigsetsize)
2543 {
2544         struct k_sigaction new_sa, old_sa;
2545         int ret = -EINVAL;
2546
2547         /* XXX: Don't preclude handling different sized sigset_t's.  */
2548         if (sigsetsize != sizeof(sigset_t))
2549                 goto out;
2550
2551         if (act) {
2552                 if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
2553                         return -EFAULT;
2554         }
2555
2556         ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
2557
2558         if (!ret && oact) {
2559                 if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
2560                         return -EFAULT;
2561         }
2562 out:
2563         return ret;
2564 }
2565 #endif /* __ARCH_WANT_SYS_RT_SIGACTION */
2566
2567 #ifdef __ARCH_WANT_SYS_SGETMASK
2568
2569 /*
2570  * For backwards compatibility.  Functionality superseded by sigprocmask.
2571  */
2572 SYSCALL_DEFINE0(sgetmask)
2573 {
2574         /* SMP safe */
2575         return current->blocked.sig[0];
2576 }
2577
2578 SYSCALL_DEFINE1(ssetmask, int, newmask)
2579 {
2580         int old;
2581
2582         spin_lock_irq(&current->sighand->siglock);
2583         old = current->blocked.sig[0];
2584
2585         siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
2586                                                   sigmask(SIGSTOP)));
2587         recalc_sigpending();
2588         spin_unlock_irq(&current->sighand->siglock);
2589
2590         return old;
2591 }
2592 #endif /* __ARCH_WANT_SYS_SGETMASK */
2593
2594 #ifdef __ARCH_WANT_SYS_SIGNAL
2595 /*
2596  * For backwards compatibility.  Functionality superseded by sigaction.
2597  */
2598 SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
2599 {
2600         struct k_sigaction new_sa, old_sa;
2601         int ret;
2602
2603         new_sa.sa.sa_handler = handler;
2604         new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2605         sigemptyset(&new_sa.sa.sa_mask);
2606
2607         ret = do_sigaction(sig, &new_sa, &old_sa);
2608
2609         return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
2610 }
2611 #endif /* __ARCH_WANT_SYS_SIGNAL */
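
/*
 * Note (illustrative): the SA_ONESHOT | SA_NOMASK combination used by
 * sys_signal() above is what gives the legacy signal(2) its System V
 * semantics; the modern equivalent of
 *
 *	old = signal(SIGINT, handler);
 *
 * is sigaction() with sa_flags = SA_RESETHAND | SA_NODEFER, which is
 * why new code is steered towards sigaction instead.
 */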
2612
2613 #ifdef __ARCH_WANT_SYS_PAUSE
2614
2615 SYSCALL_DEFINE0(pause)
2616 {
2617         current->state = TASK_INTERRUPTIBLE;
2618         schedule();
2619         return -ERESTARTNOHAND;
2620 }
2621
2622 #endif
2623
2624 #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
2625 SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
2626 {
2627         sigset_t newset;
2628
2629         /* XXX: Don't preclude handling different sized sigset_t's.  */
2630         if (sigsetsize != sizeof(sigset_t))
2631                 return -EINVAL;
2632
2633         if (copy_from_user(&newset, unewset, sizeof(newset)))
2634                 return -EFAULT;
2635         sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
2636
2637         spin_lock_irq(&current->sighand->siglock);
2638         current->saved_sigmask = current->blocked;
2639         current->blocked = newset;
2640         recalc_sigpending();
2641         spin_unlock_irq(&current->sighand->siglock);
2642
2643         current->state = TASK_INTERRUPTIBLE;
2644         schedule();
2645         set_restore_sigmask();
2646         return -ERESTARTNOHAND;
2647 }
2648 #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
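
/*
 * Illustrative userspace pattern, not part of this file, for the
 * rt_sigsuspend path above: atomically swap in a wait mask and sleep,
 * avoiding the classic unblock-then-pause race:
 *
 *	sigset_t waitmask;
 *
 *	sigfillset(&waitmask);
 *	sigdelset(&waitmask, SIGUSR1);
 *	sigsuspend(&waitmask);	(returns -1 with errno == EINTR)
 *
 * The saved_sigmask / set_restore_sigmask() handling above is what
 * restores the caller's original mask on the way back to user space.
 */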
2649
2650 __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
2651 {
2652         return NULL;
2653 }
2654
2655 void __init signals_init(void)
2656 {
2657         sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
2658 }