signals: protect cinit from blocked fatal signals
[linux-2.6] / kernel / signal.c
1 /*
2  *  linux/kernel/signal.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  *
6  *  1997-11-02  Modified for POSIX.1b signals by Richard Henderson
7  *
8  *  2003-06-02  Jim Houston - Concurrent Computer Corp.
9  *              Changes to use preallocated sigqueue structures
10  *              to allow signals to be sent reliably.
11  */
12
13 #include <linux/slab.h>
14 #include <linux/module.h>
15 #include <linux/init.h>
16 #include <linux/sched.h>
17 #include <linux/fs.h>
18 #include <linux/tty.h>
19 #include <linux/binfmts.h>
20 #include <linux/security.h>
21 #include <linux/syscalls.h>
22 #include <linux/ptrace.h>
23 #include <linux/signal.h>
24 #include <linux/signalfd.h>
25 #include <linux/tracehook.h>
26 #include <linux/capability.h>
27 #include <linux/freezer.h>
28 #include <linux/pid_namespace.h>
29 #include <linux/nsproxy.h>
30 #include <trace/sched.h>
31
32 #include <asm/param.h>
33 #include <asm/uaccess.h>
34 #include <asm/unistd.h>
35 #include <asm/siginfo.h>
36 #include "audit.h"      /* audit_signal_info() */
37
38 /*
39  * SLAB caches for signal bits.
40  */
41
42 static struct kmem_cache *sigqueue_cachep;
43
44 DEFINE_TRACE(sched_signal_send);
45
46 static void __user *sig_handler(struct task_struct *t, int sig)
47 {
48         return t->sighand->action[sig - 1].sa.sa_handler;
49 }
50
51 static int sig_handler_ignored(void __user *handler, int sig)
52 {
53         /* Is it explicitly or implicitly ignored? */
54         return handler == SIG_IGN ||
55                 (handler == SIG_DFL && sig_kernel_ignore(sig));
56 }
57
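/*
 * SIGNAL_UNKILLABLE marks the init task of a pid namespace (and the
 * global init).  For such a task a signal with the default action is
 * treated as ignored unless it was sent from an ancestor namespace
 * (from_ancestor_ns), so an init cannot be killed from inside its own
 * namespace by a signal it has no handler for.
 */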
58 static int sig_task_ignored(struct task_struct *t, int sig,
59                 int from_ancestor_ns)
60 {
61         void __user *handler;
62
63         handler = sig_handler(t, sig);
64
65         if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
66                         handler == SIG_DFL && !from_ancestor_ns)
67                 return 1;
68
69         return sig_handler_ignored(handler, sig);
70 }
71
72 static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns)
73 {
74         /*
75          * Blocked signals are never ignored, since the
76          * signal handler may change by the time it is
77          * unblocked.
78          */
79         if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
80                 return 0;
81
82         if (!sig_task_ignored(t, sig, from_ancestor_ns))
83                 return 0;
84
85         /*
86          * Tracers may want to know about even ignored signals.
87          */
88         return !tracehook_consider_ignored_signal(t, sig);
89 }
90
91 /*
92  * Re-calculate pending state from the set of locally pending
93  * signals, globally pending signals, and blocked signals.
94  */
95 static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
96 {
97         unsigned long ready;
98         long i;
99
100         switch (_NSIG_WORDS) {
101         default:
102                 for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
103                         ready |= signal->sig[i] &~ blocked->sig[i];
104                 break;
105
106         case 4: ready  = signal->sig[3] &~ blocked->sig[3];
107                 ready |= signal->sig[2] &~ blocked->sig[2];
108                 ready |= signal->sig[1] &~ blocked->sig[1];
109                 ready |= signal->sig[0] &~ blocked->sig[0];
110                 break;
111
112         case 2: ready  = signal->sig[1] &~ blocked->sig[1];
113                 ready |= signal->sig[0] &~ blocked->sig[0];
114                 break;
115
116         case 1: ready  = signal->sig[0] &~ blocked->sig[0];
117         }
118         return ready != 0;
119 }
120
121 #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
122
123 static int recalc_sigpending_tsk(struct task_struct *t)
124 {
125         if (t->signal->group_stop_count > 0 ||
126             PENDING(&t->pending, &t->blocked) ||
127             PENDING(&t->signal->shared_pending, &t->blocked)) {
128                 set_tsk_thread_flag(t, TIF_SIGPENDING);
129                 return 1;
130         }
131         /*
132          * We must never clear the flag in another thread, or in current
133          * when it's possible the current syscall is returning -ERESTART*.
134          * So we don't clear it here; it is cleared only by callers that know they may do so.
135          */
136         return 0;
137 }
138
139 /*
140  * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
141  * This is superfluous when called on current; the wakeup is a harmless no-op.
142  */
143 void recalc_sigpending_and_wake(struct task_struct *t)
144 {
145         if (recalc_sigpending_tsk(t))
146                 signal_wake_up(t, 0);
147 }
148
149 void recalc_sigpending(void)
150 {
151         if (unlikely(tracehook_force_sigpending()))
152                 set_thread_flag(TIF_SIGPENDING);
153         else if (!recalc_sigpending_tsk(current) && !freezing(current))
154                 clear_thread_flag(TIF_SIGPENDING);
155
156 }
157
158 /* Given the mask, find the first available signal that should be serviced. */
159
160 int next_signal(struct sigpending *pending, sigset_t *mask)
161 {
162         unsigned long i, *s, *m, x;
163         int sig = 0;
164         
165         s = pending->signal.sig;
166         m = mask->sig;
167         switch (_NSIG_WORDS) {
168         default:
169                 for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m)
170                         if ((x = *s &~ *m) != 0) {
171                                 sig = ffz(~x) + i*_NSIG_BPW + 1;
172                                 break;
173                         }
174                 break;
175
176         case 2: if ((x = s[0] &~ m[0]) != 0)
177                         sig = 1;
178                 else if ((x = s[1] &~ m[1]) != 0)
179                         sig = _NSIG_BPW + 1;
180                 else
181                         break;
182                 sig += ffz(~x);
183                 break;
184
185         case 1: if ((x = *s &~ *m) != 0)
186                         sig = ffz(~x) + 1;
187                 break;
188         }
189         
190         return sig;
191 }
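/*
 * Note that the scan above walks sig[0] upwards and takes the lowest
 * set bit in each word via ffz(~x), so lower-numbered (legacy) signals
 * are always reported before higher-numbered and real-time ones.
 */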
192
193 /*
194  * allocate a new signal queue record
195  * - this may be called without locks if and only if t == current, otherwise an
196  *   appropriate lock must be held to stop the target task from exiting
197  */
198 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
199                                          int override_rlimit)
200 {
201         struct sigqueue *q = NULL;
202         struct user_struct *user;
203
204         /*
205          * We won't get problems with the target's UID changing under us
206          * because changing it requires RCU be used, and if t != current, the
207          * caller must be holding the RCU readlock (by way of a spinlock) and
208          * we use RCU protection here
209          */
210         user = get_uid(__task_cred(t)->user);
211         atomic_inc(&user->sigpending);
212         if (override_rlimit ||
213             atomic_read(&user->sigpending) <=
214                         t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
215                 q = kmem_cache_alloc(sigqueue_cachep, flags);
216         if (unlikely(q == NULL)) {
217                 atomic_dec(&user->sigpending);
218                 free_uid(user);
219         } else {
220                 INIT_LIST_HEAD(&q->list);
221                 q->flags = 0;
222                 q->user = user;
223         }
224
225         return q;
226 }
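/*
 * The per-user ->sigpending count is bumped before the RLIMIT_SIGPENDING
 * check so concurrent senders see each other's in-flight allocations,
 * and is dropped again if the allocation is skipped or fails.  Callers
 * pass override_rlimit for legacy signals whose siginfo may be dropped,
 * since kill() is not allowed to fail with EAGAIN when low on memory
 * (see the comment in __send_signal()).
 */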
227
228 static void __sigqueue_free(struct sigqueue *q)
229 {
230         if (q->flags & SIGQUEUE_PREALLOC)
231                 return;
232         atomic_dec(&q->user->sigpending);
233         free_uid(q->user);
234         kmem_cache_free(sigqueue_cachep, q);
235 }
236
237 void flush_sigqueue(struct sigpending *queue)
238 {
239         struct sigqueue *q;
240
241         sigemptyset(&queue->signal);
242         while (!list_empty(&queue->list)) {
243                 q = list_entry(queue->list.next, struct sigqueue , list);
244                 list_del_init(&q->list);
245                 __sigqueue_free(q);
246         }
247 }
248
249 /*
250  * Flush all pending signals for a task.
251  */
252 void flush_signals(struct task_struct *t)
253 {
254         unsigned long flags;
255
256         spin_lock_irqsave(&t->sighand->siglock, flags);
257         clear_tsk_thread_flag(t, TIF_SIGPENDING);
258         flush_sigqueue(&t->pending);
259         flush_sigqueue(&t->signal->shared_pending);
260         spin_unlock_irqrestore(&t->sighand->siglock, flags);
261 }
262
263 static void __flush_itimer_signals(struct sigpending *pending)
264 {
265         sigset_t signal, retain;
266         struct sigqueue *q, *n;
267
268         signal = pending->signal;
269         sigemptyset(&retain);
270
271         list_for_each_entry_safe(q, n, &pending->list, list) {
272                 int sig = q->info.si_signo;
273
274                 if (likely(q->info.si_code != SI_TIMER)) {
275                         sigaddset(&retain, sig);
276                 } else {
277                         sigdelset(&signal, sig);
278                         list_del_init(&q->list);
279                         __sigqueue_free(q);
280                 }
281         }
282
283         sigorsets(&pending->signal, &signal, &retain);
284 }
285
286 void flush_itimer_signals(void)
287 {
288         struct task_struct *tsk = current;
289         unsigned long flags;
290
291         spin_lock_irqsave(&tsk->sighand->siglock, flags);
292         __flush_itimer_signals(&tsk->pending);
293         __flush_itimer_signals(&tsk->signal->shared_pending);
294         spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
295 }
296
297 void ignore_signals(struct task_struct *t)
298 {
299         int i;
300
301         for (i = 0; i < _NSIG; ++i)
302                 t->sighand->action[i].sa.sa_handler = SIG_IGN;
303
304         flush_signals(t);
305 }
306
307 /*
308  * Flush all handlers for a task.
309  */
310
311 void
312 flush_signal_handlers(struct task_struct *t, int force_default)
313 {
314         int i;
315         struct k_sigaction *ka = &t->sighand->action[0];
316         for (i = _NSIG ; i != 0 ; i--) {
317                 if (force_default || ka->sa.sa_handler != SIG_IGN)
318                         ka->sa.sa_handler = SIG_DFL;
319                 ka->sa.sa_flags = 0;
320                 sigemptyset(&ka->sa.sa_mask);
321                 ka++;
322         }
323 }
324
325 int unhandled_signal(struct task_struct *tsk, int sig)
326 {
327         void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
328         if (is_global_init(tsk))
329                 return 1;
330         if (handler != SIG_IGN && handler != SIG_DFL)
331                 return 0;
332         return !tracehook_consider_fatal_signal(tsk, sig);
333 }
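/*
 * unhandled_signal() is used by, e.g., the arch fault handlers (together
 * with show_unhandled_signals) to decide whether a fault is worth
 * logging: a signal to global init is always considered unhandled, a
 * task that installed its own handler is not, and otherwise the tracer
 * gets a say via tracehook_consider_fatal_signal().
 */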
334
335
336 /* Notify the system that a driver wants to block all signals for this
337  * process, and wants to be notified if any signals at all were to be
338  * sent/acted upon.  If the notifier routine returns non-zero, then the
339  * signal will be acted upon after all.  If the notifier routine returns 0,
340  *  then the signal will be blocked.  Only one block per process is
341  * allowed.  priv is a pointer to private data that the notifier routine
342  * can use to determine if the signal should be blocked or not.  */
343
344 void
345 block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
346 {
347         unsigned long flags;
348
349         spin_lock_irqsave(&current->sighand->siglock, flags);
350         current->notifier_mask = mask;
351         current->notifier_data = priv;
352         current->notifier = notifier;
353         spin_unlock_irqrestore(&current->sighand->siglock, flags);
354 }
355
356 /* Notify the system that blocking has ended. */
357
358 void
359 unblock_all_signals(void)
360 {
361         unsigned long flags;
362
363         spin_lock_irqsave(&current->sighand->siglock, flags);
364         current->notifier = NULL;
365         current->notifier_data = NULL;
366         recalc_sigpending();
367         spin_unlock_irqrestore(&current->sighand->siglock, flags);
368 }
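/*
 * A minimal usage sketch for the notifier interface above (names are
 * illustrative, not from this file): a driver that must not be
 * disturbed by signals in a critical section would do
 *
 *	block_all_signals(my_notifier, my_private_data, &my_sigmask);
 *	... critical section ...
 *	unblock_all_signals();
 *
 * where my_notifier() returns non-zero if a signal in my_sigmask should
 * be delivered after all.  Historically the DRM lock code has been the
 * main user of this interface.
 */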
369
370 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
371 {
372         struct sigqueue *q, *first = NULL;
373
374         /*
375          * Collect the siginfo appropriate to this signal.  Check if
376          * there is another siginfo for the same signal.
377          */
378         list_for_each_entry(q, &list->list, list) {
379                 if (q->info.si_signo == sig) {
380                         if (first)
381                                 goto still_pending;
382                         first = q;
383                 }
384         }
385
386         sigdelset(&list->signal, sig);
387
388         if (first) {
389 still_pending:
390                 list_del_init(&first->list);
391                 copy_siginfo(info, &first->info);
392                 __sigqueue_free(first);
393         } else {
394                 /* Ok, it wasn't in the queue.  This must be
395                    a fast-pathed signal or we must have been
396                    out of queue space.  So zero out the info.
397                  */
398                 info->si_signo = sig;
399                 info->si_errno = 0;
400                 info->si_code = 0;
401                 info->si_pid = 0;
402                 info->si_uid = 0;
403         }
404 }
405
406 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
407                         siginfo_t *info)
408 {
409         int sig = next_signal(pending, mask);
410
411         if (sig) {
412                 if (current->notifier) {
413                         if (sigismember(current->notifier_mask, sig)) {
414                                 if (!(current->notifier)(current->notifier_data)) {
415                                         clear_thread_flag(TIF_SIGPENDING);
416                                         return 0;
417                                 }
418                         }
419                 }
420
421                 collect_signal(sig, pending, info);
422         }
423
424         return sig;
425 }
426
427 /*
428  * Dequeue a signal and return the element to the caller, which is 
429  * expected to free it.
430  *
431  * All callers have to hold the siglock.
432  */
433 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
434 {
435         int signr;
436
437         /* We only dequeue private signals from ourselves, we don't let
438          * signalfd steal them
439          */
440         signr = __dequeue_signal(&tsk->pending, mask, info);
441         if (!signr) {
442                 signr = __dequeue_signal(&tsk->signal->shared_pending,
443                                          mask, info);
444                 /*
445                  * itimer signal ?
446                  *
447                  * itimers are process shared and we restart periodic
448                  * itimers in the signal delivery path to prevent DoS
449                  * attacks in the high resolution timer case. This is
450                  * compliant with the old way of self restarting
451                  * itimers, as the SIGALRM is a legacy signal and only
452                  * queued once. Changing the restart behaviour to
453                  * restart the timer in the signal dequeue path is
454                  * reducing the timer noise on heavy loaded !highres
455                  * systems too.
456                  */
457                 if (unlikely(signr == SIGALRM)) {
458                         struct hrtimer *tmr = &tsk->signal->real_timer;
459
460                         if (!hrtimer_is_queued(tmr) &&
461                             tsk->signal->it_real_incr.tv64 != 0) {
462                                 hrtimer_forward(tmr, tmr->base->get_time(),
463                                                 tsk->signal->it_real_incr);
464                                 hrtimer_restart(tmr);
465                         }
466                 }
467         }
468
469         recalc_sigpending();
470         if (!signr)
471                 return 0;
472
473         if (unlikely(sig_kernel_stop(signr))) {
474                 /*
475                  * Set a marker that we have dequeued a stop signal.  Our
476                  * caller might release the siglock and then the pending
477                  * stop signal it is about to process is no longer in the
478                  * pending bitmasks, but must still be cleared by a SIGCONT
479                  * (and overruled by a SIGKILL).  So those cases clear this
480                  * shared flag after we've set it.  Note that this flag may
481                  * remain set after the signal we return is ignored or
482                  * handled.  That doesn't matter because its only purpose
483                  * is to alert stop-signal processing code when another
484                  * processor has come along and cleared the flag.
485                  */
486                 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
487         }
488         if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
489                 /*
490                  * Release the siglock to ensure proper locking order
491                  * of timer locks outside of siglocks.  Note, we leave
492                  * irqs disabled here, since the posix-timers code is
493                  * about to disable them again anyway.
494                  */
495                 spin_unlock(&tsk->sighand->siglock);
496                 do_schedule_next_timer(info);
497                 spin_lock(&tsk->sighand->siglock);
498         }
499         return signr;
500 }
501
502 /*
503  * Tell a process that it has a new active signal..
504  *
505  * NOTE! we rely on the previous spin_lock to
506  * lock interrupts for us! We can only be called with
507  * "siglock" held, and the local interrupt must
508  * have been disabled when that got acquired!
509  *
510  * No need to set need_resched since signal event passing
511  * goes through ->blocked
512  */
513 void signal_wake_up(struct task_struct *t, int resume)
514 {
515         unsigned int mask;
516
517         set_tsk_thread_flag(t, TIF_SIGPENDING);
518
519         /*
520          * For SIGKILL, we want to wake it up in the stopped/traced/killable
521          * case. We don't check t->state here because there is a race with it
522          * executing on another processor and just now entering stopped state.
523          * By using wake_up_state, we ensure the process will wake up and
524          * handle its death signal.
525          */
526         mask = TASK_INTERRUPTIBLE;
527         if (resume)
528                 mask |= TASK_WAKEKILL;
529         if (!wake_up_state(t, mask))
530                 kick_process(t);
531 }
532
533 /*
534  * Remove signals in mask from the pending set and queue.
535  * Returns 1 if any signals were found.
536  *
537  * All callers must be holding the siglock.
538  *
539  * This version takes a sigset mask and looks at all signals,
540  * not just those in the first mask word.
541  */
542 static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
543 {
544         struct sigqueue *q, *n;
545         sigset_t m;
546
547         sigandsets(&m, mask, &s->signal);
548         if (sigisemptyset(&m))
549                 return 0;
550
551         signandsets(&s->signal, &s->signal, mask);
552         list_for_each_entry_safe(q, n, &s->list, list) {
553                 if (sigismember(mask, q->info.si_signo)) {
554                         list_del_init(&q->list);
555                         __sigqueue_free(q);
556                 }
557         }
558         return 1;
559 }
560 /*
561  * Remove signals in mask from the pending set and queue.
562  * Returns 1 if any signals were found.
563  *
564  * All callers must be holding the siglock.
565  */
566 static int rm_from_queue(unsigned long mask, struct sigpending *s)
567 {
568         struct sigqueue *q, *n;
569
570         if (!sigtestsetmask(&s->signal, mask))
571                 return 0;
572
573         sigdelsetmask(&s->signal, mask);
574         list_for_each_entry_safe(q, n, &s->list, list) {
575                 if (q->info.si_signo < SIGRTMIN &&
576                     (mask & sigmask(q->info.si_signo))) {
577                         list_del_init(&q->list);
578                         __sigqueue_free(q);
579                 }
580         }
581         return 1;
582 }
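/*
 * Note that rm_from_queue() takes a plain word mask, so it can only
 * describe signals in the first _NSIG_BPW bits; the si_signo < SIGRTMIN
 * check above keeps sigmask() from being applied to real-time signals
 * the mask cannot name anyway.
 */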
583
584 /*
585  * Bad permissions for sending the signal
586  * - the caller must hold at least the RCU read lock
587  */
588 static int check_kill_permission(int sig, struct siginfo *info,
589                                  struct task_struct *t)
590 {
591         const struct cred *cred = current_cred(), *tcred;
592         struct pid *sid;
593         int error;
594
595         if (!valid_signal(sig))
596                 return -EINVAL;
597
598         if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
599                 return 0;
600
601         error = audit_signal_info(sig, t); /* Let audit system see the signal */
602         if (error)
603                 return error;
604
605         tcred = __task_cred(t);
606         if ((cred->euid ^ tcred->suid) &&
607             (cred->euid ^ tcred->uid) &&
608             (cred->uid  ^ tcred->suid) &&
609             (cred->uid  ^ tcred->uid) &&
610             !capable(CAP_KILL)) {
611                 switch (sig) {
612                 case SIGCONT:
613                         sid = task_session(t);
614                         /*
615                          * We don't return the error if sid == NULL. The
616                          * task was unhashed, the caller must notice this.
617                          */
618                         if (!sid || sid == task_session(current))
619                                 break;
620                 default:
621                         return -EPERM;
622                 }
623         }
624
625         return security_task_kill(t, info, sig, 0);
626 }
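/*
 * The XOR chain above implements the classic kill(2) permission rule:
 * the sender needs CAP_KILL unless its uid or euid matches the target's
 * uid or saved uid.  SIGCONT is special-cased so that a process may
 * always continue members of its own session (e.g. a shell resuming one
 * of its jobs).
 */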
627
628 /*
629  * Handle magic process-wide effects of stop/continue signals. Unlike
630  * the signal actions, these happen immediately at signal-generation
631  * time regardless of blocking, ignoring, or handling.  This does the
632  * actual continuing for SIGCONT, but not the actual stopping for stop
633  * signals. The process stop is done as a signal action for SIG_DFL.
634  *
635  * Returns true if the signal should be actually delivered, otherwise
636  * it should be dropped.
637  */
638 static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
639 {
640         struct signal_struct *signal = p->signal;
641         struct task_struct *t;
642
643         if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) {
644                 /*
645                  * The process is in the middle of dying, nothing to do.
646                  */
647         } else if (sig_kernel_stop(sig)) {
648                 /*
649                  * This is a stop signal.  Remove SIGCONT from all queues.
650                  */
651                 rm_from_queue(sigmask(SIGCONT), &signal->shared_pending);
652                 t = p;
653                 do {
654                         rm_from_queue(sigmask(SIGCONT), &t->pending);
655                 } while_each_thread(p, t);
656         } else if (sig == SIGCONT) {
657                 unsigned int why;
658                 /*
659                  * Remove all stop signals from all queues,
660                  * and wake all threads.
661                  */
662                 rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
663                 t = p;
664                 do {
665                         unsigned int state;
666                         rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
667                         /*
668                          * If there is a handler for SIGCONT, we must make
669                          * sure that no thread returns to user mode before
670                          * we post the signal, in case it was the only
671                          * thread eligible to run the signal handler--then
672                          * it must not do anything between resuming and
673                          * running the handler.  With the TIF_SIGPENDING
674                          * flag set, the thread will pause and acquire the
675                          * siglock that we hold now and until we've queued
676                          * the pending signal.
677                          *
678                          * Wake up the stopped thread _after_ setting
679                          * TIF_SIGPENDING
680                          */
681                         state = __TASK_STOPPED;
682                         if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
683                                 set_tsk_thread_flag(t, TIF_SIGPENDING);
684                                 state |= TASK_INTERRUPTIBLE;
685                         }
686                         wake_up_state(t, state);
687                 } while_each_thread(p, t);
688
689                 /*
690                  * Notify the parent with CLD_CONTINUED if we were stopped.
691                  *
692                  * If we were in the middle of a group stop, we pretend it
693                  * was already finished, and then continued. Since SIGCHLD
694                  * doesn't queue we report only CLD_STOPPED, as if the next
695                  * CLD_CONTINUED was dropped.
696                  */
697                 why = 0;
698                 if (signal->flags & SIGNAL_STOP_STOPPED)
699                         why |= SIGNAL_CLD_CONTINUED;
700                 else if (signal->group_stop_count)
701                         why |= SIGNAL_CLD_STOPPED;
702
703                 if (why) {
704                         /*
705                          * The first thread which returns from finish_stop()
706                          * will take ->siglock, notice SIGNAL_CLD_MASK, and
707                          * notify its parent. See get_signal_to_deliver().
708                          */
709                         signal->flags = why | SIGNAL_STOP_CONTINUED;
710                         signal->group_stop_count = 0;
711                         signal->group_exit_code = 0;
712                 } else {
713                         /*
714                          * We are not stopped, but there could be a stop
715                          * signal in the middle of being processed after
716                          * being removed from the queue.  Clear that too.
717                          */
718                         signal->flags &= ~SIGNAL_STOP_DEQUEUED;
719                 }
720         }
721
722         return !sig_ignored(p, sig, from_ancestor_ns);
723 }
724
725 /*
726  * Test if P wants to take SIG.  After we've checked all threads with this,
727  * it's equivalent to finding no threads not blocking SIG.  Any threads not
728  * blocking SIG were ruled out because they are not running and already
729  * have pending signals.  Such threads will dequeue from the shared queue
730  * as soon as they're available, so putting the signal on the shared queue
731  * will be equivalent to sending it to one such thread.
732  */
733 static inline int wants_signal(int sig, struct task_struct *p)
734 {
735         if (sigismember(&p->blocked, sig))
736                 return 0;
737         if (p->flags & PF_EXITING)
738                 return 0;
739         if (sig == SIGKILL)
740                 return 1;
741         if (task_is_stopped_or_traced(p))
742                 return 0;
743         return task_curr(p) || !signal_pending(p);
744 }
745
746 static void complete_signal(int sig, struct task_struct *p, int group)
747 {
748         struct signal_struct *signal = p->signal;
749         struct task_struct *t;
750
751         /*
752          * Now find a thread we can wake up to take the signal off the queue.
753          *
754          * If the main thread wants the signal, it gets first crack.
755          * Probably the least surprising to the average bear.
756          */
757         if (wants_signal(sig, p))
758                 t = p;
759         else if (!group || thread_group_empty(p))
760                 /*
761                  * There is just one thread and it does not need to be woken.
762                  * It will dequeue unblocked signals before it runs again.
763                  */
764                 return;
765         else {
766                 /*
767                  * Otherwise try to find a suitable thread.
768                  */
769                 t = signal->curr_target;
770                 while (!wants_signal(sig, t)) {
771                         t = next_thread(t);
772                         if (t == signal->curr_target)
773                                 /*
774                                  * No thread needs to be woken.
775                                  * Any eligible threads will see
776                                  * the signal in the queue soon.
777                                  */
778                                 return;
779                 }
780                 signal->curr_target = t;
781         }
782
783         /*
784          * Found a killable thread.  If the signal will be fatal,
785          * then start taking the whole group down immediately.
786          */
787         if (sig_fatal(p, sig) &&
788             !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&
789             !sigismember(&t->real_blocked, sig) &&
790             (sig == SIGKILL ||
791              !tracehook_consider_fatal_signal(t, sig))) {
792                 /*
793                  * This signal will be fatal to the whole group.
794                  */
795                 if (!sig_kernel_coredump(sig)) {
796                         /*
797                          * Start a group exit and wake everybody up.
798                          * This way we don't have other threads
799                          * running and doing things after a slower
800                          * thread has the fatal signal pending.
801                          */
802                         signal->flags = SIGNAL_GROUP_EXIT;
803                         signal->group_exit_code = sig;
804                         signal->group_stop_count = 0;
805                         t = p;
806                         do {
807                                 sigaddset(&t->pending.signal, SIGKILL);
808                                 signal_wake_up(t, 1);
809                         } while_each_thread(p, t);
810                         return;
811                 }
812         }
813
814         /*
815          * The signal is already in the shared-pending queue.
816          * Tell the chosen thread to wake up and dequeue it.
817          */
818         signal_wake_up(t, sig == SIGKILL);
819         return;
820 }
821
822 static inline int legacy_queue(struct sigpending *signals, int sig)
823 {
824         return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
825 }
826
827 static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
828                         int group, int from_ancestor_ns)
829 {
830         struct sigpending *pending;
831         struct sigqueue *q;
832
833         trace_sched_signal_send(sig, t);
834
835         assert_spin_locked(&t->sighand->siglock);
836
837         if (!prepare_signal(sig, t, from_ancestor_ns))
838                 return 0;
839
840         pending = group ? &t->signal->shared_pending : &t->pending;
841         /*
842          * Short-circuit ignored signals and support queuing
843          * exactly one non-rt signal, so that we can get more
844          * detailed information about the cause of the signal.
845          */
846         if (legacy_queue(pending, sig))
847                 return 0;
848         /*
849          * fast-pathed signals for kernel-internal things like SIGSTOP
850          * or SIGKILL.
851          */
852         if (info == SEND_SIG_FORCED)
853                 goto out_set;
854
855         /* Real-time signals must be queued if sent by sigqueue, or
856            some other real-time mechanism.  It is implementation
857            defined whether kill() does so.  We attempt to do so, on
858            the principle of least surprise, but since kill is not
859            allowed to fail with EAGAIN when low on memory we just
860            make sure at least one signal gets delivered and don't
861            pass on the info struct.  */
862
863         q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
864                                              (is_si_special(info) ||
865                                               info->si_code >= 0)));
866         if (q) {
867                 list_add_tail(&q->list, &pending->list);
868                 switch ((unsigned long) info) {
869                 case (unsigned long) SEND_SIG_NOINFO:
870                         q->info.si_signo = sig;
871                         q->info.si_errno = 0;
872                         q->info.si_code = SI_USER;
873                         q->info.si_pid = task_tgid_nr_ns(current,
874                                                         task_active_pid_ns(t));
875                         q->info.si_uid = current_uid();
876                         break;
877                 case (unsigned long) SEND_SIG_PRIV:
878                         q->info.si_signo = sig;
879                         q->info.si_errno = 0;
880                         q->info.si_code = SI_KERNEL;
881                         q->info.si_pid = 0;
882                         q->info.si_uid = 0;
883                         break;
884                 default:
885                         copy_siginfo(&q->info, info);
886                         break;
887                 }
888         } else if (!is_si_special(info)) {
889                 if (sig >= SIGRTMIN && info->si_code != SI_USER)
890                 /*
891                  * Queue overflow, abort.  We may abort if the signal was rt
892                  * and sent by user using something other than kill().
893                  */
894                         return -EAGAIN;
895         }
896
897 out_set:
898         signalfd_notify(t, sig);
899         sigaddset(&pending->signal, sig);
900         complete_signal(sig, t, group);
901         return 0;
902 }
903
904 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
905                         int group)
906 {
907         int from_ancestor_ns = 0;
908
909 #ifdef CONFIG_PID_NS
910         if (!is_si_special(info) && SI_FROMUSER(info) &&
911                         task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0)
912                 from_ancestor_ns = 1;
913 #endif
914
915         return __send_signal(sig, info, t, group, from_ancestor_ns);
916 }
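/*
 * task_pid_nr_ns(current, task_active_pid_ns(t)) is 0 when the sender
 * has no pid in the target's pid namespace, i.e. it lives in an
 * ancestor namespace.  Such signals set from_ancestor_ns and may
 * therefore kill a container init; see sig_task_ignored().
 */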
917
918 int print_fatal_signals;
919
920 static void print_fatal_signal(struct pt_regs *regs, int signr)
921 {
922         printk("%s/%d: potentially unexpected fatal signal %d.\n",
923                 current->comm, task_pid_nr(current), signr);
924
925 #if defined(__i386__) && !defined(__arch_um__)
926         printk("code at %08lx: ", regs->ip);
927         {
928                 int i;
929                 for (i = 0; i < 16; i++) {
930                         unsigned char insn;
931
932                         __get_user(insn, (unsigned char *)(regs->ip + i));
933                         printk("%02x ", insn);
934                 }
935         }
936 #endif
937         printk("\n");
938         preempt_disable();
939         show_regs(regs);
940         preempt_enable();
941 }
942
943 static int __init setup_print_fatal_signals(char *str)
944 {
945         get_option (&str, &print_fatal_signals);
946
947         return 1;
948 }
949
950 __setup("print-fatal-signals=", setup_print_fatal_signals);
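/*
 * The flag is normally set on the kernel command line, e.g. booting
 * with "print-fatal-signals=1" (it is also exposed as a sysctl) makes
 * every fatal signal dump the faulting instruction bytes and registers
 * via print_fatal_signal() above.
 */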
951
952 int
953 __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
954 {
955         return send_signal(sig, info, p, 1);
956 }
957
958 static int
959 specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
960 {
961         return send_signal(sig, info, t, 0);
962 }
963
964 /*
965  * Force a signal that the process can't ignore: if necessary
966  * we unblock the signal and change any SIG_IGN to SIG_DFL.
967  *
968  * Note: If we unblock the signal, we always reset it to SIG_DFL,
969  * since we do not want to have a signal handler that was blocked
970  * be invoked when user space had explicitly blocked it.
971  *
972  * We don't want to have recursive SIGSEGV's etc, for example,
973  * that is why we also clear SIGNAL_UNKILLABLE.
974  */
975 int
976 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
977 {
978         unsigned long int flags;
979         int ret, blocked, ignored;
980         struct k_sigaction *action;
981
982         spin_lock_irqsave(&t->sighand->siglock, flags);
983         action = &t->sighand->action[sig-1];
984         ignored = action->sa.sa_handler == SIG_IGN;
985         blocked = sigismember(&t->blocked, sig);
986         if (blocked || ignored) {
987                 action->sa.sa_handler = SIG_DFL;
988                 if (blocked) {
989                         sigdelset(&t->blocked, sig);
990                         recalc_sigpending_and_wake(t);
991                 }
992         }
993         if (action->sa.sa_handler == SIG_DFL)
994                 t->signal->flags &= ~SIGNAL_UNKILLABLE;
995         ret = specific_send_sig_info(sig, info, t);
996         spin_unlock_irqrestore(&t->sighand->siglock, flags);
997
998         return ret;
999 }
1000
1001 void
1002 force_sig_specific(int sig, struct task_struct *t)
1003 {
1004         force_sig_info(sig, SEND_SIG_FORCED, t);
1005 }
1006
1007 /*
1008  * Nuke all other threads in the group.
1009  */
1010 void zap_other_threads(struct task_struct *p)
1011 {
1012         struct task_struct *t;
1013
1014         p->signal->group_stop_count = 0;
1015
1016         for (t = next_thread(p); t != p; t = next_thread(t)) {
1017                 /*
1018                  * Don't bother with already dead threads
1019                  */
1020                 if (t->exit_state)
1021                         continue;
1022
1023                 /* SIGKILL will be handled before any pending SIGSTOP */
1024                 sigaddset(&t->pending.signal, SIGKILL);
1025                 signal_wake_up(t, 1);
1026         }
1027 }
1028
1029 int __fatal_signal_pending(struct task_struct *tsk)
1030 {
1031         return sigismember(&tsk->pending.signal, SIGKILL);
1032 }
1033 EXPORT_SYMBOL(__fatal_signal_pending);
1034
1035 struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
1036 {
1037         struct sighand_struct *sighand;
1038
1039         rcu_read_lock();
1040         for (;;) {
1041                 sighand = rcu_dereference(tsk->sighand);
1042                 if (unlikely(sighand == NULL))
1043                         break;
1044
1045                 spin_lock_irqsave(&sighand->siglock, *flags);
1046                 if (likely(sighand == tsk->sighand))
1047                         break;
1048                 spin_unlock_irqrestore(&sighand->siglock, *flags);
1049         }
1050         rcu_read_unlock();
1051
1052         return sighand;
1053 }
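/*
 * ->sighand can change or be freed as the task exits; the cache is
 * SLAB_DESTROY_BY_RCU, so the memory stays safe to lock under
 * rcu_read_lock() but may already belong to another task.  Hence the
 * loop re-checks tsk->sighand after taking the lock and retries if it
 * changed underneath us.
 */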
1054
1055 /*
1056  * send signal info to all the members of a group
1057  * - the caller must hold the RCU read lock at least
1058  */
1059 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1060 {
1061         unsigned long flags;
1062         int ret;
1063
1064         ret = check_kill_permission(sig, info, p);
1065
1066         if (!ret && sig) {
1067                 ret = -ESRCH;
1068                 if (lock_task_sighand(p, &flags)) {
1069                         ret = __group_send_sig_info(sig, info, p);
1070                         unlock_task_sighand(p, &flags);
1071                 }
1072         }
1073
1074         return ret;
1075 }
1076
1077 /*
1078  * __kill_pgrp_info() sends a signal to a process group: this is what the tty
1079  * control characters do (^C, ^Z etc)
1080  * - the caller must hold at least a readlock on tasklist_lock
1081  */
1082 int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
1083 {
1084         struct task_struct *p = NULL;
1085         int retval, success;
1086
1087         success = 0;
1088         retval = -ESRCH;
1089         do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
1090                 int err = group_send_sig_info(sig, info, p);
1091                 success |= !err;
1092                 retval = err;
1093         } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
1094         return success ? 0 : retval;
1095 }
1096
1097 int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
1098 {
1099         int error = -ESRCH;
1100         struct task_struct *p;
1101
1102         rcu_read_lock();
1103 retry:
1104         p = pid_task(pid, PIDTYPE_PID);
1105         if (p) {
1106                 error = group_send_sig_info(sig, info, p);
1107                 if (unlikely(error == -ESRCH))
1108                         /*
1109                          * The task was unhashed in between, try again.
1110                          * If it is dead, pid_task() will return NULL,
1111                          * if we race with de_thread() it will find the
1112                          * new leader.
1113                          */
1114                         goto retry;
1115         }
1116         rcu_read_unlock();
1117
1118         return error;
1119 }
1120
1121 int
1122 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1123 {
1124         int error;
1125         rcu_read_lock();
1126         error = kill_pid_info(sig, info, find_vpid(pid));
1127         rcu_read_unlock();
1128         return error;
1129 }
1130
1131 /* like kill_pid_info(), but doesn't use uid/euid of "current" */
1132 int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1133                       uid_t uid, uid_t euid, u32 secid)
1134 {
1135         int ret = -EINVAL;
1136         struct task_struct *p;
1137         const struct cred *pcred;
1138
1139         if (!valid_signal(sig))
1140                 return ret;
1141
1142         read_lock(&tasklist_lock);
1143         p = pid_task(pid, PIDTYPE_PID);
1144         if (!p) {
1145                 ret = -ESRCH;
1146                 goto out_unlock;
1147         }
1148         pcred = __task_cred(p);
1149         if ((info == SEND_SIG_NOINFO ||
1150              (!is_si_special(info) && SI_FROMUSER(info))) &&
1151             euid != pcred->suid && euid != pcred->uid &&
1152             uid  != pcred->suid && uid  != pcred->uid) {
1153                 ret = -EPERM;
1154                 goto out_unlock;
1155         }
1156         ret = security_task_kill(p, info, sig, secid);
1157         if (ret)
1158                 goto out_unlock;
1159         if (sig && p->sighand) {
1160                 unsigned long flags;
1161                 spin_lock_irqsave(&p->sighand->siglock, flags);
1162                 ret = __send_signal(sig, info, p, 1, 0);
1163                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1164         }
1165 out_unlock:
1166         read_unlock(&tasklist_lock);
1167         return ret;
1168 }
1169 EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
1170
1171 /*
1172  * kill_something_info() interprets pid in interesting ways just like kill(2).
1173  *
1174  * POSIX specifies that kill(-1,sig) is unspecified, but what we have
1175  * is probably wrong.  Should make it like BSD or SYSV.
1176  */
1177
1178 static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1179 {
1180         int ret;
1181
1182         if (pid > 0) {
1183                 rcu_read_lock();
1184                 ret = kill_pid_info(sig, info, find_vpid(pid));
1185                 rcu_read_unlock();
1186                 return ret;
1187         }
1188
1189         read_lock(&tasklist_lock);
1190         if (pid != -1) {
1191                 ret = __kill_pgrp_info(sig, info,
1192                                 pid ? find_vpid(-pid) : task_pgrp(current));
1193         } else {
1194                 int retval = 0, count = 0;
1195                 struct task_struct * p;
1196
1197                 for_each_process(p) {
1198                         if (task_pid_vnr(p) > 1 &&
1199                                         !same_thread_group(p, current)) {
1200                                 int err = group_send_sig_info(sig, info, p);
1201                                 ++count;
1202                                 if (err != -EPERM)
1203                                         retval = err;
1204                         }
1205                 }
1206                 ret = count ? retval : -ESRCH;
1207         }
1208         read_unlock(&tasklist_lock);
1209
1210         return ret;
1211 }
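/*
 * The pid argument follows kill(2): pid > 0 signals that process,
 * pid == 0 the caller's process group, pid < -1 the process group
 * -pid, and pid == -1 every process the caller may signal except init
 * (the task_pid_vnr(p) > 1 test above) and the caller's own thread
 * group.
 */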
1212
1213 /*
1214  * These are for backward compatibility with the rest of the kernel source.
1215  */
1216
1217 /*
1218  * The caller must ensure the task can't exit.
1219  */
1220 int
1221 send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1222 {
1223         int ret;
1224         unsigned long flags;
1225
1226         /*
1227          * Make sure legacy kernel users don't send in bad values
1228          * (normal paths check this in check_kill_permission).
1229          */
1230         if (!valid_signal(sig))
1231                 return -EINVAL;
1232
1233         spin_lock_irqsave(&p->sighand->siglock, flags);
1234         ret = specific_send_sig_info(sig, info, p);
1235         spin_unlock_irqrestore(&p->sighand->siglock, flags);
1236         return ret;
1237 }
1238
1239 #define __si_special(priv) \
1240         ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO)
1241
1242 int
1243 send_sig(int sig, struct task_struct *p, int priv)
1244 {
1245         return send_sig_info(sig, __si_special(priv), p);
1246 }
1247
1248 void
1249 force_sig(int sig, struct task_struct *p)
1250 {
1251         force_sig_info(sig, SEND_SIG_PRIV, p);
1252 }
1253
1254 /*
1255  * When things go south during signal handling, we
1256  * will force a SIGSEGV. And if the signal that caused
1257  * the problem was already a SIGSEGV, we'll want to
1258  * make sure we don't even try to deliver the signal..
1259  */
1260 int
1261 force_sigsegv(int sig, struct task_struct *p)
1262 {
1263         if (sig == SIGSEGV) {
1264                 unsigned long flags;
1265                 spin_lock_irqsave(&p->sighand->siglock, flags);
1266                 p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
1267                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1268         }
1269         force_sig(SIGSEGV, p);
1270         return 0;
1271 }
1272
1273 int kill_pgrp(struct pid *pid, int sig, int priv)
1274 {
1275         int ret;
1276
1277         read_lock(&tasklist_lock);
1278         ret = __kill_pgrp_info(sig, __si_special(priv), pid);
1279         read_unlock(&tasklist_lock);
1280
1281         return ret;
1282 }
1283 EXPORT_SYMBOL(kill_pgrp);
1284
1285 int kill_pid(struct pid *pid, int sig, int priv)
1286 {
1287         return kill_pid_info(sig, __si_special(priv), pid);
1288 }
1289 EXPORT_SYMBOL(kill_pid);
1290
1291 /*
1292  * These functions support sending signals using preallocated sigqueue
1293  * structures.  This is needed "because realtime applications cannot
1294  * afford to lose notifications of asynchronous events, like timer
1295  * expirations or I/O completions".  In the case of POSIX timers
1296  * we allocate the sigqueue structure in timer_create().  If this
1297  * allocation fails we are able to report the failure to the application
1298  * with an EAGAIN error.
1299  */
1300  
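/*
 * A sketch of the expected lifecycle, as used by the posix-timers code
 * (the sigqueue is allocated once and reused for every expiry, so
 * delivery can never fail with EAGAIN at signal time):
 *
 *	q = sigqueue_alloc();			at timer_create()
 *	send_sigqueue(q, task, group);		on each expiry
 *	sigqueue_free(q);			at timer deletion
 */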
1301 struct sigqueue *sigqueue_alloc(void)
1302 {
1303         struct sigqueue *q;
1304
1305         if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0)))
1306                 q->flags |= SIGQUEUE_PREALLOC;
1307         return(q);
1308 }
1309
1310 void sigqueue_free(struct sigqueue *q)
1311 {
1312         unsigned long flags;
1313         spinlock_t *lock = &current->sighand->siglock;
1314
1315         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1316         /*
1317          * We must hold ->siglock while testing q->list
1318          * to serialize with collect_signal() or with
1319          * __exit_signal()->flush_sigqueue().
1320          */
1321         spin_lock_irqsave(lock, flags);
1322         q->flags &= ~SIGQUEUE_PREALLOC;
1323         /*
1324          * If it is queued it will be freed when dequeued,
1325          * like the "regular" sigqueue.
1326          */
1327         if (!list_empty(&q->list))
1328                 q = NULL;
1329         spin_unlock_irqrestore(lock, flags);
1330
1331         if (q)
1332                 __sigqueue_free(q);
1333 }
1334
1335 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1336 {
1337         int sig = q->info.si_signo;
1338         struct sigpending *pending;
1339         unsigned long flags;
1340         int ret;
1341
1342         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1343
1344         ret = -1;
1345         if (!likely(lock_task_sighand(t, &flags)))
1346                 goto ret;
1347
1348         ret = 1; /* the signal is ignored */
1349         if (!prepare_signal(sig, t, 0))
1350                 goto out;
1351
1352         ret = 0;
1353         if (unlikely(!list_empty(&q->list))) {
1354                 /*
1355                  * If an SI_TIMER entry is already queued, just increment
1356                  * the overrun count.
1357                  */
1358                 BUG_ON(q->info.si_code != SI_TIMER);
1359                 q->info.si_overrun++;
1360                 goto out;
1361         }
1362         q->info.si_overrun = 0;
1363
1364         signalfd_notify(t, sig);
1365         pending = group ? &t->signal->shared_pending : &t->pending;
1366         list_add_tail(&q->list, &pending->list);
1367         sigaddset(&pending->signal, sig);
1368         complete_signal(sig, t, group);
1369 out:
1370         unlock_task_sighand(t, &flags);
1371 ret:
1372         return ret;
1373 }
1374
1375 /*
1376  * Wake up any threads in the parent blocked in wait* syscalls.
1377  */
1378 static inline void __wake_up_parent(struct task_struct *p,
1379                                     struct task_struct *parent)
1380 {
1381         wake_up_interruptible_sync(&parent->signal->wait_chldexit);
1382 }
1383
1384 /*
1385  * Let a parent know about the death of a child.
1386  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
1387  *
1388  * Returns -1 if our parent ignored us and so we've switched to
1389  * self-reaping, or else @sig.
1390  */
1391 int do_notify_parent(struct task_struct *tsk, int sig)
1392 {
1393         struct siginfo info;
1394         unsigned long flags;
1395         struct sighand_struct *psig;
1396         int ret = sig;
1397
1398         BUG_ON(sig == -1);
1399
1400         /* do_notify_parent_cldstop should have been called instead.  */
1401         BUG_ON(task_is_stopped_or_traced(tsk));
1402
1403         BUG_ON(!tsk->ptrace &&
1404                (tsk->group_leader != tsk || !thread_group_empty(tsk)));
1405
1406         info.si_signo = sig;
1407         info.si_errno = 0;
1408         /*
1409          * We are under tasklist_lock here, so our parent is tied to
1410          * us and cannot exit and release its namespace.
1411          *
1412          * The only thing it can do is switch its nsproxy with sys_unshare(),
1413          * but unsharing pid namespaces is not allowed, so we will always
1414          * see the relevant namespace.
1415          *
1416          * write_lock() currently calls preempt_disable(), which is the
1417          * same as rcu_read_lock(); but, according to Oleg, it is not
1418          * correct to rely on this.
1419          */
1420         rcu_read_lock();
1421         info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1422         info.si_uid = __task_cred(tsk)->uid;
1423         rcu_read_unlock();
1424
1425         info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
1426                                 tsk->signal->utime));
1427         info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
1428                                 tsk->signal->stime));
1429
1430         info.si_status = tsk->exit_code & 0x7f;
1431         if (tsk->exit_code & 0x80)
1432                 info.si_code = CLD_DUMPED;
1433         else if (tsk->exit_code & 0x7f)
1434                 info.si_code = CLD_KILLED;
1435         else {
1436                 info.si_code = CLD_EXITED;
1437                 info.si_status = tsk->exit_code >> 8;
1438         }
1439
1440         psig = tsk->parent->sighand;
1441         spin_lock_irqsave(&psig->siglock, flags);
1442         if (!tsk->ptrace && sig == SIGCHLD &&
1443             (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
1444              (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
1445                 /*
1446                  * We are exiting and our parent doesn't care.  POSIX.1
1447                  * defines special semantics for setting SIGCHLD to SIG_IGN
1448                  * or setting the SA_NOCLDWAIT flag: we should be reaped
1449                  * automatically and not left for our parent's wait4 call.
1450                  * Rather than having the parent do it as a magic kind of
1451                  * signal handler, we just set this to tell do_exit that we
1452                  * can be cleaned up without becoming a zombie.  Note that
1453                  * we still call __wake_up_parent in this case, because a
1454                  * blocked sys_wait4 might now return -ECHILD.
1455                  *
1456                  * Whether we send SIGCHLD or not for SA_NOCLDWAIT
1457                  * is implementation-defined: we do (if you don't want
1458                  * it, just use SIG_IGN instead).
1459                  */
1460                 ret = tsk->exit_signal = -1;
1461                 if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
1462                         sig = -1;
1463         }
1464         if (valid_signal(sig) && sig > 0)
1465                 __group_send_sig_info(sig, &info, tsk->parent);
1466         __wake_up_parent(tsk, tsk->parent);
1467         spin_unlock_irqrestore(&psig->siglock, flags);
1468
1469         return ret;
1470 }
1471
1472 static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1473 {
1474         struct siginfo info;
1475         unsigned long flags;
1476         struct task_struct *parent;
1477         struct sighand_struct *sighand;
1478
1479         if (tsk->ptrace & PT_PTRACED)
1480                 parent = tsk->parent;
1481         else {
1482                 tsk = tsk->group_leader;
1483                 parent = tsk->real_parent;
1484         }
1485
1486         info.si_signo = SIGCHLD;
1487         info.si_errno = 0;
1488         /*
1489          * See the comment in do_notify_parent() about the following 3 lines.
1490          */
1491         rcu_read_lock();
1492         info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1493         info.si_uid = __task_cred(tsk)->uid;
1494         rcu_read_unlock();
1495
1496         info.si_utime = cputime_to_clock_t(tsk->utime);
1497         info.si_stime = cputime_to_clock_t(tsk->stime);
1498
1499         info.si_code = why;
1500         switch (why) {
1501         case CLD_CONTINUED:
1502                 info.si_status = SIGCONT;
1503                 break;
1504         case CLD_STOPPED:
1505                 info.si_status = tsk->signal->group_exit_code & 0x7f;
1506                 break;
1507         case CLD_TRAPPED:
1508                 info.si_status = tsk->exit_code & 0x7f;
1509                 break;
1510         default:
1511                 BUG();
1512         }
1513
1514         sighand = parent->sighand;
1515         spin_lock_irqsave(&sighand->siglock, flags);
1516         if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
1517             !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
1518                 __group_send_sig_info(SIGCHLD, &info, parent);
1519         /*
1520          * Even if SIGCHLD is not generated, we must wake up wait4 calls.
1521          */
1522         __wake_up_parent(tsk, parent);
1523         spin_unlock_irqrestore(&sighand->siglock, flags);
1524 }
1525
1526 static inline int may_ptrace_stop(void)
1527 {
1528         if (!likely(current->ptrace & PT_PTRACED))
1529                 return 0;
1530         /*
1531          * Are we in the middle of do_coredump?
1532          * If so, and our tracer is also part of the coredump, stopping
1533          * is a deadlock situation and pointless because our tracer is
1534          * dead, so don't allow us to stop.
1535          * If SIGKILL was already sent before the caller unlocked
1536          * ->siglock we must see ->core_state != NULL. Otherwise it
1537          * is safe to enter schedule().
1538          */
1539         if (unlikely(current->mm->core_state) &&
1540             unlikely(current->mm == current->parent->mm))
1541                 return 0;
1542
1543         return 1;
1544 }
1545
1546 /*
1547  * Return nonzero if there is a SIGKILL that should be waking us up.
1548  * Called with the siglock held.
1549  */
1550 static int sigkill_pending(struct task_struct *tsk)
1551 {
1552         return  sigismember(&tsk->pending.signal, SIGKILL) ||
1553                 sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
1554 }
1555
1556 /*
1557  * This must be called with current->sighand->siglock held.
1558  *
1559  * This should be the path for all ptrace stops.
1560  * We always set current->last_siginfo while stopped here.
1561  * That makes it a way to test a stopped process for
1562  * being ptrace-stopped vs being job-control-stopped.
1563  *
1564  * If we actually decide not to stop at all because the tracer
1565  * is gone, we keep current->exit_code unless clear_code.
1566  */
1567 static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1568 {
1569         if (arch_ptrace_stop_needed(exit_code, info)) {
1570                 /*
1571                  * The arch code has something special to do before a
1572                  * ptrace stop.  This is allowed to block, e.g. for faults
1573                  * on user stack pages.  We can't keep the siglock while
1574                  * calling arch_ptrace_stop, so we must release it now.
1575                  * To preserve proper semantics, we must do this before
1576                  * any signal bookkeeping like checking group_stop_count.
1577                  * Meanwhile, a SIGKILL could come in before we retake the
1578                  * siglock.  That must prevent us from sleeping in TASK_TRACED.
1579                  * So after regaining the lock, we must check for SIGKILL.
1580                  */
1581                 spin_unlock_irq(&current->sighand->siglock);
1582                 arch_ptrace_stop(exit_code, info);
1583                 spin_lock_irq(&current->sighand->siglock);
1584                 if (sigkill_pending(current))
1585                         return;
1586         }
1587
1588         /*
1589          * If there is a group stop in progress,
1590          * we must participate in the bookkeeping.
1591          */
1592         if (current->signal->group_stop_count > 0)
1593                 --current->signal->group_stop_count;
1594
1595         current->last_siginfo = info;
1596         current->exit_code = exit_code;
1597
1598         /* Let the debugger run.  */
1599         __set_current_state(TASK_TRACED);
1600         spin_unlock_irq(&current->sighand->siglock);
1601         read_lock(&tasklist_lock);
1602         if (may_ptrace_stop()) {
1603                 do_notify_parent_cldstop(current, CLD_TRAPPED);
1604                 /*
1605                  * Don't want to allow preemption here, because
1606                  * sys_ptrace() needs this task to be inactive.
1607                  *
1608                  * XXX: implement read_unlock_no_resched().
1609                  */
1610                 preempt_disable();
1611                 read_unlock(&tasklist_lock);
1612                 preempt_enable_no_resched();
1613                 schedule();
1614         } else {
1615                 /*
1616                  * By the time we got the lock, our tracer went away.
1617                  * Don't drop the lock yet, another tracer may come.
1618                  */
1619                 __set_current_state(TASK_RUNNING);
1620                 if (clear_code)
1621                         current->exit_code = 0;
1622                 read_unlock(&tasklist_lock);
1623         }
1624
1625         /*
1626          * While in TASK_TRACED, we were considered "frozen enough".
1627          * Now that we woke up, it's crucial if we're supposed to be
1628          * frozen that we freeze now before running anything substantial.
1629          */
1630         try_to_freeze();
1631
1632         /*
1633          * We are back.  Now reacquire the siglock before touching
1634          * last_siginfo, so that we are sure to have synchronized with
1635          * any signal-sending on another CPU that wants to examine it.
1636          */
1637         spin_lock_irq(&current->sighand->siglock);
1638         current->last_siginfo = NULL;
1639
1640         /*
1641          * Queued signals ignored us while we were stopped for tracing.
1642          * So check for any that we should take before resuming user mode.
1643          * This sets TIF_SIGPENDING, but never clears it.
1644          */
1645         recalc_sigpending_tsk(current);
1646 }
1647
1648 void ptrace_notify(int exit_code)
1649 {
1650         siginfo_t info;
1651
1652         BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
1653
1654         memset(&info, 0, sizeof info);
1655         info.si_signo = SIGTRAP;
1656         info.si_code = exit_code;
1657         info.si_pid = task_pid_vnr(current);
1658         info.si_uid = current_uid();
1659
1660         /* Let the debugger run.  */
1661         spin_lock_irq(&current->sighand->siglock);
1662         ptrace_stop(exit_code, 1, &info);
1663         spin_unlock_irq(&current->sighand->siglock);
1664 }
1665
1666 static void
1667 finish_stop(int stop_count)
1668 {
1669         /*
1670          * If there are no other threads in the group, or if there is
1671          * a group stop in progress and we are the last to stop,
1672          * report to the parent.  When ptraced, every thread reports itself.
1673          */
1674         if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) {
1675                 read_lock(&tasklist_lock);
1676                 do_notify_parent_cldstop(current, CLD_STOPPED);
1677                 read_unlock(&tasklist_lock);
1678         }
1679
1680         do {
1681                 schedule();
1682         } while (try_to_freeze());
1683         /*
1684          * Now we don't run again until continued.
1685          */
1686         current->exit_code = 0;
1687 }
1688
1689 /*
1690  * This performs the stopping for SIGSTOP and other stop signals.
1691  * We have to stop all threads in the thread group.
1692  * Returns nonzero if we've actually stopped and released the siglock.
1693  * Returns zero if we didn't stop and still hold the siglock.
1694  */
1695 static int do_signal_stop(int signr)
1696 {
1697         struct signal_struct *sig = current->signal;
1698         int stop_count;
1699
1700         if (sig->group_stop_count > 0) {
1701                 /*
1702                  * There is a group stop in progress.  We don't need to
1703                  * start another one.
1704                  */
1705                 stop_count = --sig->group_stop_count;
1706         } else {
1707                 struct task_struct *t;
1708
1709                 if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
1710                     unlikely(signal_group_exit(sig)))
1711                         return 0;
1712                 /*
1713                  * There is no group stop already in progress.
1714                  * We must initiate one now.
1715                  */
1716                 sig->group_exit_code = signr;
1717
1718                 stop_count = 0;
1719                 for (t = next_thread(current); t != current; t = next_thread(t))
1720                         /*
1721                          * Setting state to TASK_STOPPED for a group
1722                          * stop is always done with the siglock held,
1723                          * so this check has no races.
1724                          */
1725                         if (!(t->flags & PF_EXITING) &&
1726                             !task_is_stopped_or_traced(t)) {
1727                                 stop_count++;
1728                                 signal_wake_up(t, 0);
1729                         }
1730                 sig->group_stop_count = stop_count;
1731         }
1732
1733         if (stop_count == 0)
1734                 sig->flags = SIGNAL_STOP_STOPPED;
1735         current->exit_code = sig->group_exit_code;
1736         __set_current_state(TASK_STOPPED);
1737
1738         spin_unlock_irq(&current->sighand->siglock);
1739         finish_stop(stop_count);
1740         return 1;
1741 }
1742
1743 static int ptrace_signal(int signr, siginfo_t *info,
1744                          struct pt_regs *regs, void *cookie)
1745 {
1746         if (!(current->ptrace & PT_PTRACED))
1747                 return signr;
1748
1749         ptrace_signal_deliver(regs, cookie);
1750
1751         /* Let the debugger run.  */
1752         ptrace_stop(signr, 0, info);
1753
1754         /* We're back.  Did the debugger cancel the sig?  */
1755         signr = current->exit_code;
1756         if (signr == 0)
1757                 return signr;
1758
1759         current->exit_code = 0;
1760
1761         /* Update the siginfo structure if the signal has
1762            changed.  If the debugger wanted something
1763            specific in the siginfo structure then it should
1764            have updated *info via PTRACE_SETSIGINFO.  */
1765         if (signr != info->si_signo) {
1766                 info->si_signo = signr;
1767                 info->si_errno = 0;
1768                 info->si_code = SI_USER;
1769                 info->si_pid = task_pid_vnr(current->parent);
1770                 info->si_uid = task_uid(current->parent);
1771         }
1772
1773         /* If the (new) signal is now blocked, requeue it.  */
1774         if (sigismember(&current->blocked, signr)) {
1775                 specific_send_sig_info(signr, info, current);
1776                 signr = 0;
1777         }
1778
1779         return signr;
1780 }
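/*
 * Illustrative only (userspace tracer, not part of this file; the helper
 * name below is made up for the sketch): the stop in ptrace_signal() is what
 * a debugger sees as a signal-delivery-stop, where it may inspect the
 * siginfo and change or cancel the signal before resuming the tracee.
 * Sketch, assuming the tracee is already attached:
 *
 *	#include <signal.h>
 *	#include <sys/ptrace.h>
 *	#include <sys/wait.h>
 *
 *	static void trace_loop(pid_t child)
 *	{
 *		int status;
 *
 *		while (waitpid(child, &status, 0) > 0 && WIFSTOPPED(status)) {
 *			int sig = WSTOPSIG(status);
 *			siginfo_t si;
 *
 *			ptrace(PTRACE_GETSIGINFO, child, 0, &si);
 *			if (sig == SIGUSR1)
 *				sig = 0;	// cancel delivery entirely
 *			ptrace(PTRACE_CONT, child, 0, sig);
 *		}
 *	}
 */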
1781
1782 int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
1783                           struct pt_regs *regs, void *cookie)
1784 {
1785         struct sighand_struct *sighand = current->sighand;
1786         struct signal_struct *signal = current->signal;
1787         int signr;
1788
1789 relock:
1790         /*
1791          * We'll jump back here any time we were stopped in TASK_STOPPED.
1792          * While in TASK_STOPPED, we were considered "frozen enough".
1793          * Now that we woke up, it's crucial if we're supposed to be
1794          * frozen that we freeze now before running anything substantial.
1795          */
1796         try_to_freeze();
1797
1798         spin_lock_irq(&sighand->siglock);
1799         /*
1800          * Every stopped thread goes here after wakeup. Check to see if
1801          * we should notify the parent; prepare_signal(SIGCONT) encodes
1802          * the CLD_ si_code into SIGNAL_CLD_MASK bits.
1803          */
1804         if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
1805                 int why = (signal->flags & SIGNAL_STOP_CONTINUED)
1806                                 ? CLD_CONTINUED : CLD_STOPPED;
1807                 signal->flags &= ~SIGNAL_CLD_MASK;
1808                 spin_unlock_irq(&sighand->siglock);
1809
1810                 if (unlikely(!tracehook_notify_jctl(1, why)))
1811                         goto relock;
1812
1813                 read_lock(&tasklist_lock);
1814                 do_notify_parent_cldstop(current->group_leader, why);
1815                 read_unlock(&tasklist_lock);
1816                 goto relock;
1817         }
1818
1819         for (;;) {
1820                 struct k_sigaction *ka;
1821
1822                 if (unlikely(signal->group_stop_count > 0) &&
1823                     do_signal_stop(0))
1824                         goto relock;
1825
1826                 /*
1827                  * Tracing can induce an artificial signal and choose sigaction.
1828                  * The return value in @signr determines the default action,
1829                  * but @info->si_signo is the signal number we will report.
1830                  */
1831                 signr = tracehook_get_signal(current, regs, info, return_ka);
1832                 if (unlikely(signr < 0))
1833                         goto relock;
1834                 if (unlikely(signr != 0))
1835                         ka = return_ka;
1836                 else {
1837                         signr = dequeue_signal(current, &current->blocked,
1838                                                info);
1839
1840                         if (!signr)
1841                                 break; /* will return 0 */
1842
1843                         if (signr != SIGKILL) {
1844                                 signr = ptrace_signal(signr, info,
1845                                                       regs, cookie);
1846                                 if (!signr)
1847                                         continue;
1848                         }
1849
1850                         ka = &sighand->action[signr-1];
1851                 }
1852
1853                 if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
1854                         continue;
1855                 if (ka->sa.sa_handler != SIG_DFL) {
1856                         /* Run the handler.  */
1857                         *return_ka = *ka;
1858
1859                         if (ka->sa.sa_flags & SA_ONESHOT)
1860                                 ka->sa.sa_handler = SIG_DFL;
1861
1862                         break; /* will return non-zero "signr" value */
1863                 }
1864
1865                 /*
1866                  * Now we are doing the default action for this signal.
1867                  */
1868                 if (sig_kernel_ignore(signr)) /* Default is nothing. */
1869                         continue;
1870
1871                 /*
1872                  * Global init gets no signals it doesn't want.
1873                  * Container-init gets no signals it doesn't want from the same
1874                  * container.
1875                  *
1876                  * Note that if global/container-init sees a sig_kernel_only()
1877                  * signal here, the signal must have been generated internally
1878                  * or must have come from an ancestor namespace. In either
1879                  * case, the signal cannot be dropped.
1880                  */
1881                 if (unlikely(signal->flags & SIGNAL_UNKILLABLE) &&
1882                                 !sig_kernel_only(signr))
1883                         continue;
1884
1885                 if (sig_kernel_stop(signr)) {
1886                         /*
1887                          * The default action is to stop all threads in
1888                          * the thread group.  The job control signals
1889                          * do nothing in an orphaned pgrp, but SIGSTOP
1890                          * always works.  Note that siglock needs to be
1891                  * dropped during the call to is_current_pgrp_orphaned()
1892                          * because of lock ordering with tasklist_lock.
1893                          * This allows an intervening SIGCONT to be posted.
1894                          * We need to check for that and bail out if necessary.
1895                          */
1896                         if (signr != SIGSTOP) {
1897                                 spin_unlock_irq(&sighand->siglock);
1898
1899                                 /* signals can be posted during this window */
1900
1901                                 if (is_current_pgrp_orphaned())
1902                                         goto relock;
1903
1904                                 spin_lock_irq(&sighand->siglock);
1905                         }
1906
1907                         if (likely(do_signal_stop(info->si_signo))) {
1908                                 /* It released the siglock.  */
1909                                 goto relock;
1910                         }
1911
1912                         /*
1913                          * We didn't actually stop, due to a race
1914                          * with SIGCONT or something like that.
1915                          */
1916                         continue;
1917                 }
1918
1919                 spin_unlock_irq(&sighand->siglock);
1920
1921                 /*
1922                  * Anything else is fatal, maybe with a core dump.
1923                  */
1924                 current->flags |= PF_SIGNALED;
1925
1926                 if (sig_kernel_coredump(signr)) {
1927                         if (print_fatal_signals)
1928                                 print_fatal_signal(regs, info->si_signo);
1929                         /*
1930                          * If it was able to dump core, this kills all
1931                          * other threads in the group and synchronizes with
1932                          * their demise.  If we lost the race with another
1933                          * thread getting here, it set group_exit_code
1934                          * first and our do_group_exit call below will use
1935                          * that value and ignore the one we pass it.
1936                          */
1937                         do_coredump(info->si_signo, info->si_signo, regs);
1938                 }
1939
1940                 /*
1941                  * Death signals, no core dump.
1942                  */
1943                 do_group_exit(info->si_signo);
1944                 /* NOTREACHED */
1945         }
1946         spin_unlock_irq(&sighand->siglock);
1947         return signr;
1948 }
1949
1950 void exit_signals(struct task_struct *tsk)
1951 {
1952         int group_stop = 0;
1953         struct task_struct *t;
1954
1955         if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
1956                 tsk->flags |= PF_EXITING;
1957                 return;
1958         }
1959
1960         spin_lock_irq(&tsk->sighand->siglock);
1961         /*
1962          * From now this task is not visible for group-wide signals,
1963          * see wants_signal(), do_signal_stop().
1964          */
1965         tsk->flags |= PF_EXITING;
1966         if (!signal_pending(tsk))
1967                 goto out;
1968
1969         /* It could be that __group_complete_signal() chose us to
1970          * notify about group-wide signal. Another thread should be
1971          * woken now to take the signal since we will not.
1972          */
1973         for (t = tsk; (t = next_thread(t)) != tsk; )
1974                 if (!signal_pending(t) && !(t->flags & PF_EXITING))
1975                         recalc_sigpending_and_wake(t);
1976
1977         if (unlikely(tsk->signal->group_stop_count) &&
1978                         !--tsk->signal->group_stop_count) {
1979                 tsk->signal->flags = SIGNAL_STOP_STOPPED;
1980                 group_stop = 1;
1981         }
1982 out:
1983         spin_unlock_irq(&tsk->sighand->siglock);
1984
1985         if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) {
1986                 read_lock(&tasklist_lock);
1987                 do_notify_parent_cldstop(tsk, CLD_STOPPED);
1988                 read_unlock(&tasklist_lock);
1989         }
1990 }
1991
1992 EXPORT_SYMBOL(recalc_sigpending);
1993 EXPORT_SYMBOL_GPL(dequeue_signal);
1994 EXPORT_SYMBOL(flush_signals);
1995 EXPORT_SYMBOL(force_sig);
1996 EXPORT_SYMBOL(send_sig);
1997 EXPORT_SYMBOL(send_sig_info);
1998 EXPORT_SYMBOL(sigprocmask);
1999 EXPORT_SYMBOL(block_all_signals);
2000 EXPORT_SYMBOL(unblock_all_signals);
2001
2002
2003 /*
2004  * System call entry points.
2005  */
2006
2007 SYSCALL_DEFINE0(restart_syscall)
2008 {
2009         struct restart_block *restart = &current_thread_info()->restart_block;
2010         return restart->fn(restart);
2011 }
2012
2013 long do_no_restart_syscall(struct restart_block *param)
2014 {
2015         return -EINTR;
2016 }
2017
2018 /*
2019  * We don't need to get the kernel lock - this is all local to this
2020  * particular thread.. (and that's good, because this is _heavily_
2021  * particular thread (and that's good, because this is _heavily_
2022  * used by various programs).
2023
2024 /*
2025  * This is also useful for kernel threads that want to temporarily
2026  * (or permanently) block certain signals.
2027  *
2028  * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel
2029  * interface happily blocks "unblockable" signals like SIGKILL
2030  * and friends.
2031  */
2032 int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
2033 {
2034         int error;
2035
2036         spin_lock_irq(&current->sighand->siglock);
2037         if (oldset)
2038                 *oldset = current->blocked;
2039
2040         error = 0;
2041         switch (how) {
2042         case SIG_BLOCK:
2043                 sigorsets(&current->blocked, &current->blocked, set);
2044                 break;
2045         case SIG_UNBLOCK:
2046                 signandsets(&current->blocked, &current->blocked, set);
2047                 break;
2048         case SIG_SETMASK:
2049                 current->blocked = *set;
2050                 break;
2051         default:
2052                 error = -EINVAL;
2053         }
2054         recalc_sigpending();
2055         spin_unlock_irq(&current->sighand->siglock);
2056
2057         return error;
2058 }
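/*
 * Illustrative in-kernel sketch (an assumption, not a call site in this
 * file): because this interface does not filter SIGKILL/SIGSTOP, a kernel
 * thread that wants to be completely deaf to signals can simply do:
 *
 *	sigset_t all;
 *
 *	sigfillset(&all);
 *	sigprocmask(SIG_BLOCK, &all, NULL);
 *
 * Userspace callers go through sys_rt_sigprocmask() below, which strips
 * SIGKILL and SIGSTOP from the requested mask first.
 */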
2059
2060 SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
2061                 sigset_t __user *, oset, size_t, sigsetsize)
2062 {
2063         int error = -EINVAL;
2064         sigset_t old_set, new_set;
2065
2066         /* XXX: Don't preclude handling different sized sigset_t's.  */
2067         if (sigsetsize != sizeof(sigset_t))
2068                 goto out;
2069
2070         if (set) {
2071                 error = -EFAULT;
2072                 if (copy_from_user(&new_set, set, sizeof(*set)))
2073                         goto out;
2074                 sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
2075
2076                 error = sigprocmask(how, &new_set, &old_set);
2077                 if (error)
2078                         goto out;
2079                 if (oset)
2080                         goto set_old;
2081         } else if (oset) {
2082                 spin_lock_irq(&current->sighand->siglock);
2083                 old_set = current->blocked;
2084                 spin_unlock_irq(&current->sighand->siglock);
2085
2086         set_old:
2087                 error = -EFAULT;
2088                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2089                         goto out;
2090         }
2091         error = 0;
2092 out:
2093         return error;
2094 }
2095
2096 long do_sigpending(void __user *set, unsigned long sigsetsize)
2097 {
2098         long error = -EINVAL;
2099         sigset_t pending;
2100
2101         if (sigsetsize > sizeof(sigset_t))
2102                 goto out;
2103
2104         spin_lock_irq(&current->sighand->siglock);
2105         sigorsets(&pending, &current->pending.signal,
2106                   &current->signal->shared_pending.signal);
2107         spin_unlock_irq(&current->sighand->siglock);
2108
2109         /* Outside the lock because only this thread touches it.  */
2110         sigandsets(&pending, &current->blocked, &pending);
2111
2112         error = -EFAULT;
2113         if (!copy_to_user(set, &pending, sigsetsize))
2114                 error = 0;
2115
2116 out:
2117         return error;
2118 }
2119
2120 SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
2121 {
2122         return do_sigpending(set, sigsetsize);
2123 }
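/*
 * Illustrative only (userspace, not part of this file): sigpending(2)
 * reports exactly the blocked-and-pending set computed in do_sigpending()
 * above:
 *
 *	#include <signal.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		sigset_t block, pend;
 *
 *		sigemptyset(&block);
 *		sigaddset(&block, SIGUSR1);
 *		sigprocmask(SIG_BLOCK, &block, NULL);
 *
 *		raise(SIGUSR1);			// stays pending while blocked
 *
 *		sigpending(&pend);
 *		if (sigismember(&pend, SIGUSR1))
 *			puts("SIGUSR1 is pending");
 *		return 0;
 *	}
 */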
2124
2125 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
2126
2127 int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
2128 {
2129         int err;
2130
2131         if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
2132                 return -EFAULT;
2133         if (from->si_code < 0)
2134                 return __copy_to_user(to, from, sizeof(siginfo_t))
2135                         ? -EFAULT : 0;
2136         /*
2137          * If you change siginfo_t structure, please be sure
2138          * this code is fixed accordingly.
2139          * Please remember to update the signalfd_copyinfo() function
2140          * inside fs/signalfd.c too, in case siginfo_t changes.
2141          * It should never copy any pad contained in the structure
2142          * to avoid security leaks, but must copy the generic
2143          * 3 ints plus the relevant union member.
2144          */
2145         err = __put_user(from->si_signo, &to->si_signo);
2146         err |= __put_user(from->si_errno, &to->si_errno);
2147         err |= __put_user((short)from->si_code, &to->si_code);
2148         switch (from->si_code & __SI_MASK) {
2149         case __SI_KILL:
2150                 err |= __put_user(from->si_pid, &to->si_pid);
2151                 err |= __put_user(from->si_uid, &to->si_uid);
2152                 break;
2153         case __SI_TIMER:
2154                  err |= __put_user(from->si_tid, &to->si_tid);
2155                  err |= __put_user(from->si_overrun, &to->si_overrun);
2156                  err |= __put_user(from->si_ptr, &to->si_ptr);
2157                 break;
2158         case __SI_POLL:
2159                 err |= __put_user(from->si_band, &to->si_band);
2160                 err |= __put_user(from->si_fd, &to->si_fd);
2161                 break;
2162         case __SI_FAULT:
2163                 err |= __put_user(from->si_addr, &to->si_addr);
2164 #ifdef __ARCH_SI_TRAPNO
2165                 err |= __put_user(from->si_trapno, &to->si_trapno);
2166 #endif
2167                 break;
2168         case __SI_CHLD:
2169                 err |= __put_user(from->si_pid, &to->si_pid);
2170                 err |= __put_user(from->si_uid, &to->si_uid);
2171                 err |= __put_user(from->si_status, &to->si_status);
2172                 err |= __put_user(from->si_utime, &to->si_utime);
2173                 err |= __put_user(from->si_stime, &to->si_stime);
2174                 break;
2175         case __SI_RT: /* This is not generated by the kernel as of now. */
2176         case __SI_MESGQ: /* But this is */
2177                 err |= __put_user(from->si_pid, &to->si_pid);
2178                 err |= __put_user(from->si_uid, &to->si_uid);
2179                 err |= __put_user(from->si_ptr, &to->si_ptr);
2180                 break;
2181         default: /* this is just in case for now ... */
2182                 err |= __put_user(from->si_pid, &to->si_pid);
2183                 err |= __put_user(from->si_uid, &to->si_uid);
2184                 break;
2185         }
2186         return err;
2187 }
2188
2189 #endif
2190
2191 SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
2192                 siginfo_t __user *, uinfo, const struct timespec __user *, uts,
2193                 size_t, sigsetsize)
2194 {
2195         int ret, sig;
2196         sigset_t these;
2197         struct timespec ts;
2198         siginfo_t info;
2199         long timeout = 0;
2200
2201         /* XXX: Don't preclude handling different sized sigset_t's.  */
2202         if (sigsetsize != sizeof(sigset_t))
2203                 return -EINVAL;
2204
2205         if (copy_from_user(&these, uthese, sizeof(these)))
2206                 return -EFAULT;
2207                 
2208         /*
2209          * Invert the set of allowed signals to get those we
2210          * want to block.
2211          */
2212         sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
2213         signotset(&these);
2214
2215         if (uts) {
2216                 if (copy_from_user(&ts, uts, sizeof(ts)))
2217                         return -EFAULT;
2218                 if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
2219                     || ts.tv_sec < 0)
2220                         return -EINVAL;
2221         }
2222
2223         spin_lock_irq(&current->sighand->siglock);
2224         sig = dequeue_signal(current, &these, &info);
2225         if (!sig) {
2226                 timeout = MAX_SCHEDULE_TIMEOUT;
2227                 if (uts)
2228                         timeout = (timespec_to_jiffies(&ts)
2229                                    + (ts.tv_sec || ts.tv_nsec));
2230
2231                 if (timeout) {
2232                         /* None ready -- temporarily unblock those we're
2233                          * interested in while we are sleeping so that we'll
2234                          * be awakened when they arrive.  */
2235                         current->real_blocked = current->blocked;
2236                         sigandsets(&current->blocked, &current->blocked, &these);
2237                         recalc_sigpending();
2238                         spin_unlock_irq(&current->sighand->siglock);
2239
2240                         timeout = schedule_timeout_interruptible(timeout);
2241
2242                         spin_lock_irq(&current->sighand->siglock);
2243                         sig = dequeue_signal(current, &these, &info);
2244                         current->blocked = current->real_blocked;
2245                         siginitset(&current->real_blocked, 0);
2246                         recalc_sigpending();
2247                 }
2248         }
2249         spin_unlock_irq(&current->sighand->siglock);
2250
2251         if (sig) {
2252                 ret = sig;
2253                 if (uinfo) {
2254                         if (copy_siginfo_to_user(uinfo, &info))
2255                                 ret = -EFAULT;
2256                 }
2257         } else {
2258                 ret = -EAGAIN;
2259                 if (timeout)
2260                         ret = -EINTR;
2261         }
2262
2263         return ret;
2264 }
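/*
 * Illustrative only (userspace, not part of this file): sigtimedwait(2) is
 * the usual entry into the unblock-sleep-requeue dance above.  The caller is
 * expected to block the signal first, so it stays queued until dequeued
 * here:
 *
 *	#include <signal.h>
 *	#include <stdio.h>
 *	#include <time.h>
 *
 *	int main(void)
 *	{
 *		sigset_t set;
 *		siginfo_t si;
 *		struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *		int sig;
 *
 *		sigemptyset(&set);
 *		sigaddset(&set, SIGUSR1);
 *		sigprocmask(SIG_BLOCK, &set, NULL);
 *
 *		sig = sigtimedwait(&set, &si, &ts);
 *		if (sig == SIGUSR1)
 *			printf("SIGUSR1 from pid %d\n", (int)si.si_pid);
 *		else
 *			perror("sigtimedwait");	// EAGAIN on timeout
 *		return 0;
 *	}
 */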
2265
2266 SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
2267 {
2268         struct siginfo info;
2269
2270         info.si_signo = sig;
2271         info.si_errno = 0;
2272         info.si_code = SI_USER;
2273         info.si_pid = task_tgid_vnr(current);
2274         info.si_uid = current_uid();
2275
2276         return kill_something_info(sig, &info, pid);
2277 }
2278
2279 static int do_tkill(pid_t tgid, pid_t pid, int sig)
2280 {
2281         int error;
2282         struct siginfo info;
2283         struct task_struct *p;
2284         unsigned long flags;
2285
2286         error = -ESRCH;
2287         info.si_signo = sig;
2288         info.si_errno = 0;
2289         info.si_code = SI_TKILL;
2290         info.si_pid = task_tgid_vnr(current);
2291         info.si_uid = current_uid();
2292
2293         rcu_read_lock();
2294         p = find_task_by_vpid(pid);
2295         if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
2296                 error = check_kill_permission(sig, &info, p);
2297                 /*
2298                  * The null signal is a permissions and process existence
2299                  * probe.  No signal is actually delivered.
2300                  *
2301                  * If lock_task_sighand() fails we pretend the task dies
2302                  * after receiving the signal. The window is tiny, and the
2303                  * signal is private anyway.
2304                  */
2305                 if (!error && sig && lock_task_sighand(p, &flags)) {
2306                         error = specific_send_sig_info(sig, &info, p);
2307                         unlock_task_sighand(p, &flags);
2308                 }
2309         }
2310         rcu_read_unlock();
2311
2312         return error;
2313 }
2314
2315 /**
2316  *  sys_tgkill - send signal to one specific thread
2317  *  @tgid: the thread group ID of the thread
2318  *  @pid: the PID of the thread
2319  *  @sig: signal to be sent
2320  *
2321  *  This syscall also checks the @tgid and returns -ESRCH even if the PID
2322  *  exists but no longer belongs to the target process. This
2323  *  method solves the problem of threads exiting and PIDs getting reused.
2324  */
2325 SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig)
2326 {
2327         /* This is only valid for single tasks */
2328         if (pid <= 0 || tgid <= 0)
2329                 return -EINVAL;
2330
2331         return do_tkill(tgid, pid, sig);
2332 }
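/*
 * Illustrative only (userspace, not part of this file; the helper name below
 * is made up for the sketch): glibc has not always shipped a tgkill()
 * wrapper, so callers traditionally go through syscall(2).  The tgid
 * argument is what protects against a recycled thread ID:
 *
 *	#include <signal.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static int send_to_thread(pid_t tgid, pid_t tid, int sig)
 *	{
 *		return syscall(SYS_tgkill, tgid, tid, sig);
 *	}
 */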
2333
2334 /*
2335  *  Send a signal to only one task, even if it's a CLONE_THREAD task.
2336  */
2337 SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
2338 {
2339         /* This is only valid for single tasks */
2340         if (pid <= 0)
2341                 return -EINVAL;
2342
2343         return do_tkill(0, pid, sig);
2344 }
2345
2346 SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2347                 siginfo_t __user *, uinfo)
2348 {
2349         siginfo_t info;
2350
2351         if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
2352                 return -EFAULT;
2353
2354         /* Not even root can pretend to send signals from the kernel.
2355            Nor can they impersonate a kill(), which adds source info.  */
2356         if (info.si_code >= 0)
2357                 return -EPERM;
2358         info.si_signo = sig;
2359
2360         /* POSIX.1b doesn't mention process groups.  */
2361         return kill_proc_info(sig, &info, pid);
2362 }
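/*
 * Illustrative only (userspace, not part of this file; the helper name below
 * is made up for the sketch): sigqueue(3) is the sanctioned route into
 * rt_sigqueueinfo(); the C library fills in a negative si_code (SI_QUEUE),
 * so the si_code >= 0 check above does not reject it:
 *
 *	#include <signal.h>
 *
 *	static int notify(pid_t pid)
 *	{
 *		union sigval value = { .sival_int = 42 };
 *
 *		return sigqueue(pid, SIGRTMIN, value);
 *	}
 */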
2363
2364 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2365 {
2366         struct task_struct *t = current;
2367         struct k_sigaction *k;
2368         sigset_t mask;
2369
2370         if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
2371                 return -EINVAL;
2372
2373         k = &t->sighand->action[sig-1];
2374
2375         spin_lock_irq(&current->sighand->siglock);
2376         if (oact)
2377                 *oact = *k;
2378
2379         if (act) {
2380                 sigdelsetmask(&act->sa.sa_mask,
2381                               sigmask(SIGKILL) | sigmask(SIGSTOP));
2382                 *k = *act;
2383                 /*
2384                  * POSIX 3.3.1.3:
2385                  *  "Setting a signal action to SIG_IGN for a signal that is
2386                  *   pending shall cause the pending signal to be discarded,
2387                  *   whether or not it is blocked."
2388                  *
2389                  *  "Setting a signal action to SIG_DFL for a signal that is
2390                  *   pending and whose default action is to ignore the signal
2391                  *   (for example, SIGCHLD), shall cause the pending signal to
2392                  *   be discarded, whether or not it is blocked"
2393                  */
2394                 if (sig_handler_ignored(sig_handler(t, sig), sig)) {
2395                         sigemptyset(&mask);
2396                         sigaddset(&mask, sig);
2397                         rm_from_queue_full(&mask, &t->signal->shared_pending);
2398                         do {
2399                                 rm_from_queue_full(&mask, &t->pending);
2400                                 t = next_thread(t);
2401                         } while (t != current);
2402                 }
2403         }
2404
2405         spin_unlock_irq(&current->sighand->siglock);
2406         return 0;
2407 }
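/*
 * Illustrative only (userspace, not part of this file): the POSIX 3.3.1.3
 * rule implemented above means switching to SIG_IGN throws away a signal
 * that is already pending, even if it is blocked:
 *
 *	#include <signal.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		sigset_t block, pend;
 *		struct sigaction sa = { 0 };
 *
 *		sigemptyset(&block);
 *		sigaddset(&block, SIGUSR1);
 *		sigprocmask(SIG_BLOCK, &block, NULL);
 *		raise(SIGUSR1);			// now pending and blocked
 *
 *		sa.sa_handler = SIG_IGN;
 *		sigaction(SIGUSR1, &sa, NULL);	// discards the pending signal
 *
 *		sigpending(&pend);
 *		printf("still pending: %d\n", sigismember(&pend, SIGUSR1));
 *		return 0;
 *	}
 */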
2408
2409 int 
2410 do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp)
2411 {
2412         stack_t oss;
2413         int error;
2414
2415         if (uoss) {
2416                 oss.ss_sp = (void __user *) current->sas_ss_sp;
2417                 oss.ss_size = current->sas_ss_size;
2418                 oss.ss_flags = sas_ss_flags(sp);
2419         }
2420
2421         if (uss) {
2422                 void __user *ss_sp;
2423                 size_t ss_size;
2424                 int ss_flags;
2425
2426                 error = -EFAULT;
2427                 if (!access_ok(VERIFY_READ, uss, sizeof(*uss))
2428                     || __get_user(ss_sp, &uss->ss_sp)
2429                     || __get_user(ss_flags, &uss->ss_flags)
2430                     || __get_user(ss_size, &uss->ss_size))
2431                         goto out;
2432
2433                 error = -EPERM;
2434                 if (on_sig_stack(sp))
2435                         goto out;
2436
2437                 error = -EINVAL;
2438                 /*
2439                  *
2440                  * Note - this code used to test ss_flags incorrectly;
2441                  *        old code may have been written using ss_flags==0
2442                  *        to mean ss_flags==SS_ONSTACK (as this was the only
2443                  *        way that worked), so this fix preserves that older
2444                  *        mechanism.
2445                  */
2446                 if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
2447                         goto out;
2448
2449                 if (ss_flags == SS_DISABLE) {
2450                         ss_size = 0;
2451                         ss_sp = NULL;
2452                 } else {
2453                         error = -ENOMEM;
2454                         if (ss_size < MINSIGSTKSZ)
2455                                 goto out;
2456                 }
2457
2458                 current->sas_ss_sp = (unsigned long) ss_sp;
2459                 current->sas_ss_size = ss_size;
2460         }
2461
2462         if (uoss) {
2463                 error = -EFAULT;
2464                 if (copy_to_user(uoss, &oss, sizeof(oss)))
2465                         goto out;
2466         }
2467
2468         error = 0;
2469 out:
2470         return error;
2471 }
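/*
 * Illustrative only (userspace, not part of this file; the handler name
 * below is made up for the sketch): the ss_flags and MINSIGSTKSZ checks
 * above are what sigaltstack(2) callers run into.  The usual pairing is an
 * alternate stack plus an SA_ONSTACK handler, e.g. so a SIGSEGV caused by
 * stack overflow can still be reported:
 *
 *	#include <signal.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	static void on_segv(int sig)
 *	{
 *		static const char msg[] = "caught SIGSEGV on the alt stack\n";
 *
 *		write(2, msg, sizeof(msg) - 1);	// async-signal-safe
 *		_exit(1);
 *	}
 *
 *	int main(void)
 *	{
 *		stack_t ss = { .ss_sp = malloc(SIGSTKSZ),
 *			       .ss_size = SIGSTKSZ, .ss_flags = 0 };
 *		struct sigaction sa = { 0 };
 *
 *		sigaltstack(&ss, NULL);
 *
 *		sa.sa_handler = on_segv;
 *		sa.sa_flags = SA_ONSTACK;	// deliver on the alternate stack
 *		sigemptyset(&sa.sa_mask);
 *		sigaction(SIGSEGV, &sa, NULL);
 *		return 0;
 *	}
 */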
2472
2473 #ifdef __ARCH_WANT_SYS_SIGPENDING
2474
2475 SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
2476 {
2477         return do_sigpending(set, sizeof(*set));
2478 }
2479
2480 #endif
2481
2482 #ifdef __ARCH_WANT_SYS_SIGPROCMASK
2483 /* Some platforms have their own version with special arguments; others
2484    support only sys_rt_sigprocmask.  */
2485
2486 SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
2487                 old_sigset_t __user *, oset)
2488 {
2489         int error;
2490         old_sigset_t old_set, new_set;
2491
2492         if (set) {
2493                 error = -EFAULT;
2494                 if (copy_from_user(&new_set, set, sizeof(*set)))
2495                         goto out;
2496                 new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
2497
2498                 spin_lock_irq(&current->sighand->siglock);
2499                 old_set = current->blocked.sig[0];
2500
2501                 error = 0;
2502                 switch (how) {
2503                 default:
2504                         error = -EINVAL;
2505                         break;
2506                 case SIG_BLOCK:
2507                         sigaddsetmask(&current->blocked, new_set);
2508                         break;
2509                 case SIG_UNBLOCK:
2510                         sigdelsetmask(&current->blocked, new_set);
2511                         break;
2512                 case SIG_SETMASK:
2513                         current->blocked.sig[0] = new_set;
2514                         break;
2515                 }
2516
2517                 recalc_sigpending();
2518                 spin_unlock_irq(&current->sighand->siglock);
2519                 if (error)
2520                         goto out;
2521                 if (oset)
2522                         goto set_old;
2523         } else if (oset) {
2524                 old_set = current->blocked.sig[0];
2525         set_old:
2526                 error = -EFAULT;
2527                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2528                         goto out;
2529         }
2530         error = 0;
2531 out:
2532         return error;
2533 }
2534 #endif /* __ARCH_WANT_SYS_SIGPROCMASK */
2535
2536 #ifdef __ARCH_WANT_SYS_RT_SIGACTION
2537 SYSCALL_DEFINE4(rt_sigaction, int, sig,
2538                 const struct sigaction __user *, act,
2539                 struct sigaction __user *, oact,
2540                 size_t, sigsetsize)
2541 {
2542         struct k_sigaction new_sa, old_sa;
2543         int ret = -EINVAL;
2544
2545         /* XXX: Don't preclude handling different sized sigset_t's.  */
2546         if (sigsetsize != sizeof(sigset_t))
2547                 goto out;
2548
2549         if (act) {
2550                 if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
2551                         return -EFAULT;
2552         }
2553
2554         ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
2555
2556         if (!ret && oact) {
2557                 if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
2558                         return -EFAULT;
2559         }
2560 out:
2561         return ret;
2562 }
2563 #endif /* __ARCH_WANT_SYS_RT_SIGACTION */
2564
2565 #ifdef __ARCH_WANT_SYS_SGETMASK
2566
2567 /*
2568  * For backwards compatibility.  Functionality superseded by sigprocmask.
2569  */
2570 SYSCALL_DEFINE0(sgetmask)
2571 {
2572         /* SMP safe */
2573         return current->blocked.sig[0];
2574 }
2575
2576 SYSCALL_DEFINE1(ssetmask, int, newmask)
2577 {
2578         int old;
2579
2580         spin_lock_irq(&current->sighand->siglock);
2581         old = current->blocked.sig[0];
2582
2583         siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
2584                                                   sigmask(SIGSTOP)));
2585         recalc_sigpending();
2586         spin_unlock_irq(&current->sighand->siglock);
2587
2588         return old;
2589 }
2590 #endif /* __ARCH_WANT_SYS_SGETMASK */
2591
2592 #ifdef __ARCH_WANT_SYS_SIGNAL
2593 /*
2594  * For backwards compatibility.  Functionality superseded by sigaction.
2595  */
2596 SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
2597 {
2598         struct k_sigaction new_sa, old_sa;
2599         int ret;
2600
2601         new_sa.sa.sa_handler = handler;
2602         new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2603         sigemptyset(&new_sa.sa.sa_mask);
2604
2605         ret = do_sigaction(sig, &new_sa, &old_sa);
2606
2607         return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
2608 }
2609 #endif /* __ARCH_WANT_SYS_SIGNAL */
2610
2611 #ifdef __ARCH_WANT_SYS_PAUSE
2612
2613 SYSCALL_DEFINE0(pause)
2614 {
2615         current->state = TASK_INTERRUPTIBLE;
2616         schedule();
2617         return -ERESTARTNOHAND;
2618 }
2619
2620 #endif
2621
2622 #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
2623 SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
2624 {
2625         sigset_t newset;
2626
2627         /* XXX: Don't preclude handling different sized sigset_t's.  */
2628         if (sigsetsize != sizeof(sigset_t))
2629                 return -EINVAL;
2630
2631         if (copy_from_user(&newset, unewset, sizeof(newset)))
2632                 return -EFAULT;
2633         sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
2634
2635         spin_lock_irq(&current->sighand->siglock);
2636         current->saved_sigmask = current->blocked;
2637         current->blocked = newset;
2638         recalc_sigpending();
2639         spin_unlock_irq(&current->sighand->siglock);
2640
2641         current->state = TASK_INTERRUPTIBLE;
2642         schedule();
2643         set_restore_sigmask();
2644         return -ERESTARTNOHAND;
2645 }
2646 #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
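/*
 * Illustrative only (userspace, not part of this file; the flag and handler
 * names below are made up for the sketch): rt_sigsuspend() exists so the
 * classic wait-for-a-flag loop has no window in which the signal can be
 * delivered between the test and the sleep:
 *
 *	#include <signal.h>
 *
 *	static volatile sig_atomic_t got_usr1;
 *
 *	static void on_usr1(int sig)
 *	{
 *		got_usr1 = 1;
 *	}
 *
 *	int main(void)
 *	{
 *		sigset_t block, old;
 *		struct sigaction sa = { 0 };
 *
 *		sa.sa_handler = on_usr1;
 *		sigemptyset(&sa.sa_mask);
 *		sigaction(SIGUSR1, &sa, NULL);
 *
 *		sigemptyset(&block);
 *		sigaddset(&block, SIGUSR1);
 *		sigprocmask(SIG_BLOCK, &block, &old);	// close the race window
 *
 *		while (!got_usr1)
 *			sigsuspend(&old);	// atomically unblock and sleep
 *
 *		sigprocmask(SIG_SETMASK, &old, NULL);
 *		return 0;
 *	}
 */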
2647
2648 __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
2649 {
2650         return NULL;
2651 }
2652
2653 void __init signals_init(void)
2654 {
2655         sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
2656 }