make sys_poll() wait at least timeout ms
[linux-2.6] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/compiler.h>
35 #include <linux/highmem.h>
36 #include <linux/pagemap.h>
37 #include <linux/security.h>
38 #include <linux/syscalls.h>
39 #include <linux/random.h>
40 #include <linux/elf.h>
41 #include <linux/utsname.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
45
46 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47 static int load_elf_library(struct file *);
48 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
49                                 int, int, unsigned long);
50
51 /*
52  * If we don't support core dumping, then supply a NULL so we
53  * don't even try.
54  */
55 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
56 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
57 #else
58 #define elf_core_dump   NULL
59 #endif
60
61 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
62 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
63 #else
64 #define ELF_MIN_ALIGN   PAGE_SIZE
65 #endif
66
67 #ifndef ELF_CORE_EFLAGS
68 #define ELF_CORE_EFLAGS 0
69 #endif
70
71 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
72 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
73 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
74
75 static struct linux_binfmt elf_format = {
76                 .module         = THIS_MODULE,
77                 .load_binary    = load_elf_binary,
78                 .load_shlib     = load_elf_library,
79                 .core_dump      = elf_core_dump,
80                 .min_coredump   = ELF_EXEC_PAGESIZE,
81                 .hasvdso        = 1
82 };
83
84 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
85
86 static int set_brk(unsigned long start, unsigned long end)
87 {
88         start = ELF_PAGEALIGN(start);
89         end = ELF_PAGEALIGN(end);
90         if (end > start) {
91                 unsigned long addr;
92                 down_write(&current->mm->mmap_sem);
93                 addr = do_brk(start, end - start);
94                 up_write(&current->mm->mmap_sem);
95                 if (BAD_ADDR(addr))
96                         return addr;
97         }
98         current->mm->start_brk = current->mm->brk = end;
99         return 0;
100 }
101
102 /* We need to explicitly zero any fractional pages
103    after the data section (i.e. bss).  This would
104    contain the junk from the file that should not
105    be in memory
106  */
107 static int padzero(unsigned long elf_bss)
108 {
109         unsigned long nbyte;
110
111         nbyte = ELF_PAGEOFFSET(elf_bss);
112         if (nbyte) {
113                 nbyte = ELF_MIN_ALIGN - nbyte;
114                 if (clear_user((void __user *) elf_bss, nbyte))
115                         return -EFAULT;
116         }
117         return 0;
118 }
119
/*
 * Helpers that hide the stack growth direction from create_elf_tables().
 *
 *   STACK_ADD(sp, items)   - pointer 'items' elf_addr_t slots further along
 *                            the growth direction from sp.
 *   STACK_ROUND(sp, items) - address 'items' slots further along, rounded to
 *                            a 16-byte boundary (up when the stack grows up,
 *                            down otherwise).
 *   STACK_ALLOC(sp, len)   - reserve len units on the stack by updating sp
 *                            in place; evaluates to the start of the
 *                            reserved area.
 *
 * Every macro argument is parenthesized so that expression arguments such
 * as "a + b" expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
134
135 static int
136 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
137                 int interp_aout, unsigned long load_addr,
138                 unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         const char *k_platform = ELF_PLATFORM;
148         int items;
149         elf_addr_t *elf_info;
150         int ei_index = 0;
151         struct task_struct *tsk = current;
152         struct vm_area_struct *vma;
153
154         /*
155          * In some cases (e.g. Hyper-Threading), we want to avoid L1
156          * evictions by the processes running on the same package. One
157          * thing we can do is to shuffle the initial stack for them.
158          */
159
160         p = arch_align_stack(p);
161
162         /*
163          * If this architecture has a platform capability string, copy it
164          * to userspace.  In some cases (Sparc), this info is impossible
165          * for userspace to get any other way, in others (i386) it is
166          * merely difficult.
167          */
168         u_platform = NULL;
169         if (k_platform) {
170                 size_t len = strlen(k_platform) + 1;
171
172                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
173                 if (__copy_to_user(u_platform, k_platform, len))
174                         return -EFAULT;
175         }
176
177         /* Create the ELF interpreter info */
178         elf_info = (elf_addr_t *)current->mm->saved_auxv;
179         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
180 #define NEW_AUX_ENT(id, val) \
181         do { \
182                 elf_info[ei_index++] = id; \
183                 elf_info[ei_index++] = val; \
184         } while (0)
185
186 #ifdef ARCH_DLINFO
187         /* 
188          * ARCH_DLINFO must come first so PPC can do its special alignment of
189          * AUXV.
190          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
191          * ARCH_DLINFO changes
192          */
193         ARCH_DLINFO;
194 #endif
195         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
196         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
197         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
198         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
199         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
200         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
201         NEW_AUX_ENT(AT_BASE, interp_load_addr);
202         NEW_AUX_ENT(AT_FLAGS, 0);
203         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
204         NEW_AUX_ENT(AT_UID, tsk->uid);
205         NEW_AUX_ENT(AT_EUID, tsk->euid);
206         NEW_AUX_ENT(AT_GID, tsk->gid);
207         NEW_AUX_ENT(AT_EGID, tsk->egid);
208         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
209         if (k_platform) {
210                 NEW_AUX_ENT(AT_PLATFORM,
211                             (elf_addr_t)(unsigned long)u_platform);
212         }
213         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
214                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
215         }
216 #undef NEW_AUX_ENT
217         /* AT_NULL is zero; clear the rest too */
218         memset(&elf_info[ei_index], 0,
219                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
220
221         /* And advance past the AT_NULL entry.  */
222         ei_index += 2;
223
224         sp = STACK_ADD(p, ei_index);
225
226         items = (argc + 1) + (envc + 1);
227         if (interp_aout) {
228                 items += 3; /* a.out interpreters require argv & envp too */
229         } else {
230                 items += 1; /* ELF interpreters only put argc on the stack */
231         }
232         bprm->p = STACK_ROUND(sp, items);
233
234         /* Point sp at the lowest address on the stack */
235 #ifdef CONFIG_STACK_GROWSUP
236         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
237         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
238 #else
239         sp = (elf_addr_t __user *)bprm->p;
240 #endif
241
242
243         /*
244          * Grow the stack manually; some architectures have a limit on how
245          * far ahead a user-space access may be in order to grow the stack.
246          */
247         vma = find_extend_vma(current->mm, bprm->p);
248         if (!vma)
249                 return -EFAULT;
250
251         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
252         if (__put_user(argc, sp++))
253                 return -EFAULT;
254         if (interp_aout) {
255                 argv = sp + 2;
256                 envp = argv + argc + 1;
257                 if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
258                     __put_user((elf_addr_t)(unsigned long)envp, sp++))
259                         return -EFAULT;
260         } else {
261                 argv = sp;
262                 envp = argv + argc + 1;
263         }
264
265         /* Populate argv and envp */
266         p = current->mm->arg_end = current->mm->arg_start;
267         while (argc-- > 0) {
268                 size_t len;
269                 if (__put_user((elf_addr_t)p, argv++))
270                         return -EFAULT;
271                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
272                 if (!len || len > MAX_ARG_STRLEN)
273                         return 0;
274                 p += len;
275         }
276         if (__put_user(0, argv))
277                 return -EFAULT;
278         current->mm->arg_end = current->mm->env_start = p;
279         while (envc-- > 0) {
280                 size_t len;
281                 if (__put_user((elf_addr_t)p, envp++))
282                         return -EFAULT;
283                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
284                 if (!len || len > MAX_ARG_STRLEN)
285                         return 0;
286                 p += len;
287         }
288         if (__put_user(0, envp))
289                 return -EFAULT;
290         current->mm->env_end = p;
291
292         /* Put the elf_info on the stack in the right place.  */
293         sp = (elf_addr_t __user *)envp + 1;
294         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
295                 return -EFAULT;
296         return 0;
297 }
298
299 #ifndef elf_map
300
301 static unsigned long elf_map(struct file *filep, unsigned long addr,
302                 struct elf_phdr *eppnt, int prot, int type,
303                 unsigned long total_size)
304 {
305         unsigned long map_addr;
306         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
307         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
308         addr = ELF_PAGESTART(addr);
309         size = ELF_PAGEALIGN(size);
310
311         /* mmap() will return -EINVAL if given a zero size, but a
312          * segment with zero filesize is perfectly valid */
313         if (!size)
314                 return addr;
315
316         down_write(&current->mm->mmap_sem);
317         /*
318         * total_size is the size of the ELF (interpreter) image.
319         * The _first_ mmap needs to know the full size, otherwise
320         * randomization might put this image into an overlapping
321         * position with the ELF binary image. (since size < total_size)
322         * So we first map the 'big' image - and unmap the remainder at
323         * the end. (which unmap is needed for ELF images with holes.)
324         */
325         if (total_size) {
326                 total_size = ELF_PAGEALIGN(total_size);
327                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
328                 if (!BAD_ADDR(map_addr))
329                         do_munmap(current->mm, map_addr+size, total_size-size);
330         } else
331                 map_addr = do_mmap(filep, addr, size, prot, type, off);
332
333         up_write(&current->mm->mmap_sem);
334         return(map_addr);
335 }
336
337 #endif /* !elf_map */
338
339 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
340 {
341         int i, first_idx = -1, last_idx = -1;
342
343         for (i = 0; i < nr; i++) {
344                 if (cmds[i].p_type == PT_LOAD) {
345                         last_idx = i;
346                         if (first_idx == -1)
347                                 first_idx = i;
348                 }
349         }
350         if (first_idx == -1)
351                 return 0;
352
353         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
354                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
355 }
356
357
358 /* This is much more generalized than the library routine read function,
359    so we keep this separate.  Technically the library read function
360    is only provided so that we can read a.out libraries that have
361    an ELF header */
362
363 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
364                 struct file *interpreter, unsigned long *interp_map_addr,
365                 unsigned long no_base)
366 {
367         struct elf_phdr *elf_phdata;
368         struct elf_phdr *eppnt;
369         unsigned long load_addr = 0;
370         int load_addr_set = 0;
371         unsigned long last_bss = 0, elf_bss = 0;
372         unsigned long error = ~0UL;
373         unsigned long total_size;
374         int retval, i, size;
375
376         /* First of all, some simple consistency checks */
377         if (interp_elf_ex->e_type != ET_EXEC &&
378             interp_elf_ex->e_type != ET_DYN)
379                 goto out;
380         if (!elf_check_arch(interp_elf_ex))
381                 goto out;
382         if (!interpreter->f_op || !interpreter->f_op->mmap)
383                 goto out;
384
385         /*
386          * If the size of this structure has changed, then punt, since
387          * we will be doing the wrong thing.
388          */
389         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
390                 goto out;
391         if (interp_elf_ex->e_phnum < 1 ||
392                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
393                 goto out;
394
395         /* Now read in all of the header information */
396         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
397         if (size > ELF_MIN_ALIGN)
398                 goto out;
399         elf_phdata = kmalloc(size, GFP_KERNEL);
400         if (!elf_phdata)
401                 goto out;
402
403         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
404                              (char *)elf_phdata,size);
405         error = -EIO;
406         if (retval != size) {
407                 if (retval < 0)
408                         error = retval; 
409                 goto out_close;
410         }
411
412         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
413         if (!total_size) {
414                 error = -EINVAL;
415                 goto out_close;
416         }
417
418         eppnt = elf_phdata;
419         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
420                 if (eppnt->p_type == PT_LOAD) {
421                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
422                         int elf_prot = 0;
423                         unsigned long vaddr = 0;
424                         unsigned long k, map_addr;
425
426                         if (eppnt->p_flags & PF_R)
427                                 elf_prot = PROT_READ;
428                         if (eppnt->p_flags & PF_W)
429                                 elf_prot |= PROT_WRITE;
430                         if (eppnt->p_flags & PF_X)
431                                 elf_prot |= PROT_EXEC;
432                         vaddr = eppnt->p_vaddr;
433                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
434                                 elf_type |= MAP_FIXED;
435                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
436                                 load_addr = -vaddr;
437
438                         map_addr = elf_map(interpreter, load_addr + vaddr,
439                                         eppnt, elf_prot, elf_type, total_size);
440                         total_size = 0;
441                         if (!*interp_map_addr)
442                                 *interp_map_addr = map_addr;
443                         error = map_addr;
444                         if (BAD_ADDR(map_addr))
445                                 goto out_close;
446
447                         if (!load_addr_set &&
448                             interp_elf_ex->e_type == ET_DYN) {
449                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
450                                 load_addr_set = 1;
451                         }
452
453                         /*
454                          * Check to see if the section's size will overflow the
455                          * allowed task size. Note that p_filesz must always be
456                          * <= p_memsize so it's only necessary to check p_memsz.
457                          */
458                         k = load_addr + eppnt->p_vaddr;
459                         if (BAD_ADDR(k) ||
460                             eppnt->p_filesz > eppnt->p_memsz ||
461                             eppnt->p_memsz > TASK_SIZE ||
462                             TASK_SIZE - eppnt->p_memsz < k) {
463                                 error = -ENOMEM;
464                                 goto out_close;
465                         }
466
467                         /*
468                          * Find the end of the file mapping for this phdr, and
469                          * keep track of the largest address we see for this.
470                          */
471                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
472                         if (k > elf_bss)
473                                 elf_bss = k;
474
475                         /*
476                          * Do the same thing for the memory mapping - between
477                          * elf_bss and last_bss is the bss section.
478                          */
479                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
480                         if (k > last_bss)
481                                 last_bss = k;
482                 }
483         }
484
485         /*
486          * Now fill out the bss section.  First pad the last page up
487          * to the page boundary, and then perform a mmap to make sure
488          * that there are zero-mapped pages up to and including the 
489          * last bss page.
490          */
491         if (padzero(elf_bss)) {
492                 error = -EFAULT;
493                 goto out_close;
494         }
495
496         /* What we have mapped so far */
497         elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
498
499         /* Map the last of the bss segment */
500         if (last_bss > elf_bss) {
501                 down_write(&current->mm->mmap_sem);
502                 error = do_brk(elf_bss, last_bss - elf_bss);
503                 up_write(&current->mm->mmap_sem);
504                 if (BAD_ADDR(error))
505                         goto out_close;
506         }
507
508         error = load_addr;
509
510 out_close:
511         kfree(elf_phdata);
512 out:
513         return error;
514 }
515
516 static unsigned long load_aout_interp(struct exec *interp_ex,
517                 struct file *interpreter)
518 {
519         unsigned long text_data, elf_entry = ~0UL;
520         char __user * addr;
521         loff_t offset;
522
523         current->mm->end_code = interp_ex->a_text;
524         text_data = interp_ex->a_text + interp_ex->a_data;
525         current->mm->end_data = text_data;
526         current->mm->brk = interp_ex->a_bss + text_data;
527
528         switch (N_MAGIC(*interp_ex)) {
529         case OMAGIC:
530                 offset = 32;
531                 addr = (char __user *)0;
532                 break;
533         case ZMAGIC:
534         case QMAGIC:
535                 offset = N_TXTOFF(*interp_ex);
536                 addr = (char __user *)N_TXTADDR(*interp_ex);
537                 break;
538         default:
539                 goto out;
540         }
541
542         down_write(&current->mm->mmap_sem);     
543         do_brk(0, text_data);
544         up_write(&current->mm->mmap_sem);
545         if (!interpreter->f_op || !interpreter->f_op->read)
546                 goto out;
547         if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
548                 goto out;
549         flush_icache_range((unsigned long)addr,
550                            (unsigned long)addr + text_data);
551
552         down_write(&current->mm->mmap_sem);     
553         do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
554                 interp_ex->a_bss);
555         up_write(&current->mm->mmap_sem);
556         elf_entry = interp_ex->a_entry;
557
558 out:
559         return elf_entry;
560 }
561
562 /*
563  * These are the functions used to load ELF style executables and shared
564  * libraries.  There is no binary dependent code anywhere else.
565  */
566
567 #define INTERPRETER_NONE 0
568 #define INTERPRETER_AOUT 1
569 #define INTERPRETER_ELF 2
570
571 #ifndef STACK_RND_MASK
572 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
573 #endif
574
575 static unsigned long randomize_stack_top(unsigned long stack_top)
576 {
577         unsigned int random_variable = 0;
578
579         if ((current->flags & PF_RANDOMIZE) &&
580                 !(current->personality & ADDR_NO_RANDOMIZE)) {
581                 random_variable = get_random_int() & STACK_RND_MASK;
582                 random_variable <<= PAGE_SHIFT;
583         }
584 #ifdef CONFIG_STACK_GROWSUP
585         return PAGE_ALIGN(stack_top) + random_variable;
586 #else
587         return PAGE_ALIGN(stack_top) - random_variable;
588 #endif
589 }
590
591 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
592 {
593         struct file *interpreter = NULL; /* to shut gcc up */
594         unsigned long load_addr = 0, load_bias = 0;
595         int load_addr_set = 0;
596         char * elf_interpreter = NULL;
597         unsigned int interpreter_type = INTERPRETER_NONE;
598         unsigned long error;
599         struct elf_phdr *elf_ppnt, *elf_phdata;
600         unsigned long elf_bss, elf_brk;
601         int elf_exec_fileno;
602         int retval, i;
603         unsigned int size;
604         unsigned long elf_entry;
605         unsigned long interp_load_addr = 0;
606         unsigned long start_code, end_code, start_data, end_data;
607         unsigned long reloc_func_desc = 0;
608         char passed_fileno[6];
609         struct files_struct *files;
610         int executable_stack = EXSTACK_DEFAULT;
611         unsigned long def_flags = 0;
612         struct {
613                 struct elfhdr elf_ex;
614                 struct elfhdr interp_elf_ex;
615                 struct exec interp_ex;
616         } *loc;
617
618         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
619         if (!loc) {
620                 retval = -ENOMEM;
621                 goto out_ret;
622         }
623         
624         /* Get the exec-header */
625         loc->elf_ex = *((struct elfhdr *)bprm->buf);
626
627         retval = -ENOEXEC;
628         /* First of all, some simple consistency checks */
629         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
630                 goto out;
631
632         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
633                 goto out;
634         if (!elf_check_arch(&loc->elf_ex))
635                 goto out;
636         if (!bprm->file->f_op||!bprm->file->f_op->mmap)
637                 goto out;
638
639         /* Now read in all of the header information */
640         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
641                 goto out;
642         if (loc->elf_ex.e_phnum < 1 ||
643                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
644                 goto out;
645         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
646         retval = -ENOMEM;
647         elf_phdata = kmalloc(size, GFP_KERNEL);
648         if (!elf_phdata)
649                 goto out;
650
651         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
652                              (char *)elf_phdata, size);
653         if (retval != size) {
654                 if (retval >= 0)
655                         retval = -EIO;
656                 goto out_free_ph;
657         }
658
659         files = current->files; /* Refcounted so ok */
660         retval = unshare_files();
661         if (retval < 0)
662                 goto out_free_ph;
663         if (files == current->files) {
664                 put_files_struct(files);
665                 files = NULL;
666         }
667
668         /* exec will make our files private anyway, but for the a.out
669            loader stuff we need to do it earlier */
670         retval = get_unused_fd();
671         if (retval < 0)
672                 goto out_free_fh;
673         get_file(bprm->file);
674         fd_install(elf_exec_fileno = retval, bprm->file);
675
676         elf_ppnt = elf_phdata;
677         elf_bss = 0;
678         elf_brk = 0;
679
680         start_code = ~0UL;
681         end_code = 0;
682         start_data = 0;
683         end_data = 0;
684
685         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
686                 if (elf_ppnt->p_type == PT_INTERP) {
687                         /* This is the program interpreter used for
688                          * shared libraries - for now assume that this
689                          * is an a.out format binary
690                          */
691                         retval = -ENOEXEC;
692                         if (elf_ppnt->p_filesz > PATH_MAX || 
693                             elf_ppnt->p_filesz < 2)
694                                 goto out_free_file;
695
696                         retval = -ENOMEM;
697                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
698                                                   GFP_KERNEL);
699                         if (!elf_interpreter)
700                                 goto out_free_file;
701
702                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
703                                              elf_interpreter,
704                                              elf_ppnt->p_filesz);
705                         if (retval != elf_ppnt->p_filesz) {
706                                 if (retval >= 0)
707                                         retval = -EIO;
708                                 goto out_free_interp;
709                         }
710                         /* make sure path is NULL terminated */
711                         retval = -ENOEXEC;
712                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
713                                 goto out_free_interp;
714
715                         /*
716                          * The early SET_PERSONALITY here is so that the lookup
717                          * for the interpreter happens in the namespace of the 
718                          * to-be-execed image.  SET_PERSONALITY can select an
719                          * alternate root.
720                          *
721                          * However, SET_PERSONALITY is NOT allowed to switch
722                          * this task into the new images's memory mapping
723                          * policy - that is, TASK_SIZE must still evaluate to
724                          * that which is appropriate to the execing application.
725                          * This is because exit_mmap() needs to have TASK_SIZE
726                          * evaluate to the size of the old image.
727                          *
728                          * So if (say) a 64-bit application is execing a 32-bit
729                          * application it is the architecture's responsibility
730                          * to defer changing the value of TASK_SIZE until the
731                          * switch really is going to happen - do this in
732                          * flush_thread().      - akpm
733                          */
734                         SET_PERSONALITY(loc->elf_ex, 0);
735
736                         interpreter = open_exec(elf_interpreter);
737                         retval = PTR_ERR(interpreter);
738                         if (IS_ERR(interpreter))
739                                 goto out_free_interp;
740
741                         /*
742                          * If the binary is not readable then enforce
743                          * mm->dumpable = 0 regardless of the interpreter's
744                          * permissions.
745                          */
746                         if (file_permission(interpreter, MAY_READ) < 0)
747                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
748
749                         retval = kernel_read(interpreter, 0, bprm->buf,
750                                              BINPRM_BUF_SIZE);
751                         if (retval != BINPRM_BUF_SIZE) {
752                                 if (retval >= 0)
753                                         retval = -EIO;
754                                 goto out_free_dentry;
755                         }
756
757                         /* Get the exec headers */
758                         loc->interp_ex = *((struct exec *)bprm->buf);
759                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
760                         break;
761                 }
762                 elf_ppnt++;
763         }
764
765         elf_ppnt = elf_phdata;
766         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
767                 if (elf_ppnt->p_type == PT_GNU_STACK) {
768                         if (elf_ppnt->p_flags & PF_X)
769                                 executable_stack = EXSTACK_ENABLE_X;
770                         else
771                                 executable_stack = EXSTACK_DISABLE_X;
772                         break;
773                 }
774
775         /* Some simple consistency checks for the interpreter */
776         if (elf_interpreter) {
777                 static int warn;
778                 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
779
780                 /* Now figure out which format our binary is */
781                 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
782                     (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
783                     (N_MAGIC(loc->interp_ex) != QMAGIC))
784                         interpreter_type = INTERPRETER_ELF;
785
786                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
787                         interpreter_type &= ~INTERPRETER_ELF;
788
789                 if (interpreter_type == INTERPRETER_AOUT && warn < 10) {
790                         printk(KERN_WARNING "a.out ELF interpreter %s is "
791                                 "deprecated and will not be supported "
792                                 "after Linux 2.6.25\n", elf_interpreter);
793                         warn++;
794                 }
795
796                 retval = -ELIBBAD;
797                 if (!interpreter_type)
798                         goto out_free_dentry;
799
800                 /* Make sure only one type was selected */
801                 if ((interpreter_type & INTERPRETER_ELF) &&
802                      interpreter_type != INTERPRETER_ELF) {
803                         // FIXME - ratelimit this before re-enabling
804                         // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
805                         interpreter_type = INTERPRETER_ELF;
806                 }
807                 /* Verify the interpreter has a valid arch */
808                 if ((interpreter_type == INTERPRETER_ELF) &&
809                     !elf_check_arch(&loc->interp_elf_ex))
810                         goto out_free_dentry;
811         } else {
812                 /* Executables without an interpreter also need a personality  */
813                 SET_PERSONALITY(loc->elf_ex, 0);
814         }
815
816         /* OK, we are done with that, now set up the arg stuff,
817            and then start this sucker up */
818         if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
819                 char *passed_p = passed_fileno;
820                 sprintf(passed_fileno, "%d", elf_exec_fileno);
821
822                 if (elf_interpreter) {
823                         retval = copy_strings_kernel(1, &passed_p, bprm);
824                         if (retval)
825                                 goto out_free_dentry; 
826                         bprm->argc++;
827                 }
828         }
829
830         /* Flush all traces of the currently running executable */
831         retval = flush_old_exec(bprm);
832         if (retval)
833                 goto out_free_dentry;
834
835         /* Discard our unneeded old files struct */
836         if (files) {
837                 put_files_struct(files);
838                 files = NULL;
839         }
840
841         /* OK, This is the point of no return */
842         current->flags &= ~PF_FORKNOEXEC;
843         current->mm->def_flags = def_flags;
844
845         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
846            may depend on the personality.  */
847         SET_PERSONALITY(loc->elf_ex, 0);
848         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
849                 current->personality |= READ_IMPLIES_EXEC;
850
851         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
852                 current->flags |= PF_RANDOMIZE;
853         arch_pick_mmap_layout(current->mm);
854
855         /* Do this so that we can load the interpreter, if need be.  We will
856            change some of these later */
857         current->mm->free_area_cache = current->mm->mmap_base;
858         current->mm->cached_hole_size = 0;
859         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
860                                  executable_stack);
861         if (retval < 0) {
862                 send_sig(SIGKILL, current, 0);
863                 goto out_free_dentry;
864         }
865         
866         current->mm->start_stack = bprm->p;
867
868         /* Now we do a little grungy work by mmaping the ELF image into
869            the correct location in memory. */
870         for(i = 0, elf_ppnt = elf_phdata;
871             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
872                 int elf_prot = 0, elf_flags;
873                 unsigned long k, vaddr;
874
875                 if (elf_ppnt->p_type != PT_LOAD)
876                         continue;
877
878                 if (unlikely (elf_brk > elf_bss)) {
879                         unsigned long nbyte;
880                     
881                         /* There was a PT_LOAD segment with p_memsz > p_filesz
882                            before this one. Map anonymous pages, if needed,
883                            and clear the area.  */
884                         retval = set_brk (elf_bss + load_bias,
885                                           elf_brk + load_bias);
886                         if (retval) {
887                                 send_sig(SIGKILL, current, 0);
888                                 goto out_free_dentry;
889                         }
890                         nbyte = ELF_PAGEOFFSET(elf_bss);
891                         if (nbyte) {
892                                 nbyte = ELF_MIN_ALIGN - nbyte;
893                                 if (nbyte > elf_brk - elf_bss)
894                                         nbyte = elf_brk - elf_bss;
895                                 if (clear_user((void __user *)elf_bss +
896                                                         load_bias, nbyte)) {
897                                         /*
898                                          * This bss-zeroing can fail if the ELF
899                                          * file specifies odd protections. So
900                                          * we don't check the return value
901                                          */
902                                 }
903                         }
904                 }
905
906                 if (elf_ppnt->p_flags & PF_R)
907                         elf_prot |= PROT_READ;
908                 if (elf_ppnt->p_flags & PF_W)
909                         elf_prot |= PROT_WRITE;
910                 if (elf_ppnt->p_flags & PF_X)
911                         elf_prot |= PROT_EXEC;
912
913                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
914
915                 vaddr = elf_ppnt->p_vaddr;
916                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
917                         elf_flags |= MAP_FIXED;
918                 } else if (loc->elf_ex.e_type == ET_DYN) {
919                         /* Try and get dynamic programs out of the way of the
920                          * default mmap base, as well as whatever program they
921                          * might try to exec.  This is because the brk will
922                          * follow the loader, and is not movable.  */
923 #ifdef CONFIG_X86
924                         load_bias = 0;
925 #else
926                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
927 #endif
928                 }
929
930                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
931                                 elf_prot, elf_flags, 0);
932                 if (BAD_ADDR(error)) {
933                         send_sig(SIGKILL, current, 0);
934                         retval = IS_ERR((void *)error) ?
935                                 PTR_ERR((void*)error) : -EINVAL;
936                         goto out_free_dentry;
937                 }
938
939                 if (!load_addr_set) {
940                         load_addr_set = 1;
941                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
942                         if (loc->elf_ex.e_type == ET_DYN) {
943                                 load_bias += error -
944                                              ELF_PAGESTART(load_bias + vaddr);
945                                 load_addr += load_bias;
946                                 reloc_func_desc = load_bias;
947                         }
948                 }
949                 k = elf_ppnt->p_vaddr;
950                 if (k < start_code)
951                         start_code = k;
952                 if (start_data < k)
953                         start_data = k;
954
955                 /*
956                  * Check to see if the section's size will overflow the
957                  * allowed task size. Note that p_filesz must always be
958                  * <= p_memsz so it is only necessary to check p_memsz.
959                  */
960                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
961                     elf_ppnt->p_memsz > TASK_SIZE ||
962                     TASK_SIZE - elf_ppnt->p_memsz < k) {
963                         /* set_brk can never work. Avoid overflows. */
964                         send_sig(SIGKILL, current, 0);
965                         retval = -EINVAL;
966                         goto out_free_dentry;
967                 }
968
969                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
970
971                 if (k > elf_bss)
972                         elf_bss = k;
973                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
974                         end_code = k;
975                 if (end_data < k)
976                         end_data = k;
977                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
978                 if (k > elf_brk)
979                         elf_brk = k;
980         }
981
982         loc->elf_ex.e_entry += load_bias;
983         elf_bss += load_bias;
984         elf_brk += load_bias;
985         start_code += load_bias;
986         end_code += load_bias;
987         start_data += load_bias;
988         end_data += load_bias;
989
990         /* Calling set_brk effectively mmaps the pages that we need
991          * for the bss and break sections.  We must do this before
992          * mapping in the interpreter, to make sure it doesn't wind
993          * up getting placed where the bss needs to go.
994          */
995         retval = set_brk(elf_bss, elf_brk);
996         if (retval) {
997                 send_sig(SIGKILL, current, 0);
998                 goto out_free_dentry;
999         }
1000         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1001                 send_sig(SIGSEGV, current, 0);
1002                 retval = -EFAULT; /* Nobody gets to see this, but.. */
1003                 goto out_free_dentry;
1004         }
1005
1006         if (elf_interpreter) {
1007                 if (interpreter_type == INTERPRETER_AOUT) {
1008                         elf_entry = load_aout_interp(&loc->interp_ex,
1009                                                      interpreter);
1010                 } else {
1011                         unsigned long uninitialized_var(interp_map_addr);
1012
1013                         elf_entry = load_elf_interp(&loc->interp_elf_ex,
1014                                                     interpreter,
1015                                                     &interp_map_addr,
1016                                                     load_bias);
1017                         if (!IS_ERR((void *)elf_entry)) {
1018                                 /*
1019                                  * load_elf_interp() returns relocation
1020                                  * adjustment
1021                                  */
1022                                 interp_load_addr = elf_entry;
1023                                 elf_entry += loc->interp_elf_ex.e_entry;
1024                         }
1025                 }
1026                 if (BAD_ADDR(elf_entry)) {
1027                         force_sig(SIGSEGV, current);
1028                         retval = IS_ERR((void *)elf_entry) ?
1029                                         (int)elf_entry : -EINVAL;
1030                         goto out_free_dentry;
1031                 }
1032                 reloc_func_desc = interp_load_addr;
1033
1034                 allow_write_access(interpreter);
1035                 fput(interpreter);
1036                 kfree(elf_interpreter);
1037         } else {
1038                 elf_entry = loc->elf_ex.e_entry;
1039                 if (BAD_ADDR(elf_entry)) {
1040                         force_sig(SIGSEGV, current);
1041                         retval = -EINVAL;
1042                         goto out_free_dentry;
1043                 }
1044         }
1045
1046         kfree(elf_phdata);
1047
1048         if (interpreter_type != INTERPRETER_AOUT)
1049                 sys_close(elf_exec_fileno);
1050
1051         set_binfmt(&elf_format);
1052
1053 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1054         retval = arch_setup_additional_pages(bprm, executable_stack);
1055         if (retval < 0) {
1056                 send_sig(SIGKILL, current, 0);
1057                 goto out;
1058         }
1059 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1060
1061         compute_creds(bprm);
1062         current->flags &= ~PF_FORKNOEXEC;
1063         retval = create_elf_tables(bprm, &loc->elf_ex,
1064                           (interpreter_type == INTERPRETER_AOUT),
1065                           load_addr, interp_load_addr);
1066         if (retval < 0) {
1067                 send_sig(SIGKILL, current, 0);
1068                 goto out;
1069         }
1070         /* N.B. passed_fileno might not be initialized? */
1071         if (interpreter_type == INTERPRETER_AOUT)
1072                 current->mm->arg_start += strlen(passed_fileno) + 1;
1073         current->mm->end_code = end_code;
1074         current->mm->start_code = start_code;
1075         current->mm->start_data = start_data;
1076         current->mm->end_data = end_data;
1077         current->mm->start_stack = bprm->p;
1078
1079 #ifdef arch_randomize_brk
1080         if (current->flags & PF_RANDOMIZE)
1081                 current->mm->brk = current->mm->start_brk =
1082                         arch_randomize_brk(current->mm);
1083 #endif
1084
1085         if (current->personality & MMAP_PAGE_ZERO) {
1086                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1087                    and some applications "depend" upon this behavior.
1088                    Since we do not have the power to recompile these, we
1089                    emulate the SVr4 behavior. Sigh. */
1090                 down_write(&current->mm->mmap_sem);
1091                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1092                                 MAP_FIXED | MAP_PRIVATE, 0);
1093                 up_write(&current->mm->mmap_sem);
1094         }
1095
1096 #ifdef ELF_PLAT_INIT
1097         /*
1098          * The ABI may specify that certain registers be set up in special
1099          * ways (on i386 %edx is the address of a DT_FINI function, for
1100          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1101          * that the e_entry field is the address of the function descriptor
1102          * for the startup routine, rather than the address of the startup
1103          * routine itself.  This macro performs whatever initialization to
1104          * the regs structure is required as well as any relocations to the
1105          * function descriptor entries when executing dynamically links apps.
1106          */
1107         ELF_PLAT_INIT(regs, reloc_func_desc);
1108 #endif
1109
1110         start_thread(regs, elf_entry, bprm->p);
1111         if (unlikely(current->ptrace & PT_PTRACED)) {
1112                 if (current->ptrace & PT_TRACE_EXEC)
1113                         ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1114                 else
1115                         send_sig(SIGTRAP, current, 0);
1116         }
1117         retval = 0;
1118 out:
1119         kfree(loc);
1120 out_ret:
1121         return retval;
1122
1123         /* error cleanup */
1124 out_free_dentry:
1125         allow_write_access(interpreter);
1126         if (interpreter)
1127                 fput(interpreter);
1128 out_free_interp:
1129         kfree(elf_interpreter);
1130 out_free_file:
1131         sys_close(elf_exec_fileno);
1132 out_free_fh:
1133         if (files)
1134                 reset_files_struct(current, files);
1135 out_free_ph:
1136         kfree(elf_phdata);
1137         goto out;
1138 }
1139
1140 /* This is really simpleminded and specialized - we are loading an
1141    a.out library that is given an ELF header. */
1142 static int load_elf_library(struct file *file)
1143 {
1144         struct elf_phdr *elf_phdata;
1145         struct elf_phdr *eppnt;
1146         unsigned long elf_bss, bss, len;
1147         int retval, error, i, j;
1148         struct elfhdr elf_ex;
1149
1150         error = -ENOEXEC;
1151         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1152         if (retval != sizeof(elf_ex))
1153                 goto out;
1154
1155         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1156                 goto out;
1157
1158         /* First of all, some simple consistency checks */
1159         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1160             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1161                 goto out;
1162
1163         /* Now read in all of the header information */
1164
1165         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1166         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1167
1168         error = -ENOMEM;
1169         elf_phdata = kmalloc(j, GFP_KERNEL);
1170         if (!elf_phdata)
1171                 goto out;
1172
1173         eppnt = elf_phdata;
1174         error = -ENOEXEC;
1175         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1176         if (retval != j)
1177                 goto out_free_ph;
1178
1179         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1180                 if ((eppnt + i)->p_type == PT_LOAD)
1181                         j++;
1182         if (j != 1)
1183                 goto out_free_ph;
1184
1185         while (eppnt->p_type != PT_LOAD)
1186                 eppnt++;
1187
1188         /* Now use mmap to map the library into memory. */
1189         down_write(&current->mm->mmap_sem);
1190         error = do_mmap(file,
1191                         ELF_PAGESTART(eppnt->p_vaddr),
1192                         (eppnt->p_filesz +
1193                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1194                         PROT_READ | PROT_WRITE | PROT_EXEC,
1195                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1196                         (eppnt->p_offset -
1197                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1198         up_write(&current->mm->mmap_sem);
1199         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1200                 goto out_free_ph;
1201
1202         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1203         if (padzero(elf_bss)) {
1204                 error = -EFAULT;
1205                 goto out_free_ph;
1206         }
1207
1208         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1209                             ELF_MIN_ALIGN - 1);
1210         bss = eppnt->p_memsz + eppnt->p_vaddr;
1211         if (bss > len) {
1212                 down_write(&current->mm->mmap_sem);
1213                 do_brk(len, bss - len);
1214                 up_write(&current->mm->mmap_sem);
1215         }
1216         error = 0;
1217
1218 out_free_ph:
1219         kfree(elf_phdata);
1220 out:
1221         return error;
1222 }
1223
1224 /*
1225  * Note that some platforms still use traditional core dumps and not
1226  * the ELF core dump.  Each platform can select it as appropriate.
1227  */
1228 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1229
1230 /*
1231  * ELF core dumper
1232  *
1233  * Modelled on fs/exec.c:aout_core_dump()
1234  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1235  */
1236 /*
1237  * These are the only things you should do on a core-file: use only these
1238  * functions to write out all the necessary info.
1239  */
1240 static int dump_write(struct file *file, const void *addr, int nr)
1241 {
1242         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1243 }
1244
1245 static int dump_seek(struct file *file, loff_t off)
1246 {
1247         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1248                 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1249                         return 0;
1250         } else {
1251                 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1252                 if (!buf)
1253                         return 0;
1254                 while (off > 0) {
1255                         unsigned long n = off;
1256                         if (n > PAGE_SIZE)
1257                                 n = PAGE_SIZE;
1258                         if (!dump_write(file, buf, n))
1259                                 return 0;
1260                         off -= n;
1261                 }
1262                 free_page((unsigned long)buf);
1263         }
1264         return 1;
1265 }
1266
1267 /*
1268  * Decide what to dump of a segment, part, all or none.
1269  */
1270 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1271                                    unsigned long mm_flags)
1272 {
1273         /* The vma can be set up to tell us the answer directly.  */
1274         if (vma->vm_flags & VM_ALWAYSDUMP)
1275                 goto whole;
1276
1277         /* Do not dump I/O mapped devices or special mappings */
1278         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1279                 return 0;
1280
1281 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1282
1283         /* By default, dump shared memory if mapped from an anonymous file. */
1284         if (vma->vm_flags & VM_SHARED) {
1285                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1286                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1287                         goto whole;
1288                 return 0;
1289         }
1290
1291         /* Dump segments that have been written to.  */
1292         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1293                 goto whole;
1294         if (vma->vm_file == NULL)
1295                 return 0;
1296
1297         if (FILTER(MAPPED_PRIVATE))
1298                 goto whole;
1299
1300         /*
1301          * If this looks like the beginning of a DSO or executable mapping,
1302          * check for an ELF header.  If we find one, dump the first page to
1303          * aid in determining what was mapped here.
1304          */
1305         if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1306                 u32 __user *header = (u32 __user *) vma->vm_start;
1307                 u32 word;
1308                 /*
1309                  * Doing it this way gets the constant folded by GCC.
1310                  */
1311                 union {
1312                         u32 cmp;
1313                         char elfmag[SELFMAG];
1314                 } magic;
1315                 BUILD_BUG_ON(SELFMAG != sizeof word);
1316                 magic.elfmag[EI_MAG0] = ELFMAG0;
1317                 magic.elfmag[EI_MAG1] = ELFMAG1;
1318                 magic.elfmag[EI_MAG2] = ELFMAG2;
1319                 magic.elfmag[EI_MAG3] = ELFMAG3;
1320                 if (get_user(word, header) == 0 && word == magic.cmp)
1321                         return PAGE_SIZE;
1322         }
1323
1324 #undef  FILTER
1325
1326         return 0;
1327
1328 whole:
1329         return vma->vm_end - vma->vm_start;
1330 }
1331
1332 /* An ELF note in memory, described by pointers until writenote() emits it */
1333 struct memelfnote
1334 {
1335         const char *name;       /* NUL-terminated note name; strlen()+1 becomes n_namesz */
1336         int type;               /* copied into n_type when the note is written */
1337         unsigned int datasz;    /* size in bytes of *data; becomes n_descsz */
1338         void *data;             /* note payload, written after the padded name */
1339 };
1340
1341 static int notesize(struct memelfnote *en)
1342 {
1343         int sz;
1344
1345         sz = sizeof(struct elf_note);
1346         sz += roundup(strlen(en->name) + 1, 4);
1347         sz += roundup(en->datasz, 4);
1348
1349         return sz;
1350 }
1351
1352 #define DUMP_WRITE(addr, nr, foffset)   \
1353         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1354
1355 static int alignfile(struct file *file, loff_t *foffset)
1356 {
1357         static const char buf[4] = { 0, };
1358         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1359         return 1;
1360 }
1361
1362 static int writenote(struct memelfnote *men, struct file *file,
1363                         loff_t *foffset)
1364 {
1365         struct elf_note en;
1366         en.n_namesz = strlen(men->name) + 1;
1367         en.n_descsz = men->datasz;
1368         en.n_type = men->type;
1369
1370         DUMP_WRITE(&en, sizeof(en), foffset);
1371         DUMP_WRITE(men->name, en.n_namesz, foffset);
1372         if (!alignfile(file, foffset))
1373                 return 0;
1374         DUMP_WRITE(men->data, men->datasz, foffset);
1375         if (!alignfile(file, foffset))
1376                 return 0;
1377
1378         return 1;
1379 }
1380 #undef DUMP_WRITE
1381
/*
 * Write/seek helpers for code that has `file`, `size` and `limit` in scope
 * and provides an `end_coredump` label.
 *
 * DUMP_WRITE adds nr to the running `size` and jumps to end_coredump if that
 * exceeds `limit` or if dump_write() fails.  DUMP_SEEK jumps to end_coredump
 * if dump_seek() fails.
 *
 * NOTE(review): both expand to a bare `if` statement rather than a
 * do { } while (0) block, so an `else` placed directly after an invocation
 * would attach to the macro's `if`.
 */
1382 #define DUMP_WRITE(addr, nr)    \
1383         if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1384                 goto end_coredump;
1385 #define DUMP_SEEK(off)  \
1386         if (!dump_seek(file, (off))) \
1387                 goto end_coredump;
1388
1389 static void fill_elf_header(struct elfhdr *elf, int segs,
1390                             u16 machine, u32 flags, u8 osabi)
1391 {
1392         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1393         elf->e_ident[EI_CLASS] = ELF_CLASS;
1394         elf->e_ident[EI_DATA] = ELF_DATA;
1395         elf->e_ident[EI_VERSION] = EV_CURRENT;
1396         elf->e_ident[EI_OSABI] = ELF_OSABI;
1397         memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1398
1399         elf->e_type = ET_CORE;
1400         elf->e_machine = machine;
1401         elf->e_version = EV_CURRENT;
1402         elf->e_entry = 0;
1403         elf->e_phoff = sizeof(struct elfhdr);
1404         elf->e_shoff = 0;
1405         elf->e_flags = flags;
1406         elf->e_ehsize = sizeof(struct elfhdr);
1407         elf->e_phentsize = sizeof(struct elf_phdr);
1408         elf->e_phnum = segs;
1409         elf->e_shentsize = 0;
1410         elf->e_shnum = 0;
1411         elf->e_shstrndx = 0;
1412         return;
1413 }
1414
1415 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1416 {
1417         phdr->p_type = PT_NOTE;
1418         phdr->p_offset = offset;
1419         phdr->p_vaddr = 0;
1420         phdr->p_paddr = 0;
1421         phdr->p_filesz = sz;
1422         phdr->p_memsz = 0;
1423         phdr->p_flags = 0;
1424         phdr->p_align = 0;
1425         return;
1426 }
1427
1428 static void fill_note(struct memelfnote *note, const char *name, int type, 
1429                 unsigned int sz, void *data)
1430 {
1431         note->name = name;
1432         note->type = type;
1433         note->datasz = sz;
1434         note->data = data;
1435         return;
1436 }
1437
1438 /*
1439  * fill up all the fields in prstatus from the given task struct, except
1440  * registers which need to be filled up separately.
1441  */
1442 static void fill_prstatus(struct elf_prstatus *prstatus,
1443                 struct task_struct *p, long signr)
1444 {
1445         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1446         prstatus->pr_sigpend = p->pending.signal.sig[0];
1447         prstatus->pr_sighold = p->blocked.sig[0];
1448         prstatus->pr_pid = task_pid_vnr(p);
1449         prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1450         prstatus->pr_pgrp = task_pgrp_vnr(p);
1451         prstatus->pr_sid = task_session_vnr(p);
1452         if (thread_group_leader(p)) {
1453                 /*
1454                  * This is the record for the group leader.  Add in the
1455                  * cumulative times of previous dead threads.  This total
1456                  * won't include the time of each live thread whose state
1457                  * is included in the core dump.  The final total reported
1458                  * to our parent process when it calls wait4 will include
1459                  * those sums as well as the little bit more time it takes
1460                  * this and each other thread to finish dying after the
1461                  * core dump synchronization phase.
1462                  */
1463                 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1464                                    &prstatus->pr_utime);
1465                 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1466                                    &prstatus->pr_stime);
1467         } else {
1468                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1469                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1470         }
1471         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1472         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1473 }
1474
1475 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1476                        struct mm_struct *mm)
1477 {
1478         unsigned int i, len;
1479         
1480         /* first copy the parameters from user space */
1481         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1482
1483         len = mm->arg_end - mm->arg_start;
1484         if (len >= ELF_PRARGSZ)
1485                 len = ELF_PRARGSZ-1;
1486         if (copy_from_user(&psinfo->pr_psargs,
1487                            (const char __user *)mm->arg_start, len))
1488                 return -EFAULT;
1489         for(i = 0; i < len; i++)
1490                 if (psinfo->pr_psargs[i] == 0)
1491                         psinfo->pr_psargs[i] = ' ';
1492         psinfo->pr_psargs[len] = 0;
1493
1494         psinfo->pr_pid = task_pid_vnr(p);
1495         psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1496         psinfo->pr_pgrp = task_pgrp_vnr(p);
1497         psinfo->pr_sid = task_session_vnr(p);
1498
1499         i = p->state ? ffz(~p->state) + 1 : 0;
1500         psinfo->pr_state = i;
1501         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1502         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1503         psinfo->pr_nice = task_nice(p);
1504         psinfo->pr_flag = p->flags;
1505         SET_UID(psinfo->pr_uid, p->uid);
1506         SET_GID(psinfo->pr_gid, p->gid);
1507         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1508         
1509         return 0;
1510 }
1511
1512 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1513 {
1514         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1515         int i = 0;
1516         do
1517                 i += 2;
1518         while (auxv[i - 2] != AT_NULL);
1519         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1520 }
1521
1522 #ifdef CORE_DUMP_USE_REGSET
1523 #include <linux/regset.h>
1524
1525 struct elf_thread_core_info {
1526         struct elf_thread_core_info *next;
1527         struct task_struct *task;
1528         struct elf_prstatus prstatus;
1529         struct memelfnote notes[0];
1530 };
1531
1532 struct elf_note_info {  /* note state for one core dump; users are outside this excerpt */
1533         struct elf_thread_core_info *thread;    /* presumably head of the per-thread list (entries link via ->next) -- TODO confirm */
1534         struct memelfnote psinfo;               /* presumably the process-info note, cf. fill_psinfo() -- TODO confirm */
1535         struct memelfnote auxv;                 /* presumably the NT_AUXV note, cf. fill_auxv_note() -- TODO confirm */
1536         size_t size;                            /* NOTE(review): looks like the total size of all notes -- confirm */
1537         int thread_notes;                       /* NOTE(review): looks like the number of notes[] slots per thread -- confirm */
1538 };
1539
1540 static int fill_thread_core_info(struct elf_thread_core_info *t,
1541                                  const struct user_regset_view *view,
1542                                  long signr, size_t *total)
1543 {
1544         unsigned int i;
1545
1546         /*
1547          * NT_PRSTATUS is the one special case, because the regset data
1548          * goes into the pr_reg field inside the note contents, rather
1549          * than being the whole note contents.  We fill the reset in here.
1550          * We assume that regset 0 is NT_PRSTATUS.
1551          */
1552         fill_prstatus(&t->prstatus, t->task, signr);
1553         (void) view->regsets[0].get(t->task, &view->regsets[0],
1554                                     0, sizeof(t->prstatus.pr_reg),
1555                                     &t->prstatus.pr_reg, NULL);
1556
1557         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1558                   sizeof(t->prstatus), &t->prstatus);
1559         *total += notesize(&t->notes[0]);
1560
1561         /*
1562          * Each other regset might generate a note too.  For each regset
1563          * that has no core_note_type or is inactive, we leave t->notes[i]
1564          * all zero and we'll know to skip writing it later.
1565          */
1566         for (i = 1; i < view->n; ++i) {
1567                 const struct user_regset *regset = &view->regsets[i];
1568                 if (regset->core_note_type &&
1569                     (!regset->active || regset->active(t->task, regset))) {
1570                         int ret;
1571                         size_t size = regset->n * regset->size;
1572                         void *data = kmalloc(size, GFP_KERNEL);
1573                         if (unlikely(!data))
1574                                 return 0;
1575                         ret = regset->get(t->task, regset,
1576                                           0, size, data, NULL);
1577                         if (unlikely(ret))
1578                                 kfree(data);
1579                         else {
1580                                 if (regset->core_note_type != NT_PRFPREG)
1581                                         fill_note(&t->notes[i], "LINUX",
1582                                                   regset->core_note_type,
1583                                                   size, data);
1584                                 else {
1585                                         t->prstatus.pr_fpvalid = 1;
1586                                         fill_note(&t->notes[i], "CORE",
1587                                                   NT_PRFPREG, size, data);
1588                                 }
1589                                 *total += notesize(&t->notes[i]);
1590                         }
1591                 }
1592         }
1593
1594         return 1;
1595 }
1596
1597 static int fill_note_info(struct elfhdr *elf, int phdrs,
1598                           struct elf_note_info *info,
1599                           long signr, struct pt_regs *regs)
1600 {
1601         struct task_struct *dump_task = current;
1602         const struct user_regset_view *view = task_user_regset_view(dump_task);
1603         struct elf_thread_core_info *t;
1604         struct elf_prpsinfo *psinfo;
1605         struct task_struct *g, *p;
1606         unsigned int i;
1607
1608         info->size = 0;
1609         info->thread = NULL;
1610
1611         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1612         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1613
1614         if (psinfo == NULL)
1615                 return 0;
1616
1617         /*
1618          * Figure out how many notes we're going to need for each thread.
1619          */
1620         info->thread_notes = 0;
1621         for (i = 0; i < view->n; ++i)
1622                 if (view->regsets[i].core_note_type != 0)
1623                         ++info->thread_notes;
1624
1625         /*
1626          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1627          * since it is our one special case.
1628          */
1629         if (unlikely(info->thread_notes == 0) ||
1630             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1631                 WARN_ON(1);
1632                 return 0;
1633         }
1634
1635         /*
1636          * Initialize the ELF file header.
1637          */
1638         fill_elf_header(elf, phdrs,
1639                         view->e_machine, view->e_flags, view->ei_osabi);
1640
1641         /*
1642          * Allocate a structure for each thread.
1643          */
1644         rcu_read_lock();
1645         do_each_thread(g, p)
1646                 if (p->mm == dump_task->mm) {
1647                         t = kzalloc(offsetof(struct elf_thread_core_info,
1648                                              notes[info->thread_notes]),
1649                                     GFP_ATOMIC);
1650                         if (unlikely(!t)) {
1651                                 rcu_read_unlock();
1652                                 return 0;
1653                         }
1654                         t->task = p;
1655                         if (p == dump_task || !info->thread) {
1656                                 t->next = info->thread;
1657                                 info->thread = t;
1658                         } else {
1659                                 /*
1660                                  * Make sure to keep the original task at
1661                                  * the head of the list.
1662                                  */
1663                                 t->next = info->thread->next;
1664                                 info->thread->next = t;
1665                         }
1666                 }
1667         while_each_thread(g, p);
1668         rcu_read_unlock();
1669
1670         /*
1671          * Now fill in each thread's information.
1672          */
1673         for (t = info->thread; t != NULL; t = t->next)
1674                 if (!fill_thread_core_info(t, view, signr, &info->size))
1675                         return 0;
1676
1677         /*
1678          * Fill in the two process-wide notes.
1679          */
1680         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1681         info->size += notesize(&info->psinfo);
1682
1683         fill_auxv_note(&info->auxv, current->mm);
1684         info->size += notesize(&info->auxv);
1685
1686         return 1;
1687 }
1688
1689 static size_t get_note_info_size(struct elf_note_info *info)
1690 {
1691         return info->size;
1692 }
1693
1694 /*
1695  * Write all the notes for each thread.  When writing the first thread, the
1696  * process-wide notes are interleaved after the first thread-specific note.
1697  */
1698 static int write_note_info(struct elf_note_info *info,
1699                            struct file *file, loff_t *foffset)
1700 {
1701         bool first = 1;
1702         struct elf_thread_core_info *t = info->thread;
1703
1704         do {
1705                 int i;
1706
1707                 if (!writenote(&t->notes[0], file, foffset))
1708                         return 0;
1709
1710                 if (first && !writenote(&info->psinfo, file, foffset))
1711                         return 0;
1712                 if (first && !writenote(&info->auxv, file, foffset))
1713                         return 0;
1714
1715                 for (i = 1; i < info->thread_notes; ++i)
1716                         if (t->notes[i].data &&
1717                             !writenote(&t->notes[i], file, foffset))
1718                                 return 0;
1719
1720                 first = 0;
1721                 t = t->next;
1722         } while (t);
1723
1724         return 1;
1725 }
1726
1727 static void free_note_info(struct elf_note_info *info)
1728 {
1729         struct elf_thread_core_info *threads = info->thread;
1730         while (threads) {
1731                 unsigned int i;
1732                 struct elf_thread_core_info *t = threads;
1733                 threads = t->next;
1734                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1735                 for (i = 1; i < info->thread_notes; ++i)
1736                         kfree(t->notes[i].data);
1737                 kfree(t);
1738         }
1739         kfree(info->psinfo.data);
1740 }
1741
1742 #else
1743
1744 /* Here is the structure in which status of each thread is captured. */
1745 struct elf_thread_status
1746 {
1747         struct list_head list;
1748         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1749         elf_fpregset_t fpu;             /* NT_PRFPREG */
1750         struct task_struct *thread;
1751 #ifdef ELF_CORE_COPY_XFPREGS
1752         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1753 #endif
1754         struct memelfnote notes[3];
1755         int num_notes;
1756 };
1757
1758 /*
1759  * In order to add the specific thread information for the elf file format,
1760  * we need to keep a linked list of every threads pr_status and then create
1761  * a single section for them in the final core file.
1762  */
1763 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1764 {
1765         int sz = 0;
1766         struct task_struct *p = t->thread;
1767         t->num_notes = 0;
1768
1769         fill_prstatus(&t->prstatus, p, signr);
1770         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1771         
1772         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1773                   &(t->prstatus));
1774         t->num_notes++;
1775         sz += notesize(&t->notes[0]);
1776
1777         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1778                                                                 &t->fpu))) {
1779                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1780                           &(t->fpu));
1781                 t->num_notes++;
1782                 sz += notesize(&t->notes[1]);
1783         }
1784
1785 #ifdef ELF_CORE_COPY_XFPREGS
1786         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1787                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1788                           sizeof(t->xfpu), &t->xfpu);
1789                 t->num_notes++;
1790                 sz += notesize(&t->notes[2]);
1791         }
1792 #endif  
1793         return sz;
1794 }
1795
1796 struct elf_note_info {
1797         struct memelfnote *notes;
1798         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1799         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1800         struct list_head thread_list;
1801         elf_fpregset_t *fpu;
1802 #ifdef ELF_CORE_COPY_XFPREGS
1803         elf_fpxregset_t *xfpu;
1804 #endif
1805         int thread_status_size;
1806         int numnote;
1807 };
1808
1809 static int fill_note_info(struct elfhdr *elf, int phdrs,
1810                           struct elf_note_info *info,
1811                           long signr, struct pt_regs *regs)
1812 {
1813 #define NUM_NOTES       6
1814         struct list_head *t;
1815         struct task_struct *g, *p;
1816
1817         info->notes = NULL;
1818         info->prstatus = NULL;
1819         info->psinfo = NULL;
1820         info->fpu = NULL;
1821 #ifdef ELF_CORE_COPY_XFPREGS
1822         info->xfpu = NULL;
1823 #endif
1824         INIT_LIST_HEAD(&info->thread_list);
1825
1826         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1827                               GFP_KERNEL);
1828         if (!info->notes)
1829                 return 0;
1830         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1831         if (!info->psinfo)
1832                 return 0;
1833         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1834         if (!info->prstatus)
1835                 return 0;
1836         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1837         if (!info->fpu)
1838                 return 0;
1839 #ifdef ELF_CORE_COPY_XFPREGS
1840         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1841         if (!info->xfpu)
1842                 return 0;
1843 #endif
1844
1845         info->thread_status_size = 0;
1846         if (signr) {
1847                 struct elf_thread_status *tmp;
1848                 rcu_read_lock();
1849                 do_each_thread(g, p)
1850                         if (current->mm == p->mm && current != p) {
1851                                 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1852                                 if (!tmp) {
1853                                         rcu_read_unlock();
1854                                         return 0;
1855                                 }
1856                                 tmp->thread = p;
1857                                 list_add(&tmp->list, &info->thread_list);
1858                         }
1859                 while_each_thread(g, p);
1860                 rcu_read_unlock();
1861                 list_for_each(t, &info->thread_list) {
1862                         struct elf_thread_status *tmp;
1863                         int sz;
1864
1865                         tmp = list_entry(t, struct elf_thread_status, list);
1866                         sz = elf_dump_thread_status(signr, tmp);
1867                         info->thread_status_size += sz;
1868                 }
1869         }
1870         /* now collect the dump for the current */
1871         memset(info->prstatus, 0, sizeof(*info->prstatus));
1872         fill_prstatus(info->prstatus, current, signr);
1873         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1874
1875         /* Set up header */
1876         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1877
1878         /*
1879          * Set up the notes in similar form to SVR4 core dumps made
1880          * with info from their /proc.
1881          */
1882
1883         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1884                   sizeof(*info->prstatus), info->prstatus);
1885         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1886         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1887                   sizeof(*info->psinfo), info->psinfo);
1888
1889         info->numnote = 2;
1890
1891         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1892
1893         /* Try to dump the FPU. */
1894         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1895                                                                info->fpu);
1896         if (info->prstatus->pr_fpvalid)
1897                 fill_note(info->notes + info->numnote++,
1898                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1899 #ifdef ELF_CORE_COPY_XFPREGS
1900         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1901                 fill_note(info->notes + info->numnote++,
1902                           "LINUX", ELF_CORE_XFPREG_TYPE,
1903                           sizeof(*info->xfpu), info->xfpu);
1904 #endif
1905
1906         return 1;
1907
1908 #undef NUM_NOTES
1909 }
1910
1911 static size_t get_note_info_size(struct elf_note_info *info)
1912 {
1913         int sz = 0;
1914         int i;
1915
1916         for (i = 0; i < info->numnote; i++)
1917                 sz += notesize(info->notes + i);
1918
1919         sz += info->thread_status_size;
1920
1921         return sz;
1922 }
1923
1924 static int write_note_info(struct elf_note_info *info,
1925                            struct file *file, loff_t *foffset)
1926 {
1927         int i;
1928         struct list_head *t;
1929
1930         for (i = 0; i < info->numnote; i++)
1931                 if (!writenote(info->notes + i, file, foffset))
1932                         return 0;
1933
1934         /* write out the thread status notes section */
1935         list_for_each(t, &info->thread_list) {
1936                 struct elf_thread_status *tmp =
1937                                 list_entry(t, struct elf_thread_status, list);
1938
1939                 for (i = 0; i < tmp->num_notes; i++)
1940                         if (!writenote(&tmp->notes[i], file, foffset))
1941                                 return 0;
1942         }
1943
1944         return 1;
1945 }
1946
1947 static void free_note_info(struct elf_note_info *info)
1948 {
1949         while (!list_empty(&info->thread_list)) {
1950                 struct list_head *tmp = info->thread_list.next;
1951                 list_del(tmp);
1952                 kfree(list_entry(tmp, struct elf_thread_status, list));
1953         }
1954
1955         kfree(info->prstatus);
1956         kfree(info->psinfo);
1957         kfree(info->notes);
1958         kfree(info->fpu);
1959 #ifdef ELF_CORE_COPY_XFPREGS
1960         kfree(info->xfpu);
1961 #endif
1962 }
1963
1964 #endif
1965
1966 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1967                                         struct vm_area_struct *gate_vma)
1968 {
1969         struct vm_area_struct *ret = tsk->mm->mmap;
1970
1971         if (ret)
1972                 return ret;
1973         return gate_vma;
1974 }
1975 /*
1976  * Helper function for iterating across a vma list.  It ensures that the caller
1977  * will visit `gate_vma' prior to terminating the search.
1978  */
1979 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1980                                         struct vm_area_struct *gate_vma)
1981 {
1982         struct vm_area_struct *ret;
1983
1984         ret = this_vma->vm_next;
1985         if (ret)
1986                 return ret;
1987         if (this_vma == gate_vma)
1988                 return NULL;
1989         return gate_vma;
1990 }
1991
1992 /*
1993  * Actual dumper
1994  *
1995  * This is a two-pass process; first we find the offsets of the bits,
1996  * and then they are actually written out.  If we run out of core limit
1997  * we just truncate.
1998  */
1999 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
2000 {
2001         int has_dumped = 0;
2002         mm_segment_t fs;
2003         int segs;
2004         size_t size = 0;
2005         struct vm_area_struct *vma, *gate_vma;
2006         struct elfhdr *elf = NULL;
2007         loff_t offset = 0, dataoff, foffset;
2008         unsigned long mm_flags;
2009         struct elf_note_info info;
2010
2011         /*
2012          * We no longer stop all VM operations.
2013          * 
2014          * This is because those proceses that could possibly change map_count
2015          * or the mmap / vma pages are now blocked in do_exit on current
2016          * finishing this core dump.
2017          *
2018          * Only ptrace can touch these memory addresses, but it doesn't change
2019          * the map_count or the pages allocated. So no possibility of crashing
2020          * exists while dumping the mm->vm_next areas to the core file.
2021          */
2022   
2023         /* alloc memory for large data structures: too large to be on stack */
2024         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2025         if (!elf)
2026                 goto cleanup;
2027         
2028         segs = current->mm->map_count;
2029 #ifdef ELF_CORE_EXTRA_PHDRS
2030         segs += ELF_CORE_EXTRA_PHDRS;
2031 #endif
2032
2033         gate_vma = get_gate_vma(current);
2034         if (gate_vma != NULL)
2035                 segs++;
2036
2037         /*
2038          * Collect all the non-memory information about the process for the
2039          * notes.  This also sets up the file header.
2040          */
2041         if (!fill_note_info(elf, segs + 1, /* including notes section */
2042                             &info, signr, regs))
2043                 goto cleanup;
2044
2045         has_dumped = 1;
2046         current->flags |= PF_DUMPCORE;
2047   
2048         fs = get_fs();
2049         set_fs(KERNEL_DS);
2050
2051         DUMP_WRITE(elf, sizeof(*elf));
2052         offset += sizeof(*elf);                         /* Elf header */
2053         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
2054         foffset = offset;
2055
2056         /* Write notes phdr entry */
2057         {
2058                 struct elf_phdr phdr;
2059                 size_t sz = get_note_info_size(&info);
2060
2061                 sz += elf_coredump_extra_notes_size();
2062
2063                 fill_elf_note_phdr(&phdr, sz, offset);
2064                 offset += sz;
2065                 DUMP_WRITE(&phdr, sizeof(phdr));
2066         }
2067
2068         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2069
2070         /*
2071          * We must use the same mm->flags while dumping core to avoid
2072          * inconsistency between the program headers and bodies, otherwise an
2073          * unusable core file can be generated.
2074          */
2075         mm_flags = current->mm->flags;
2076
2077         /* Write program headers for segments dump */
2078         for (vma = first_vma(current, gate_vma); vma != NULL;
2079                         vma = next_vma(vma, gate_vma)) {
2080                 struct elf_phdr phdr;
2081
2082                 phdr.p_type = PT_LOAD;
2083                 phdr.p_offset = offset;
2084                 phdr.p_vaddr = vma->vm_start;
2085                 phdr.p_paddr = 0;
2086                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
2087                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2088                 offset += phdr.p_filesz;
2089                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2090                 if (vma->vm_flags & VM_WRITE)
2091                         phdr.p_flags |= PF_W;
2092                 if (vma->vm_flags & VM_EXEC)
2093                         phdr.p_flags |= PF_X;
2094                 phdr.p_align = ELF_EXEC_PAGESIZE;
2095
2096                 DUMP_WRITE(&phdr, sizeof(phdr));
2097         }
2098
2099 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2100         ELF_CORE_WRITE_EXTRA_PHDRS;
2101 #endif
2102
2103         /* write out the notes section */
2104         if (!write_note_info(&info, file, &foffset))
2105                 goto end_coredump;
2106
2107         if (elf_coredump_extra_notes_write(file, &foffset))
2108                 goto end_coredump;
2109
2110         /* Align to page */
2111         DUMP_SEEK(dataoff - foffset);
2112
2113         for (vma = first_vma(current, gate_vma); vma != NULL;
2114                         vma = next_vma(vma, gate_vma)) {
2115                 unsigned long addr;
2116                 unsigned long end;
2117
2118                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2119
2120                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2121                         struct page *page;
2122                         struct vm_area_struct *vma;
2123
2124                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2125                                                 &page, &vma) <= 0) {
2126                                 DUMP_SEEK(PAGE_SIZE);
2127                         } else {
2128                                 if (page == ZERO_PAGE(0)) {
2129                                         if (!dump_seek(file, PAGE_SIZE)) {
2130                                                 page_cache_release(page);
2131                                                 goto end_coredump;
2132                                         }
2133                                 } else {
2134                                         void *kaddr;
2135                                         flush_cache_page(vma, addr,
2136                                                          page_to_pfn(page));
2137                                         kaddr = kmap(page);
2138                                         if ((size += PAGE_SIZE) > limit ||
2139                                             !dump_write(file, kaddr,
2140                                             PAGE_SIZE)) {
2141                                                 kunmap(page);
2142                                                 page_cache_release(page);
2143                                                 goto end_coredump;
2144                                         }
2145                                         kunmap(page);
2146                                 }
2147                                 page_cache_release(page);
2148                         }
2149                 }
2150         }
2151
2152 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2153         ELF_CORE_WRITE_EXTRA_DATA;
2154 #endif
2155
2156 end_coredump:
2157         set_fs(fs);
2158
2159 cleanup:
2160         kfree(elf);
2161         free_note_info(&info);
2162         return has_dumped;
2163 }
2164
2165 #endif          /* USE_ELF_CORE_DUMP */
2166
2167 static int __init init_elf_binfmt(void)
2168 {
2169         return register_binfmt(&elf_format);
2170 }
2171
2172 static void __exit exit_elf_binfmt(void)
2173 {
2174         /* Remove the COFF and ELF loaders. */
2175         unregister_binfmt(&elf_format);
2176 }
2177
2178 core_initcall(init_elf_binfmt);
2179 module_exit(exit_elf_binfmt);
2180 MODULE_LICENSE("GPL");