elf core dump: notes user_regset
[linux-2.6] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/compiler.h>
35 #include <linux/highmem.h>
36 #include <linux/pagemap.h>
37 #include <linux/security.h>
38 #include <linux/syscalls.h>
39 #include <linux/random.h>
40 #include <linux/elf.h>
41 #include <linux/utsname.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
45
46 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47 static int load_elf_library(struct file *);
48 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
49                                 int, int, unsigned long);
50
51 /*
52  * If we don't support core dumping, then supply a NULL so we
53  * don't even try.
54  */
55 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
56 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
57 #else
58 #define elf_core_dump   NULL
59 #endif
60
61 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
62 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
63 #else
64 #define ELF_MIN_ALIGN   PAGE_SIZE
65 #endif
66
67 #ifndef ELF_CORE_EFLAGS
68 #define ELF_CORE_EFLAGS 0
69 #endif
70
71 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
72 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
73 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
74
75 static struct linux_binfmt elf_format = {
76                 .module         = THIS_MODULE,
77                 .load_binary    = load_elf_binary,
78                 .load_shlib     = load_elf_library,
79                 .core_dump      = elf_core_dump,
80                 .min_coredump   = ELF_EXEC_PAGESIZE,
81                 .hasvdso        = 1
82 };
83
84 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
85
86 static int set_brk(unsigned long start, unsigned long end)
87 {
88         start = ELF_PAGEALIGN(start);
89         end = ELF_PAGEALIGN(end);
90         if (end > start) {
91                 unsigned long addr;
92                 down_write(&current->mm->mmap_sem);
93                 addr = do_brk(start, end - start);
94                 up_write(&current->mm->mmap_sem);
95                 if (BAD_ADDR(addr))
96                         return addr;
97         }
98         current->mm->start_brk = current->mm->brk = end;
99         return 0;
100 }
101
102 /* We need to explicitly zero any fractional pages
103    after the data section (i.e. bss).  This would
104    contain the junk from the file that should not
105    be in memory
106  */
107 static int padzero(unsigned long elf_bss)
108 {
109         unsigned long nbyte;
110
111         nbyte = ELF_PAGEOFFSET(elf_bss);
112         if (nbyte) {
113                 nbyte = ELF_MIN_ALIGN - nbyte;
114                 if (clear_user((void __user *) elf_bss, nbyte))
115                         return -EFAULT;
116         }
117         return 0;
118 }
119
120 /* Let's use some macros to make this stack manipulation a litle clearer */
121 #ifdef CONFIG_STACK_GROWSUP
122 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
123 #define STACK_ROUND(sp, items) \
124         ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
125 #define STACK_ALLOC(sp, len) ({ \
126         elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
127         old_sp; })
128 #else
129 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
130 #define STACK_ROUND(sp, items) \
131         (((unsigned long) (sp - items)) &~ 15UL)
132 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
133 #endif
134
135 static int
136 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
137                 int interp_aout, unsigned long load_addr,
138                 unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         const char *k_platform = ELF_PLATFORM;
148         int items;
149         elf_addr_t *elf_info;
150         int ei_index = 0;
151         struct task_struct *tsk = current;
152         struct vm_area_struct *vma;
153
154         /*
155          * In some cases (e.g. Hyper-Threading), we want to avoid L1
156          * evictions by the processes running on the same package. One
157          * thing we can do is to shuffle the initial stack for them.
158          */
159
160         p = arch_align_stack(p);
161
162         /*
163          * If this architecture has a platform capability string, copy it
164          * to userspace.  In some cases (Sparc), this info is impossible
165          * for userspace to get any other way, in others (i386) it is
166          * merely difficult.
167          */
168         u_platform = NULL;
169         if (k_platform) {
170                 size_t len = strlen(k_platform) + 1;
171
172                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
173                 if (__copy_to_user(u_platform, k_platform, len))
174                         return -EFAULT;
175         }
176
177         /* Create the ELF interpreter info */
178         elf_info = (elf_addr_t *)current->mm->saved_auxv;
179         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
180 #define NEW_AUX_ENT(id, val) \
181         do { \
182                 elf_info[ei_index++] = id; \
183                 elf_info[ei_index++] = val; \
184         } while (0)
185
186 #ifdef ARCH_DLINFO
187         /* 
188          * ARCH_DLINFO must come first so PPC can do its special alignment of
189          * AUXV.
190          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
191          * ARCH_DLINFO changes
192          */
193         ARCH_DLINFO;
194 #endif
195         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
196         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
197         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
198         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
199         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
200         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
201         NEW_AUX_ENT(AT_BASE, interp_load_addr);
202         NEW_AUX_ENT(AT_FLAGS, 0);
203         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
204         NEW_AUX_ENT(AT_UID, tsk->uid);
205         NEW_AUX_ENT(AT_EUID, tsk->euid);
206         NEW_AUX_ENT(AT_GID, tsk->gid);
207         NEW_AUX_ENT(AT_EGID, tsk->egid);
208         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
209         if (k_platform) {
210                 NEW_AUX_ENT(AT_PLATFORM,
211                             (elf_addr_t)(unsigned long)u_platform);
212         }
213         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
214                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
215         }
216 #undef NEW_AUX_ENT
217         /* AT_NULL is zero; clear the rest too */
218         memset(&elf_info[ei_index], 0,
219                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
220
221         /* And advance past the AT_NULL entry.  */
222         ei_index += 2;
223
224         sp = STACK_ADD(p, ei_index);
225
226         items = (argc + 1) + (envc + 1);
227         if (interp_aout) {
228                 items += 3; /* a.out interpreters require argv & envp too */
229         } else {
230                 items += 1; /* ELF interpreters only put argc on the stack */
231         }
232         bprm->p = STACK_ROUND(sp, items);
233
234         /* Point sp at the lowest address on the stack */
235 #ifdef CONFIG_STACK_GROWSUP
236         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
237         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
238 #else
239         sp = (elf_addr_t __user *)bprm->p;
240 #endif
241
242
243         /*
244          * Grow the stack manually; some architectures have a limit on how
245          * far ahead a user-space access may be in order to grow the stack.
246          */
247         vma = find_extend_vma(current->mm, bprm->p);
248         if (!vma)
249                 return -EFAULT;
250
251         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
252         if (__put_user(argc, sp++))
253                 return -EFAULT;
254         if (interp_aout) {
255                 argv = sp + 2;
256                 envp = argv + argc + 1;
257                 if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
258                     __put_user((elf_addr_t)(unsigned long)envp, sp++))
259                         return -EFAULT;
260         } else {
261                 argv = sp;
262                 envp = argv + argc + 1;
263         }
264
265         /* Populate argv and envp */
266         p = current->mm->arg_end = current->mm->arg_start;
267         while (argc-- > 0) {
268                 size_t len;
269                 if (__put_user((elf_addr_t)p, argv++))
270                         return -EFAULT;
271                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
272                 if (!len || len > MAX_ARG_STRLEN)
273                         return 0;
274                 p += len;
275         }
276         if (__put_user(0, argv))
277                 return -EFAULT;
278         current->mm->arg_end = current->mm->env_start = p;
279         while (envc-- > 0) {
280                 size_t len;
281                 if (__put_user((elf_addr_t)p, envp++))
282                         return -EFAULT;
283                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
284                 if (!len || len > MAX_ARG_STRLEN)
285                         return 0;
286                 p += len;
287         }
288         if (__put_user(0, envp))
289                 return -EFAULT;
290         current->mm->env_end = p;
291
292         /* Put the elf_info on the stack in the right place.  */
293         sp = (elf_addr_t __user *)envp + 1;
294         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
295                 return -EFAULT;
296         return 0;
297 }
298
299 #ifndef elf_map
300
301 static unsigned long elf_map(struct file *filep, unsigned long addr,
302                 struct elf_phdr *eppnt, int prot, int type,
303                 unsigned long total_size)
304 {
305         unsigned long map_addr;
306         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
307         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
308         addr = ELF_PAGESTART(addr);
309         size = ELF_PAGEALIGN(size);
310
311         /* mmap() will return -EINVAL if given a zero size, but a
312          * segment with zero filesize is perfectly valid */
313         if (!size)
314                 return addr;
315
316         down_write(&current->mm->mmap_sem);
317         /*
318         * total_size is the size of the ELF (interpreter) image.
319         * The _first_ mmap needs to know the full size, otherwise
320         * randomization might put this image into an overlapping
321         * position with the ELF binary image. (since size < total_size)
322         * So we first map the 'big' image - and unmap the remainder at
323         * the end. (which unmap is needed for ELF images with holes.)
324         */
325         if (total_size) {
326                 total_size = ELF_PAGEALIGN(total_size);
327                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
328                 if (!BAD_ADDR(map_addr))
329                         do_munmap(current->mm, map_addr+size, total_size-size);
330         } else
331                 map_addr = do_mmap(filep, addr, size, prot, type, off);
332
333         up_write(&current->mm->mmap_sem);
334         return(map_addr);
335 }
336
337 #endif /* !elf_map */
338
339 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
340 {
341         int i, first_idx = -1, last_idx = -1;
342
343         for (i = 0; i < nr; i++) {
344                 if (cmds[i].p_type == PT_LOAD) {
345                         last_idx = i;
346                         if (first_idx == -1)
347                                 first_idx = i;
348                 }
349         }
350         if (first_idx == -1)
351                 return 0;
352
353         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
354                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
355 }
356
357
358 /* This is much more generalized than the library routine read function,
359    so we keep this separate.  Technically the library read function
360    is only provided so that we can read a.out libraries that have
361    an ELF header */
362
363 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
364                 struct file *interpreter, unsigned long *interp_map_addr,
365                 unsigned long no_base)
366 {
367         struct elf_phdr *elf_phdata;
368         struct elf_phdr *eppnt;
369         unsigned long load_addr = 0;
370         int load_addr_set = 0;
371         unsigned long last_bss = 0, elf_bss = 0;
372         unsigned long error = ~0UL;
373         unsigned long total_size;
374         int retval, i, size;
375
376         /* First of all, some simple consistency checks */
377         if (interp_elf_ex->e_type != ET_EXEC &&
378             interp_elf_ex->e_type != ET_DYN)
379                 goto out;
380         if (!elf_check_arch(interp_elf_ex))
381                 goto out;
382         if (!interpreter->f_op || !interpreter->f_op->mmap)
383                 goto out;
384
385         /*
386          * If the size of this structure has changed, then punt, since
387          * we will be doing the wrong thing.
388          */
389         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
390                 goto out;
391         if (interp_elf_ex->e_phnum < 1 ||
392                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
393                 goto out;
394
395         /* Now read in all of the header information */
396         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
397         if (size > ELF_MIN_ALIGN)
398                 goto out;
399         elf_phdata = kmalloc(size, GFP_KERNEL);
400         if (!elf_phdata)
401                 goto out;
402
403         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
404                              (char *)elf_phdata,size);
405         error = -EIO;
406         if (retval != size) {
407                 if (retval < 0)
408                         error = retval; 
409                 goto out_close;
410         }
411
412         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
413         if (!total_size) {
414                 error = -EINVAL;
415                 goto out_close;
416         }
417
418         eppnt = elf_phdata;
419         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
420                 if (eppnt->p_type == PT_LOAD) {
421                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
422                         int elf_prot = 0;
423                         unsigned long vaddr = 0;
424                         unsigned long k, map_addr;
425
426                         if (eppnt->p_flags & PF_R)
427                                 elf_prot = PROT_READ;
428                         if (eppnt->p_flags & PF_W)
429                                 elf_prot |= PROT_WRITE;
430                         if (eppnt->p_flags & PF_X)
431                                 elf_prot |= PROT_EXEC;
432                         vaddr = eppnt->p_vaddr;
433                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
434                                 elf_type |= MAP_FIXED;
435                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
436                                 load_addr = -vaddr;
437
438                         map_addr = elf_map(interpreter, load_addr + vaddr,
439                                         eppnt, elf_prot, elf_type, total_size);
440                         total_size = 0;
441                         if (!*interp_map_addr)
442                                 *interp_map_addr = map_addr;
443                         error = map_addr;
444                         if (BAD_ADDR(map_addr))
445                                 goto out_close;
446
447                         if (!load_addr_set &&
448                             interp_elf_ex->e_type == ET_DYN) {
449                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
450                                 load_addr_set = 1;
451                         }
452
453                         /*
454                          * Check to see if the section's size will overflow the
455                          * allowed task size. Note that p_filesz must always be
456                          * <= p_memsize so it's only necessary to check p_memsz.
457                          */
458                         k = load_addr + eppnt->p_vaddr;
459                         if (BAD_ADDR(k) ||
460                             eppnt->p_filesz > eppnt->p_memsz ||
461                             eppnt->p_memsz > TASK_SIZE ||
462                             TASK_SIZE - eppnt->p_memsz < k) {
463                                 error = -ENOMEM;
464                                 goto out_close;
465                         }
466
467                         /*
468                          * Find the end of the file mapping for this phdr, and
469                          * keep track of the largest address we see for this.
470                          */
471                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
472                         if (k > elf_bss)
473                                 elf_bss = k;
474
475                         /*
476                          * Do the same thing for the memory mapping - between
477                          * elf_bss and last_bss is the bss section.
478                          */
479                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
480                         if (k > last_bss)
481                                 last_bss = k;
482                 }
483         }
484
485         /*
486          * Now fill out the bss section.  First pad the last page up
487          * to the page boundary, and then perform a mmap to make sure
488          * that there are zero-mapped pages up to and including the 
489          * last bss page.
490          */
491         if (padzero(elf_bss)) {
492                 error = -EFAULT;
493                 goto out_close;
494         }
495
496         /* What we have mapped so far */
497         elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
498
499         /* Map the last of the bss segment */
500         if (last_bss > elf_bss) {
501                 down_write(&current->mm->mmap_sem);
502                 error = do_brk(elf_bss, last_bss - elf_bss);
503                 up_write(&current->mm->mmap_sem);
504                 if (BAD_ADDR(error))
505                         goto out_close;
506         }
507
508         error = load_addr;
509
510 out_close:
511         kfree(elf_phdata);
512 out:
513         return error;
514 }
515
516 static unsigned long load_aout_interp(struct exec *interp_ex,
517                 struct file *interpreter)
518 {
519         unsigned long text_data, elf_entry = ~0UL;
520         char __user * addr;
521         loff_t offset;
522
523         current->mm->end_code = interp_ex->a_text;
524         text_data = interp_ex->a_text + interp_ex->a_data;
525         current->mm->end_data = text_data;
526         current->mm->brk = interp_ex->a_bss + text_data;
527
528         switch (N_MAGIC(*interp_ex)) {
529         case OMAGIC:
530                 offset = 32;
531                 addr = (char __user *)0;
532                 break;
533         case ZMAGIC:
534         case QMAGIC:
535                 offset = N_TXTOFF(*interp_ex);
536                 addr = (char __user *)N_TXTADDR(*interp_ex);
537                 break;
538         default:
539                 goto out;
540         }
541
542         down_write(&current->mm->mmap_sem);     
543         do_brk(0, text_data);
544         up_write(&current->mm->mmap_sem);
545         if (!interpreter->f_op || !interpreter->f_op->read)
546                 goto out;
547         if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
548                 goto out;
549         flush_icache_range((unsigned long)addr,
550                            (unsigned long)addr + text_data);
551
552         down_write(&current->mm->mmap_sem);     
553         do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
554                 interp_ex->a_bss);
555         up_write(&current->mm->mmap_sem);
556         elf_entry = interp_ex->a_entry;
557
558 out:
559         return elf_entry;
560 }
561
562 /*
563  * These are the functions used to load ELF style executables and shared
564  * libraries.  There is no binary dependent code anywhere else.
565  */
566
567 #define INTERPRETER_NONE 0
568 #define INTERPRETER_AOUT 1
569 #define INTERPRETER_ELF 2
570
571 #ifndef STACK_RND_MASK
572 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
573 #endif
574
575 static unsigned long randomize_stack_top(unsigned long stack_top)
576 {
577         unsigned int random_variable = 0;
578
579         if ((current->flags & PF_RANDOMIZE) &&
580                 !(current->personality & ADDR_NO_RANDOMIZE)) {
581                 random_variable = get_random_int() & STACK_RND_MASK;
582                 random_variable <<= PAGE_SHIFT;
583         }
584 #ifdef CONFIG_STACK_GROWSUP
585         return PAGE_ALIGN(stack_top) + random_variable;
586 #else
587         return PAGE_ALIGN(stack_top) - random_variable;
588 #endif
589 }
590
591 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
592 {
593         struct file *interpreter = NULL; /* to shut gcc up */
594         unsigned long load_addr = 0, load_bias = 0;
595         int load_addr_set = 0;
596         char * elf_interpreter = NULL;
597         unsigned int interpreter_type = INTERPRETER_NONE;
598         unsigned char ibcs2_interpreter = 0;
599         unsigned long error;
600         struct elf_phdr *elf_ppnt, *elf_phdata;
601         unsigned long elf_bss, elf_brk;
602         int elf_exec_fileno;
603         int retval, i;
604         unsigned int size;
605         unsigned long elf_entry;
606         unsigned long interp_load_addr = 0;
607         unsigned long start_code, end_code, start_data, end_data;
608         unsigned long reloc_func_desc = 0;
609         char passed_fileno[6];
610         struct files_struct *files;
611         int executable_stack = EXSTACK_DEFAULT;
612         unsigned long def_flags = 0;
613         struct {
614                 struct elfhdr elf_ex;
615                 struct elfhdr interp_elf_ex;
616                 struct exec interp_ex;
617         } *loc;
618
619         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
620         if (!loc) {
621                 retval = -ENOMEM;
622                 goto out_ret;
623         }
624         
625         /* Get the exec-header */
626         loc->elf_ex = *((struct elfhdr *)bprm->buf);
627
628         retval = -ENOEXEC;
629         /* First of all, some simple consistency checks */
630         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
631                 goto out;
632
633         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
634                 goto out;
635         if (!elf_check_arch(&loc->elf_ex))
636                 goto out;
637         if (!bprm->file->f_op||!bprm->file->f_op->mmap)
638                 goto out;
639
640         /* Now read in all of the header information */
641         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
642                 goto out;
643         if (loc->elf_ex.e_phnum < 1 ||
644                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
645                 goto out;
646         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
647         retval = -ENOMEM;
648         elf_phdata = kmalloc(size, GFP_KERNEL);
649         if (!elf_phdata)
650                 goto out;
651
652         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
653                              (char *)elf_phdata, size);
654         if (retval != size) {
655                 if (retval >= 0)
656                         retval = -EIO;
657                 goto out_free_ph;
658         }
659
660         files = current->files; /* Refcounted so ok */
661         retval = unshare_files();
662         if (retval < 0)
663                 goto out_free_ph;
664         if (files == current->files) {
665                 put_files_struct(files);
666                 files = NULL;
667         }
668
669         /* exec will make our files private anyway, but for the a.out
670            loader stuff we need to do it earlier */
671         retval = get_unused_fd();
672         if (retval < 0)
673                 goto out_free_fh;
674         get_file(bprm->file);
675         fd_install(elf_exec_fileno = retval, bprm->file);
676
677         elf_ppnt = elf_phdata;
678         elf_bss = 0;
679         elf_brk = 0;
680
681         start_code = ~0UL;
682         end_code = 0;
683         start_data = 0;
684         end_data = 0;
685
686         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
687                 if (elf_ppnt->p_type == PT_INTERP) {
688                         /* This is the program interpreter used for
689                          * shared libraries - for now assume that this
690                          * is an a.out format binary
691                          */
692                         retval = -ENOEXEC;
693                         if (elf_ppnt->p_filesz > PATH_MAX || 
694                             elf_ppnt->p_filesz < 2)
695                                 goto out_free_file;
696
697                         retval = -ENOMEM;
698                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
699                                                   GFP_KERNEL);
700                         if (!elf_interpreter)
701                                 goto out_free_file;
702
703                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
704                                              elf_interpreter,
705                                              elf_ppnt->p_filesz);
706                         if (retval != elf_ppnt->p_filesz) {
707                                 if (retval >= 0)
708                                         retval = -EIO;
709                                 goto out_free_interp;
710                         }
711                         /* make sure path is NULL terminated */
712                         retval = -ENOEXEC;
713                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
714                                 goto out_free_interp;
715
716                         /* If the program interpreter is one of these two,
717                          * then assume an iBCS2 image. Otherwise assume
718                          * a native linux image.
719                          */
720                         if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
721                             strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
722                                 ibcs2_interpreter = 1;
723
724                         /*
725                          * The early SET_PERSONALITY here is so that the lookup
726                          * for the interpreter happens in the namespace of the 
727                          * to-be-execed image.  SET_PERSONALITY can select an
728                          * alternate root.
729                          *
730                          * However, SET_PERSONALITY is NOT allowed to switch
731                          * this task into the new images's memory mapping
732                          * policy - that is, TASK_SIZE must still evaluate to
733                          * that which is appropriate to the execing application.
734                          * This is because exit_mmap() needs to have TASK_SIZE
735                          * evaluate to the size of the old image.
736                          *
737                          * So if (say) a 64-bit application is execing a 32-bit
738                          * application it is the architecture's responsibility
739                          * to defer changing the value of TASK_SIZE until the
740                          * switch really is going to happen - do this in
741                          * flush_thread().      - akpm
742                          */
743                         SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
744
745                         interpreter = open_exec(elf_interpreter);
746                         retval = PTR_ERR(interpreter);
747                         if (IS_ERR(interpreter))
748                                 goto out_free_interp;
749
750                         /*
751                          * If the binary is not readable then enforce
752                          * mm->dumpable = 0 regardless of the interpreter's
753                          * permissions.
754                          */
755                         if (file_permission(interpreter, MAY_READ) < 0)
756                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
757
758                         retval = kernel_read(interpreter, 0, bprm->buf,
759                                              BINPRM_BUF_SIZE);
760                         if (retval != BINPRM_BUF_SIZE) {
761                                 if (retval >= 0)
762                                         retval = -EIO;
763                                 goto out_free_dentry;
764                         }
765
766                         /* Get the exec headers */
767                         loc->interp_ex = *((struct exec *)bprm->buf);
768                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
769                         break;
770                 }
771                 elf_ppnt++;
772         }
773
774         elf_ppnt = elf_phdata;
775         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
776                 if (elf_ppnt->p_type == PT_GNU_STACK) {
777                         if (elf_ppnt->p_flags & PF_X)
778                                 executable_stack = EXSTACK_ENABLE_X;
779                         else
780                                 executable_stack = EXSTACK_DISABLE_X;
781                         break;
782                 }
783
784         /* Some simple consistency checks for the interpreter */
785         if (elf_interpreter) {
786                 static int warn;
787                 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
788
789                 /* Now figure out which format our binary is */
790                 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
791                     (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
792                     (N_MAGIC(loc->interp_ex) != QMAGIC))
793                         interpreter_type = INTERPRETER_ELF;
794
795                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
796                         interpreter_type &= ~INTERPRETER_ELF;
797
798                 if (interpreter_type == INTERPRETER_AOUT && warn < 10) {
799                         printk(KERN_WARNING "a.out ELF interpreter %s is "
800                                 "deprecated and will not be supported "
801                                 "after Linux 2.6.25\n", elf_interpreter);
802                         warn++;
803                 }
804
805                 retval = -ELIBBAD;
806                 if (!interpreter_type)
807                         goto out_free_dentry;
808
809                 /* Make sure only one type was selected */
810                 if ((interpreter_type & INTERPRETER_ELF) &&
811                      interpreter_type != INTERPRETER_ELF) {
812                         // FIXME - ratelimit this before re-enabling
813                         // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
814                         interpreter_type = INTERPRETER_ELF;
815                 }
816                 /* Verify the interpreter has a valid arch */
817                 if ((interpreter_type == INTERPRETER_ELF) &&
818                     !elf_check_arch(&loc->interp_elf_ex))
819                         goto out_free_dentry;
820         } else {
821                 /* Executables without an interpreter also need a personality  */
822                 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
823         }
824
825         /* OK, we are done with that, now set up the arg stuff,
826            and then start this sucker up */
827         if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
828                 char *passed_p = passed_fileno;
829                 sprintf(passed_fileno, "%d", elf_exec_fileno);
830
831                 if (elf_interpreter) {
832                         retval = copy_strings_kernel(1, &passed_p, bprm);
833                         if (retval)
834                                 goto out_free_dentry; 
835                         bprm->argc++;
836                 }
837         }
838
839         /* Flush all traces of the currently running executable */
840         retval = flush_old_exec(bprm);
841         if (retval)
842                 goto out_free_dentry;
843
844         /* Discard our unneeded old files struct */
845         if (files) {
846                 put_files_struct(files);
847                 files = NULL;
848         }
849
850         /* OK, This is the point of no return */
851         current->flags &= ~PF_FORKNOEXEC;
852         current->mm->def_flags = def_flags;
853
854         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
855            may depend on the personality.  */
856         SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
857         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
858                 current->personality |= READ_IMPLIES_EXEC;
859
860         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
861                 current->flags |= PF_RANDOMIZE;
862         arch_pick_mmap_layout(current->mm);
863
864         /* Do this so that we can load the interpreter, if need be.  We will
865            change some of these later */
866         current->mm->free_area_cache = current->mm->mmap_base;
867         current->mm->cached_hole_size = 0;
868         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
869                                  executable_stack);
870         if (retval < 0) {
871                 send_sig(SIGKILL, current, 0);
872                 goto out_free_dentry;
873         }
874         
875         current->mm->start_stack = bprm->p;
876
877         /* Now we do a little grungy work by mmaping the ELF image into
878            the correct location in memory. */
879         for(i = 0, elf_ppnt = elf_phdata;
880             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
881                 int elf_prot = 0, elf_flags;
882                 unsigned long k, vaddr;
883
884                 if (elf_ppnt->p_type != PT_LOAD)
885                         continue;
886
887                 if (unlikely (elf_brk > elf_bss)) {
888                         unsigned long nbyte;
889                     
890                         /* There was a PT_LOAD segment with p_memsz > p_filesz
891                            before this one. Map anonymous pages, if needed,
892                            and clear the area.  */
893                         retval = set_brk (elf_bss + load_bias,
894                                           elf_brk + load_bias);
895                         if (retval) {
896                                 send_sig(SIGKILL, current, 0);
897                                 goto out_free_dentry;
898                         }
899                         nbyte = ELF_PAGEOFFSET(elf_bss);
900                         if (nbyte) {
901                                 nbyte = ELF_MIN_ALIGN - nbyte;
902                                 if (nbyte > elf_brk - elf_bss)
903                                         nbyte = elf_brk - elf_bss;
904                                 if (clear_user((void __user *)elf_bss +
905                                                         load_bias, nbyte)) {
906                                         /*
907                                          * This bss-zeroing can fail if the ELF
908                                          * file specifies odd protections. So
909                                          * we don't check the return value
910                                          */
911                                 }
912                         }
913                 }
914
915                 if (elf_ppnt->p_flags & PF_R)
916                         elf_prot |= PROT_READ;
917                 if (elf_ppnt->p_flags & PF_W)
918                         elf_prot |= PROT_WRITE;
919                 if (elf_ppnt->p_flags & PF_X)
920                         elf_prot |= PROT_EXEC;
921
922                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
923
924                 vaddr = elf_ppnt->p_vaddr;
925                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
926                         elf_flags |= MAP_FIXED;
927                 } else if (loc->elf_ex.e_type == ET_DYN) {
928                         /* Try and get dynamic programs out of the way of the
929                          * default mmap base, as well as whatever program they
930                          * might try to exec.  This is because the brk will
931                          * follow the loader, and is not movable.  */
932 #ifdef CONFIG_X86
933                         load_bias = 0;
934 #else
935                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
936 #endif
937                 }
938
939                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
940                                 elf_prot, elf_flags, 0);
941                 if (BAD_ADDR(error)) {
942                         send_sig(SIGKILL, current, 0);
943                         retval = IS_ERR((void *)error) ?
944                                 PTR_ERR((void*)error) : -EINVAL;
945                         goto out_free_dentry;
946                 }
947
948                 if (!load_addr_set) {
949                         load_addr_set = 1;
950                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
951                         if (loc->elf_ex.e_type == ET_DYN) {
952                                 load_bias += error -
953                                              ELF_PAGESTART(load_bias + vaddr);
954                                 load_addr += load_bias;
955                                 reloc_func_desc = load_bias;
956                         }
957                 }
958                 k = elf_ppnt->p_vaddr;
959                 if (k < start_code)
960                         start_code = k;
961                 if (start_data < k)
962                         start_data = k;
963
964                 /*
965                  * Check to see if the section's size will overflow the
966                  * allowed task size. Note that p_filesz must always be
967                  * <= p_memsz so it is only necessary to check p_memsz.
968                  */
969                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
970                     elf_ppnt->p_memsz > TASK_SIZE ||
971                     TASK_SIZE - elf_ppnt->p_memsz < k) {
972                         /* set_brk can never work. Avoid overflows. */
973                         send_sig(SIGKILL, current, 0);
974                         retval = -EINVAL;
975                         goto out_free_dentry;
976                 }
977
978                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
979
980                 if (k > elf_bss)
981                         elf_bss = k;
982                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
983                         end_code = k;
984                 if (end_data < k)
985                         end_data = k;
986                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
987                 if (k > elf_brk)
988                         elf_brk = k;
989         }
990
991         loc->elf_ex.e_entry += load_bias;
992         elf_bss += load_bias;
993         elf_brk += load_bias;
994         start_code += load_bias;
995         end_code += load_bias;
996         start_data += load_bias;
997         end_data += load_bias;
998
999         /* Calling set_brk effectively mmaps the pages that we need
1000          * for the bss and break sections.  We must do this before
1001          * mapping in the interpreter, to make sure it doesn't wind
1002          * up getting placed where the bss needs to go.
1003          */
1004         retval = set_brk(elf_bss, elf_brk);
1005         if (retval) {
1006                 send_sig(SIGKILL, current, 0);
1007                 goto out_free_dentry;
1008         }
1009         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1010                 send_sig(SIGSEGV, current, 0);
1011                 retval = -EFAULT; /* Nobody gets to see this, but.. */
1012                 goto out_free_dentry;
1013         }
1014
1015         if (elf_interpreter) {
1016                 if (interpreter_type == INTERPRETER_AOUT) {
1017                         elf_entry = load_aout_interp(&loc->interp_ex,
1018                                                      interpreter);
1019                 } else {
1020                         unsigned long uninitialized_var(interp_map_addr);
1021
1022                         elf_entry = load_elf_interp(&loc->interp_elf_ex,
1023                                                     interpreter,
1024                                                     &interp_map_addr,
1025                                                     load_bias);
1026                         if (!IS_ERR((void *)elf_entry)) {
1027                                 /*
1028                                  * load_elf_interp() returns relocation
1029                                  * adjustment
1030                                  */
1031                                 interp_load_addr = elf_entry;
1032                                 elf_entry += loc->interp_elf_ex.e_entry;
1033                         }
1034                 }
1035                 if (BAD_ADDR(elf_entry)) {
1036                         force_sig(SIGSEGV, current);
1037                         retval = IS_ERR((void *)elf_entry) ?
1038                                         (int)elf_entry : -EINVAL;
1039                         goto out_free_dentry;
1040                 }
1041                 reloc_func_desc = interp_load_addr;
1042
1043                 allow_write_access(interpreter);
1044                 fput(interpreter);
1045                 kfree(elf_interpreter);
1046         } else {
1047                 elf_entry = loc->elf_ex.e_entry;
1048                 if (BAD_ADDR(elf_entry)) {
1049                         force_sig(SIGSEGV, current);
1050                         retval = -EINVAL;
1051                         goto out_free_dentry;
1052                 }
1053         }
1054
1055         kfree(elf_phdata);
1056
1057         if (interpreter_type != INTERPRETER_AOUT)
1058                 sys_close(elf_exec_fileno);
1059
1060         set_binfmt(&elf_format);
1061
1062 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1063         retval = arch_setup_additional_pages(bprm, executable_stack);
1064         if (retval < 0) {
1065                 send_sig(SIGKILL, current, 0);
1066                 goto out;
1067         }
1068 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1069
1070         compute_creds(bprm);
1071         current->flags &= ~PF_FORKNOEXEC;
1072         retval = create_elf_tables(bprm, &loc->elf_ex,
1073                           (interpreter_type == INTERPRETER_AOUT),
1074                           load_addr, interp_load_addr);
1075         if (retval < 0) {
1076                 send_sig(SIGKILL, current, 0);
1077                 goto out;
1078         }
1079         /* N.B. passed_fileno might not be initialized? */
1080         if (interpreter_type == INTERPRETER_AOUT)
1081                 current->mm->arg_start += strlen(passed_fileno) + 1;
1082         current->mm->end_code = end_code;
1083         current->mm->start_code = start_code;
1084         current->mm->start_data = start_data;
1085         current->mm->end_data = end_data;
1086         current->mm->start_stack = bprm->p;
1087
1088 #ifdef arch_randomize_brk
1089         if (current->flags & PF_RANDOMIZE)
1090                 current->mm->brk = current->mm->start_brk =
1091                         arch_randomize_brk(current->mm);
1092 #endif
1093
1094         if (current->personality & MMAP_PAGE_ZERO) {
1095                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1096                    and some applications "depend" upon this behavior.
1097                    Since we do not have the power to recompile these, we
1098                    emulate the SVr4 behavior. Sigh. */
1099                 down_write(&current->mm->mmap_sem);
1100                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1101                                 MAP_FIXED | MAP_PRIVATE, 0);
1102                 up_write(&current->mm->mmap_sem);
1103         }
1104
1105 #ifdef ELF_PLAT_INIT
1106         /*
1107          * The ABI may specify that certain registers be set up in special
1108          * ways (on i386 %edx is the address of a DT_FINI function, for
1109          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1110          * that the e_entry field is the address of the function descriptor
1111          * for the startup routine, rather than the address of the startup
1112          * routine itself.  This macro performs whatever initialization to
1113          * the regs structure is required as well as any relocations to the
1114          * function descriptor entries when executing dynamically links apps.
1115          */
1116         ELF_PLAT_INIT(regs, reloc_func_desc);
1117 #endif
1118
1119         start_thread(regs, elf_entry, bprm->p);
1120         if (unlikely(current->ptrace & PT_PTRACED)) {
1121                 if (current->ptrace & PT_TRACE_EXEC)
1122                         ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1123                 else
1124                         send_sig(SIGTRAP, current, 0);
1125         }
1126         retval = 0;
1127 out:
1128         kfree(loc);
1129 out_ret:
1130         return retval;
1131
1132         /* error cleanup */
1133 out_free_dentry:
1134         allow_write_access(interpreter);
1135         if (interpreter)
1136                 fput(interpreter);
1137 out_free_interp:
1138         kfree(elf_interpreter);
1139 out_free_file:
1140         sys_close(elf_exec_fileno);
1141 out_free_fh:
1142         if (files)
1143                 reset_files_struct(current, files);
1144 out_free_ph:
1145         kfree(elf_phdata);
1146         goto out;
1147 }
1148
1149 /* This is really simpleminded and specialized - we are loading an
1150    a.out library that is given an ELF header. */
1151 static int load_elf_library(struct file *file)
1152 {
1153         struct elf_phdr *elf_phdata;
1154         struct elf_phdr *eppnt;
1155         unsigned long elf_bss, bss, len;
1156         int retval, error, i, j;
1157         struct elfhdr elf_ex;
1158
1159         error = -ENOEXEC;
1160         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1161         if (retval != sizeof(elf_ex))
1162                 goto out;
1163
1164         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1165                 goto out;
1166
1167         /* First of all, some simple consistency checks */
1168         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1169             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1170                 goto out;
1171
1172         /* Now read in all of the header information */
1173
1174         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1175         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1176
1177         error = -ENOMEM;
1178         elf_phdata = kmalloc(j, GFP_KERNEL);
1179         if (!elf_phdata)
1180                 goto out;
1181
1182         eppnt = elf_phdata;
1183         error = -ENOEXEC;
1184         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1185         if (retval != j)
1186                 goto out_free_ph;
1187
1188         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1189                 if ((eppnt + i)->p_type == PT_LOAD)
1190                         j++;
1191         if (j != 1)
1192                 goto out_free_ph;
1193
1194         while (eppnt->p_type != PT_LOAD)
1195                 eppnt++;
1196
1197         /* Now use mmap to map the library into memory. */
1198         down_write(&current->mm->mmap_sem);
1199         error = do_mmap(file,
1200                         ELF_PAGESTART(eppnt->p_vaddr),
1201                         (eppnt->p_filesz +
1202                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1203                         PROT_READ | PROT_WRITE | PROT_EXEC,
1204                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1205                         (eppnt->p_offset -
1206                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1207         up_write(&current->mm->mmap_sem);
1208         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1209                 goto out_free_ph;
1210
1211         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1212         if (padzero(elf_bss)) {
1213                 error = -EFAULT;
1214                 goto out_free_ph;
1215         }
1216
1217         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1218                             ELF_MIN_ALIGN - 1);
1219         bss = eppnt->p_memsz + eppnt->p_vaddr;
1220         if (bss > len) {
1221                 down_write(&current->mm->mmap_sem);
1222                 do_brk(len, bss - len);
1223                 up_write(&current->mm->mmap_sem);
1224         }
1225         error = 0;
1226
1227 out_free_ph:
1228         kfree(elf_phdata);
1229 out:
1230         return error;
1231 }
1232
1233 /*
1234  * Note that some platforms still use traditional core dumps and not
1235  * the ELF core dump.  Each platform can select it as appropriate.
1236  */
1237 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1238
1239 /*
1240  * ELF core dumper
1241  *
1242  * Modelled on fs/exec.c:aout_core_dump()
1243  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1244  */
1245 /*
1246  * These are the only things you should do on a core-file: use only these
1247  * functions to write out all the necessary info.
1248  */
1249 static int dump_write(struct file *file, const void *addr, int nr)
1250 {
1251         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1252 }
1253
1254 static int dump_seek(struct file *file, loff_t off)
1255 {
1256         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1257                 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1258                         return 0;
1259         } else {
1260                 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1261                 if (!buf)
1262                         return 0;
1263                 while (off > 0) {
1264                         unsigned long n = off;
1265                         if (n > PAGE_SIZE)
1266                                 n = PAGE_SIZE;
1267                         if (!dump_write(file, buf, n))
1268                                 return 0;
1269                         off -= n;
1270                 }
1271                 free_page((unsigned long)buf);
1272         }
1273         return 1;
1274 }
1275
1276 /*
1277  * Decide what to dump of a segment, part, all or none.
1278  */
1279 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1280                                    unsigned long mm_flags)
1281 {
1282         /* The vma can be set up to tell us the answer directly.  */
1283         if (vma->vm_flags & VM_ALWAYSDUMP)
1284                 goto whole;
1285
1286         /* Do not dump I/O mapped devices or special mappings */
1287         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1288                 return 0;
1289
1290 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1291
1292         /* By default, dump shared memory if mapped from an anonymous file. */
1293         if (vma->vm_flags & VM_SHARED) {
1294                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1295                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1296                         goto whole;
1297                 return 0;
1298         }
1299
1300         /* Dump segments that have been written to.  */
1301         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1302                 goto whole;
1303         if (vma->vm_file == NULL)
1304                 return 0;
1305
1306         if (FILTER(MAPPED_PRIVATE))
1307                 goto whole;
1308
1309         /*
1310          * If this looks like the beginning of a DSO or executable mapping,
1311          * check for an ELF header.  If we find one, dump the first page to
1312          * aid in determining what was mapped here.
1313          */
1314         if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1315                 u32 __user *header = (u32 __user *) vma->vm_start;
1316                 u32 word;
1317                 /*
1318                  * Doing it this way gets the constant folded by GCC.
1319                  */
1320                 union {
1321                         u32 cmp;
1322                         char elfmag[SELFMAG];
1323                 } magic;
1324                 BUILD_BUG_ON(SELFMAG != sizeof word);
1325                 magic.elfmag[EI_MAG0] = ELFMAG0;
1326                 magic.elfmag[EI_MAG1] = ELFMAG1;
1327                 magic.elfmag[EI_MAG2] = ELFMAG2;
1328                 magic.elfmag[EI_MAG3] = ELFMAG3;
1329                 if (get_user(word, header) == 0 && word == magic.cmp)
1330                         return PAGE_SIZE;
1331         }
1332
1333 #undef  FILTER
1334
1335         return 0;
1336
1337 whole:
1338         return vma->vm_end - vma->vm_start;
1339 }
1340
1341 /* An ELF note in memory */
1342 struct memelfnote
1343 {
1344         const char *name;
1345         int type;
1346         unsigned int datasz;
1347         void *data;
1348 };
1349
1350 static int notesize(struct memelfnote *en)
1351 {
1352         int sz;
1353
1354         sz = sizeof(struct elf_note);
1355         sz += roundup(strlen(en->name) + 1, 4);
1356         sz += roundup(en->datasz, 4);
1357
1358         return sz;
1359 }
1360
1361 #define DUMP_WRITE(addr, nr, foffset)   \
1362         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1363
1364 static int alignfile(struct file *file, loff_t *foffset)
1365 {
1366         static const char buf[4] = { 0, };
1367         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1368         return 1;
1369 }
1370
1371 static int writenote(struct memelfnote *men, struct file *file,
1372                         loff_t *foffset)
1373 {
1374         struct elf_note en;
1375         en.n_namesz = strlen(men->name) + 1;
1376         en.n_descsz = men->datasz;
1377         en.n_type = men->type;
1378
1379         DUMP_WRITE(&en, sizeof(en), foffset);
1380         DUMP_WRITE(men->name, en.n_namesz, foffset);
1381         if (!alignfile(file, foffset))
1382                 return 0;
1383         DUMP_WRITE(men->data, men->datasz, foffset);
1384         if (!alignfile(file, foffset))
1385                 return 0;
1386
1387         return 1;
1388 }
1389 #undef DUMP_WRITE
1390
1391 #define DUMP_WRITE(addr, nr)    \
1392         if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1393                 goto end_coredump;
1394 #define DUMP_SEEK(off)  \
1395         if (!dump_seek(file, (off))) \
1396                 goto end_coredump;
1397
1398 static void fill_elf_header(struct elfhdr *elf, int segs,
1399                             u16 machine, u32 flags, u8 osabi)
1400 {
1401         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1402         elf->e_ident[EI_CLASS] = ELF_CLASS;
1403         elf->e_ident[EI_DATA] = ELF_DATA;
1404         elf->e_ident[EI_VERSION] = EV_CURRENT;
1405         elf->e_ident[EI_OSABI] = ELF_OSABI;
1406         memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1407
1408         elf->e_type = ET_CORE;
1409         elf->e_machine = machine;
1410         elf->e_version = EV_CURRENT;
1411         elf->e_entry = 0;
1412         elf->e_phoff = sizeof(struct elfhdr);
1413         elf->e_shoff = 0;
1414         elf->e_flags = flags;
1415         elf->e_ehsize = sizeof(struct elfhdr);
1416         elf->e_phentsize = sizeof(struct elf_phdr);
1417         elf->e_phnum = segs;
1418         elf->e_shentsize = 0;
1419         elf->e_shnum = 0;
1420         elf->e_shstrndx = 0;
1421         return;
1422 }
1423
1424 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1425 {
1426         phdr->p_type = PT_NOTE;
1427         phdr->p_offset = offset;
1428         phdr->p_vaddr = 0;
1429         phdr->p_paddr = 0;
1430         phdr->p_filesz = sz;
1431         phdr->p_memsz = 0;
1432         phdr->p_flags = 0;
1433         phdr->p_align = 0;
1434         return;
1435 }
1436
1437 static void fill_note(struct memelfnote *note, const char *name, int type, 
1438                 unsigned int sz, void *data)
1439 {
1440         note->name = name;
1441         note->type = type;
1442         note->datasz = sz;
1443         note->data = data;
1444         return;
1445 }
1446
1447 /*
1448  * fill up all the fields in prstatus from the given task struct, except
1449  * registers which need to be filled up separately.
1450  */
1451 static void fill_prstatus(struct elf_prstatus *prstatus,
1452                 struct task_struct *p, long signr)
1453 {
1454         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1455         prstatus->pr_sigpend = p->pending.signal.sig[0];
1456         prstatus->pr_sighold = p->blocked.sig[0];
1457         prstatus->pr_pid = task_pid_vnr(p);
1458         prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1459         prstatus->pr_pgrp = task_pgrp_vnr(p);
1460         prstatus->pr_sid = task_session_vnr(p);
1461         if (thread_group_leader(p)) {
1462                 /*
1463                  * This is the record for the group leader.  Add in the
1464                  * cumulative times of previous dead threads.  This total
1465                  * won't include the time of each live thread whose state
1466                  * is included in the core dump.  The final total reported
1467                  * to our parent process when it calls wait4 will include
1468                  * those sums as well as the little bit more time it takes
1469                  * this and each other thread to finish dying after the
1470                  * core dump synchronization phase.
1471                  */
1472                 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1473                                    &prstatus->pr_utime);
1474                 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1475                                    &prstatus->pr_stime);
1476         } else {
1477                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1478                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1479         }
1480         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1481         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1482 }
1483
1484 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1485                        struct mm_struct *mm)
1486 {
1487         unsigned int i, len;
1488         
1489         /* first copy the parameters from user space */
1490         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1491
1492         len = mm->arg_end - mm->arg_start;
1493         if (len >= ELF_PRARGSZ)
1494                 len = ELF_PRARGSZ-1;
1495         if (copy_from_user(&psinfo->pr_psargs,
1496                            (const char __user *)mm->arg_start, len))
1497                 return -EFAULT;
1498         for(i = 0; i < len; i++)
1499                 if (psinfo->pr_psargs[i] == 0)
1500                         psinfo->pr_psargs[i] = ' ';
1501         psinfo->pr_psargs[len] = 0;
1502
1503         psinfo->pr_pid = task_pid_vnr(p);
1504         psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1505         psinfo->pr_pgrp = task_pgrp_vnr(p);
1506         psinfo->pr_sid = task_session_vnr(p);
1507
1508         i = p->state ? ffz(~p->state) + 1 : 0;
1509         psinfo->pr_state = i;
1510         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1511         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1512         psinfo->pr_nice = task_nice(p);
1513         psinfo->pr_flag = p->flags;
1514         SET_UID(psinfo->pr_uid, p->uid);
1515         SET_GID(psinfo->pr_gid, p->gid);
1516         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1517         
1518         return 0;
1519 }
1520
1521 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1522 {
1523         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1524         int i = 0;
1525         do
1526                 i += 2;
1527         while (auxv[i - 2] != AT_NULL);
1528         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1529 }
1530
1531 #ifdef CORE_DUMP_USE_REGSET
1532 #include <linux/regset.h>
1533
1534 struct elf_thread_core_info {
1535         struct elf_thread_core_info *next;
1536         struct task_struct *task;
1537         struct elf_prstatus prstatus;
1538         struct memelfnote notes[0];
1539 };
1540
1541 struct elf_note_info {
1542         struct elf_thread_core_info *thread;
1543         struct memelfnote psinfo;
1544         struct memelfnote auxv;
1545         size_t size;
1546         int thread_notes;
1547 };
1548
1549 static int fill_thread_core_info(struct elf_thread_core_info *t,
1550                                  const struct user_regset_view *view,
1551                                  long signr, size_t *total)
1552 {
1553         unsigned int i;
1554
1555         /*
1556          * NT_PRSTATUS is the one special case, because the regset data
1557          * goes into the pr_reg field inside the note contents, rather
1558          * than being the whole note contents.  We fill the reset in here.
1559          * We assume that regset 0 is NT_PRSTATUS.
1560          */
1561         fill_prstatus(&t->prstatus, t->task, signr);
1562         (void) view->regsets[0].get(t->task, &view->regsets[0],
1563                                     0, sizeof(t->prstatus.pr_reg),
1564                                     &t->prstatus.pr_reg, NULL);
1565
1566         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1567                   sizeof(t->prstatus), &t->prstatus);
1568         *total += notesize(&t->notes[0]);
1569
1570         /*
1571          * Each other regset might generate a note too.  For each regset
1572          * that has no core_note_type or is inactive, we leave t->notes[i]
1573          * all zero and we'll know to skip writing it later.
1574          */
1575         for (i = 1; i < view->n; ++i) {
1576                 const struct user_regset *regset = &view->regsets[i];
1577                 if (regset->core_note_type &&
1578                     (!regset->active || regset->active(t->task, regset))) {
1579                         int ret;
1580                         size_t size = regset->n * regset->size;
1581                         void *data = kmalloc(size, GFP_KERNEL);
1582                         if (unlikely(!data))
1583                                 return 0;
1584                         ret = regset->get(t->task, regset,
1585                                           0, size, data, NULL);
1586                         if (unlikely(ret))
1587                                 kfree(data);
1588                         else {
1589                                 if (regset->core_note_type != NT_PRFPREG)
1590                                         fill_note(&t->notes[i], "LINUX",
1591                                                   regset->core_note_type,
1592                                                   size, data);
1593                                 else {
1594                                         t->prstatus.pr_fpvalid = 1;
1595                                         fill_note(&t->notes[i], "CORE",
1596                                                   NT_PRFPREG, size, data);
1597                                 }
1598                                 *total += notesize(&t->notes[i]);
1599                         }
1600                 }
1601         }
1602
1603         return 1;
1604 }
1605
1606 static int fill_note_info(struct elfhdr *elf, int phdrs,
1607                           struct elf_note_info *info,
1608                           long signr, struct pt_regs *regs)
1609 {
1610         struct task_struct *dump_task = current;
1611         const struct user_regset_view *view = task_user_regset_view(dump_task);
1612         struct elf_thread_core_info *t;
1613         struct elf_prpsinfo *psinfo;
1614         struct task_struct *g, *p;
1615         unsigned int i;
1616
1617         info->size = 0;
1618         info->thread = NULL;
1619
1620         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1621         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1622
1623         if (psinfo == NULL)
1624                 return 0;
1625
1626         /*
1627          * Figure out how many notes we're going to need for each thread.
1628          */
1629         info->thread_notes = 0;
1630         for (i = 0; i < view->n; ++i)
1631                 if (view->regsets[i].core_note_type != 0)
1632                         ++info->thread_notes;
1633
1634         /*
1635          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1636          * since it is our one special case.
1637          */
1638         if (unlikely(info->thread_notes == 0) ||
1639             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1640                 WARN_ON(1);
1641                 return 0;
1642         }
1643
1644         /*
1645          * Initialize the ELF file header.
1646          */
1647         fill_elf_header(elf, phdrs,
1648                         view->e_machine, view->e_flags, view->ei_osabi);
1649
1650         /*
1651          * Allocate a structure for each thread.
1652          */
1653         rcu_read_lock();
1654         do_each_thread(g, p)
1655                 if (p->mm == dump_task->mm) {
1656                         t = kzalloc(offsetof(struct elf_thread_core_info,
1657                                              notes[info->thread_notes]),
1658                                     GFP_ATOMIC);
1659                         if (unlikely(!t)) {
1660                                 rcu_read_unlock();
1661                                 return 0;
1662                         }
1663                         t->task = p;
1664                         if (p == dump_task || !info->thread) {
1665                                 t->next = info->thread;
1666                                 info->thread = t;
1667                         } else {
1668                                 /*
1669                                  * Make sure to keep the original task at
1670                                  * the head of the list.
1671                                  */
1672                                 t->next = info->thread->next;
1673                                 info->thread->next = t;
1674                         }
1675                 }
1676         while_each_thread(g, p);
1677         rcu_read_unlock();
1678
1679         /*
1680          * Now fill in each thread's information.
1681          */
1682         for (t = info->thread; t != NULL; t = t->next)
1683                 if (!fill_thread_core_info(t, view, signr, &info->size))
1684                         return 0;
1685
1686         /*
1687          * Fill in the two process-wide notes.
1688          */
1689         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1690         info->size += notesize(&info->psinfo);
1691
1692         fill_auxv_note(&info->auxv, current->mm);
1693         info->size += notesize(&info->auxv);
1694
1695         return 1;
1696 }
1697
1698 static size_t get_note_info_size(struct elf_note_info *info)
1699 {
1700         return info->size;
1701 }
1702
1703 /*
1704  * Write all the notes for each thread.  When writing the first thread, the
1705  * process-wide notes are interleaved after the first thread-specific note.
1706  */
1707 static int write_note_info(struct elf_note_info *info,
1708                            struct file *file, loff_t *foffset)
1709 {
1710         bool first = 1;
1711         struct elf_thread_core_info *t = info->thread;
1712
1713         do {
1714                 int i;
1715
1716                 if (!writenote(&t->notes[0], file, foffset))
1717                         return 0;
1718
1719                 if (first && !writenote(&info->psinfo, file, foffset))
1720                         return 0;
1721                 if (first && !writenote(&info->auxv, file, foffset))
1722                         return 0;
1723
1724                 for (i = 1; i < info->thread_notes; ++i)
1725                         if (t->notes[i].data &&
1726                             !writenote(&t->notes[i], file, foffset))
1727                                 return 0;
1728
1729                 first = 0;
1730                 t = t->next;
1731         } while (t);
1732
1733         return 1;
1734 }
1735
1736 static void free_note_info(struct elf_note_info *info)
1737 {
1738         struct elf_thread_core_info *threads = info->thread;
1739         while (threads) {
1740                 unsigned int i;
1741                 struct elf_thread_core_info *t = threads;
1742                 threads = t->next;
1743                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1744                 for (i = 1; i < info->thread_notes; ++i)
1745                         kfree(t->notes[i].data);
1746                 kfree(t);
1747         }
1748         kfree(info->psinfo.data);
1749 }
1750
1751 #else
1752
1753 /* Here is the structure in which status of each thread is captured. */
1754 struct elf_thread_status
1755 {
1756         struct list_head list;
1757         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1758         elf_fpregset_t fpu;             /* NT_PRFPREG */
1759         struct task_struct *thread;
1760 #ifdef ELF_CORE_COPY_XFPREGS
1761         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1762 #endif
1763         struct memelfnote notes[3];
1764         int num_notes;
1765 };
1766
1767 /*
1768  * In order to add the specific thread information for the elf file format,
1769  * we need to keep a linked list of every threads pr_status and then create
1770  * a single section for them in the final core file.
1771  */
1772 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1773 {
1774         int sz = 0;
1775         struct task_struct *p = t->thread;
1776         t->num_notes = 0;
1777
1778         fill_prstatus(&t->prstatus, p, signr);
1779         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1780         
1781         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1782                   &(t->prstatus));
1783         t->num_notes++;
1784         sz += notesize(&t->notes[0]);
1785
1786         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1787                                                                 &t->fpu))) {
1788                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1789                           &(t->fpu));
1790                 t->num_notes++;
1791                 sz += notesize(&t->notes[1]);
1792         }
1793
1794 #ifdef ELF_CORE_COPY_XFPREGS
1795         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1796                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1797                           sizeof(t->xfpu), &t->xfpu);
1798                 t->num_notes++;
1799                 sz += notesize(&t->notes[2]);
1800         }
1801 #endif  
1802         return sz;
1803 }
1804
1805 struct elf_note_info {
1806         struct memelfnote *notes;
1807         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1808         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1809         struct list_head thread_list;
1810         elf_fpregset_t *fpu;
1811 #ifdef ELF_CORE_COPY_XFPREGS
1812         elf_fpxregset_t *xfpu;
1813 #endif
1814         int thread_status_size;
1815         int numnote;
1816 };
1817
1818 static int fill_note_info(struct elfhdr *elf, int phdrs,
1819                           struct elf_note_info *info,
1820                           long signr, struct pt_regs *regs)
1821 {
1822 #define NUM_NOTES       6
1823         struct list_head *t;
1824         struct task_struct *g, *p;
1825
1826         info->notes = NULL;
1827         info->prstatus = NULL;
1828         info->psinfo = NULL;
1829         info->fpu = NULL;
1830 #ifdef ELF_CORE_COPY_XFPREGS
1831         info->xfpu = NULL;
1832 #endif
1833         INIT_LIST_HEAD(&info->thread_list);
1834
1835         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1836                               GFP_KERNEL);
1837         if (!info->notes)
1838                 return 0;
1839         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1840         if (!info->psinfo)
1841                 return 0;
1842         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1843         if (!info->prstatus)
1844                 return 0;
1845         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1846         if (!info->fpu)
1847                 return 0;
1848 #ifdef ELF_CORE_COPY_XFPREGS
1849         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1850         if (!info->xfpu)
1851                 return 0;
1852 #endif
1853
1854         info->thread_status_size = 0;
1855         if (signr) {
1856                 struct elf_thread_status *tmp;
1857                 rcu_read_lock();
1858                 do_each_thread(g, p)
1859                         if (current->mm == p->mm && current != p) {
1860                                 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1861                                 if (!tmp) {
1862                                         rcu_read_unlock();
1863                                         return 0;
1864                                 }
1865                                 tmp->thread = p;
1866                                 list_add(&tmp->list, &info->thread_list);
1867                         }
1868                 while_each_thread(g, p);
1869                 rcu_read_unlock();
1870                 list_for_each(t, &info->thread_list) {
1871                         struct elf_thread_status *tmp;
1872                         int sz;
1873
1874                         tmp = list_entry(t, struct elf_thread_status, list);
1875                         sz = elf_dump_thread_status(signr, tmp);
1876                         info->thread_status_size += sz;
1877                 }
1878         }
1879         /* now collect the dump for the current */
1880         memset(info->prstatus, 0, sizeof(*info->prstatus));
1881         fill_prstatus(info->prstatus, current, signr);
1882         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1883
1884         /* Set up header */
1885         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1886
1887         /*
1888          * Set up the notes in similar form to SVR4 core dumps made
1889          * with info from their /proc.
1890          */
1891
1892         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1893                   sizeof(*info->prstatus), info->prstatus);
1894         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1895         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1896                   sizeof(*info->psinfo), info->psinfo);
1897
1898         info->numnote = 2;
1899
1900         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1901
1902         /* Try to dump the FPU. */
1903         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1904                                                                info->fpu);
1905         if (info->prstatus->pr_fpvalid)
1906                 fill_note(info->notes + info->numnote++,
1907                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1908 #ifdef ELF_CORE_COPY_XFPREGS
1909         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1910                 fill_note(info->notes + info->numnote++,
1911                           "LINUX", ELF_CORE_XFPREG_TYPE,
1912                           sizeof(*info->xfpu), info->xfpu);
1913 #endif
1914
1915         return 1;
1916
1917 #undef NUM_NOTES
1918 }
1919
1920 static size_t get_note_info_size(struct elf_note_info *info)
1921 {
1922         int sz = 0;
1923         int i;
1924
1925         for (i = 0; i < info->numnote; i++)
1926                 sz += notesize(info->notes + i);
1927
1928         sz += info->thread_status_size;
1929
1930         return sz;
1931 }
1932
1933 static int write_note_info(struct elf_note_info *info,
1934                            struct file *file, loff_t *foffset)
1935 {
1936         int i;
1937         struct list_head *t;
1938
1939         for (i = 0; i < info->numnote; i++)
1940                 if (!writenote(info->notes + i, file, foffset))
1941                         return 0;
1942
1943         /* write out the thread status notes section */
1944         list_for_each(t, &info->thread_list) {
1945                 struct elf_thread_status *tmp =
1946                                 list_entry(t, struct elf_thread_status, list);
1947
1948                 for (i = 0; i < tmp->num_notes; i++)
1949                         if (!writenote(&tmp->notes[i], file, foffset))
1950                                 return 0;
1951         }
1952
1953         return 1;
1954 }
1955
1956 static void free_note_info(struct elf_note_info *info)
1957 {
1958         while (!list_empty(&info->thread_list)) {
1959                 struct list_head *tmp = info->thread_list.next;
1960                 list_del(tmp);
1961                 kfree(list_entry(tmp, struct elf_thread_status, list));
1962         }
1963
1964         kfree(info->prstatus);
1965         kfree(info->psinfo);
1966         kfree(info->notes);
1967         kfree(info->fpu);
1968 #ifdef ELF_CORE_COPY_XFPREGS
1969         kfree(info->xfpu);
1970 #endif
1971 }
1972
1973 #endif
1974
1975 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1976                                         struct vm_area_struct *gate_vma)
1977 {
1978         struct vm_area_struct *ret = tsk->mm->mmap;
1979
1980         if (ret)
1981                 return ret;
1982         return gate_vma;
1983 }
1984 /*
1985  * Helper function for iterating across a vma list.  It ensures that the caller
1986  * will visit `gate_vma' prior to terminating the search.
1987  */
1988 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1989                                         struct vm_area_struct *gate_vma)
1990 {
1991         struct vm_area_struct *ret;
1992
1993         ret = this_vma->vm_next;
1994         if (ret)
1995                 return ret;
1996         if (this_vma == gate_vma)
1997                 return NULL;
1998         return gate_vma;
1999 }
2000
2001 /*
2002  * Actual dumper
2003  *
2004  * This is a two-pass process; first we find the offsets of the bits,
2005  * and then they are actually written out.  If we run out of core limit
2006  * we just truncate.
2007  */
2008 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
2009 {
2010         int has_dumped = 0;
2011         mm_segment_t fs;
2012         int segs;
2013         size_t size = 0;
2014         struct vm_area_struct *vma, *gate_vma;
2015         struct elfhdr *elf = NULL;
2016         loff_t offset = 0, dataoff, foffset;
2017         unsigned long mm_flags;
2018         struct elf_note_info info;
2019
2020         /*
2021          * We no longer stop all VM operations.
2022          * 
2023          * This is because those proceses that could possibly change map_count
2024          * or the mmap / vma pages are now blocked in do_exit on current
2025          * finishing this core dump.
2026          *
2027          * Only ptrace can touch these memory addresses, but it doesn't change
2028          * the map_count or the pages allocated. So no possibility of crashing
2029          * exists while dumping the mm->vm_next areas to the core file.
2030          */
2031   
2032         /* alloc memory for large data structures: too large to be on stack */
2033         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2034         if (!elf)
2035                 goto cleanup;
2036         
2037         segs = current->mm->map_count;
2038 #ifdef ELF_CORE_EXTRA_PHDRS
2039         segs += ELF_CORE_EXTRA_PHDRS;
2040 #endif
2041
2042         gate_vma = get_gate_vma(current);
2043         if (gate_vma != NULL)
2044                 segs++;
2045
2046         /*
2047          * Collect all the non-memory information about the process for the
2048          * notes.  This also sets up the file header.
2049          */
2050         if (!fill_note_info(elf, segs + 1, /* including notes section */
2051                             &info, signr, regs))
2052                 goto cleanup;
2053
2054         has_dumped = 1;
2055         current->flags |= PF_DUMPCORE;
2056   
2057         fs = get_fs();
2058         set_fs(KERNEL_DS);
2059
2060         DUMP_WRITE(elf, sizeof(*elf));
2061         offset += sizeof(*elf);                         /* Elf header */
2062         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
2063         foffset = offset;
2064
2065         /* Write notes phdr entry */
2066         {
2067                 struct elf_phdr phdr;
2068                 size_t sz = get_note_info_size(&info);
2069
2070                 sz += elf_coredump_extra_notes_size();
2071
2072                 fill_elf_note_phdr(&phdr, sz, offset);
2073                 offset += sz;
2074                 DUMP_WRITE(&phdr, sizeof(phdr));
2075         }
2076
2077         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2078
2079         /*
2080          * We must use the same mm->flags while dumping core to avoid
2081          * inconsistency between the program headers and bodies, otherwise an
2082          * unusable core file can be generated.
2083          */
2084         mm_flags = current->mm->flags;
2085
2086         /* Write program headers for segments dump */
2087         for (vma = first_vma(current, gate_vma); vma != NULL;
2088                         vma = next_vma(vma, gate_vma)) {
2089                 struct elf_phdr phdr;
2090
2091                 phdr.p_type = PT_LOAD;
2092                 phdr.p_offset = offset;
2093                 phdr.p_vaddr = vma->vm_start;
2094                 phdr.p_paddr = 0;
2095                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
2096                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2097                 offset += phdr.p_filesz;
2098                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2099                 if (vma->vm_flags & VM_WRITE)
2100                         phdr.p_flags |= PF_W;
2101                 if (vma->vm_flags & VM_EXEC)
2102                         phdr.p_flags |= PF_X;
2103                 phdr.p_align = ELF_EXEC_PAGESIZE;
2104
2105                 DUMP_WRITE(&phdr, sizeof(phdr));
2106         }
2107
2108 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2109         ELF_CORE_WRITE_EXTRA_PHDRS;
2110 #endif
2111
2112         /* write out the notes section */
2113         if (!write_note_info(&info, file, &foffset))
2114                 goto end_coredump;
2115
2116         if (elf_coredump_extra_notes_write(file, &foffset))
2117                 goto end_coredump;
2118
2119         /* Align to page */
2120         DUMP_SEEK(dataoff - foffset);
2121
2122         for (vma = first_vma(current, gate_vma); vma != NULL;
2123                         vma = next_vma(vma, gate_vma)) {
2124                 unsigned long addr;
2125                 unsigned long end;
2126
2127                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2128
2129                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2130                         struct page *page;
2131                         struct vm_area_struct *vma;
2132
2133                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2134                                                 &page, &vma) <= 0) {
2135                                 DUMP_SEEK(PAGE_SIZE);
2136                         } else {
2137                                 if (page == ZERO_PAGE(0)) {
2138                                         if (!dump_seek(file, PAGE_SIZE)) {
2139                                                 page_cache_release(page);
2140                                                 goto end_coredump;
2141                                         }
2142                                 } else {
2143                                         void *kaddr;
2144                                         flush_cache_page(vma, addr,
2145                                                          page_to_pfn(page));
2146                                         kaddr = kmap(page);
2147                                         if ((size += PAGE_SIZE) > limit ||
2148                                             !dump_write(file, kaddr,
2149                                             PAGE_SIZE)) {
2150                                                 kunmap(page);
2151                                                 page_cache_release(page);
2152                                                 goto end_coredump;
2153                                         }
2154                                         kunmap(page);
2155                                 }
2156                                 page_cache_release(page);
2157                         }
2158                 }
2159         }
2160
2161 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2162         ELF_CORE_WRITE_EXTRA_DATA;
2163 #endif
2164
2165 end_coredump:
2166         set_fs(fs);
2167
2168 cleanup:
2169         kfree(elf);
2170         free_note_info(&info);
2171         return has_dumped;
2172 }
2173
2174 #endif          /* USE_ELF_CORE_DUMP */
2175
2176 static int __init init_elf_binfmt(void)
2177 {
2178         return register_binfmt(&elf_format);
2179 }
2180
2181 static void __exit exit_elf_binfmt(void)
2182 {
2183         /* Remove the COFF and ELF loaders. */
2184         unregister_binfmt(&elf_format);
2185 }
2186
2187 core_initcall(init_elf_binfmt);
2188 module_exit(exit_elf_binfmt);
2189 MODULE_LICENSE("GPL");