aout: suppress A.OUT library support if !CONFIG_ARCH_SUPPORTS_AOUT
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmer's Guide: ANSI C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

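/*
 * Worked example of the alignment macros above (illustrative only,
 * assuming ELF_MIN_ALIGN == 4096, i.e. 0x1000):
 *
 *   ELF_PAGESTART(0x08048123)  == 0x08048000  start of containing page
 *   ELF_PAGEOFFSET(0x08048123) == 0x00000123  offset within that page
 *   ELF_PAGEALIGN(0x08048123)  == 0x08049000  rounded up to next boundary
 */
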
static struct linux_binfmt elf_format = {
                .module         = THIS_MODULE,
                .load_binary    = load_elf_binary,
                .load_shlib     = load_elf_library,
                .core_dump      = elf_core_dump,
                .min_coredump   = ELF_EXEC_PAGESIZE,
                .hasvdso        = 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                down_write(&current->mm->mmap_sem);
                addr = do_brk(start, end - start);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
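
/*
 * Illustrative example of padzero() (assuming ELF_MIN_ALIGN == 4096):
 * for elf_bss == 0x0804a123, ELF_PAGEOFFSET(elf_bss) == 0x123, so the
 * remaining 0x1000 - 0x123 == 0xedd bytes of that page, starting at
 * 0x0804a123, are cleared with clear_user().
 */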

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
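
/*
 * Illustrative example for the common downward-growing case:
 * STACK_ALLOC(p, 6) with p == 0xbffffe10 lowers p to 0xbffffe0a and
 * yields that address, and STACK_ROUND() later rounds the final
 * stack pointer down to a 16-byte boundary, e.g.
 * 0xbffffe0a & ~15UL == 0xbffffe00.
 */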

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                int interp_aout, unsigned long load_addr,
                unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        const char *k_platform = ELF_PLATFORM;
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        struct task_struct *tsk = current;
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, tsk->uid);
        NEW_AUX_ENT(AT_EUID, tsk->euid);
        NEW_AUX_ENT(AT_GID, tsk->gid);
        NEW_AUX_ENT(AT_EGID, tsk->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1);
        if (interp_aout) {
                items += 3; /* a.out interpreters require argv & envp too */
        } else {
                items += 1; /* ELF interpreters only put argc on the stack */
        }
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        if (interp_aout) {
                argv = sp + 2;
                envp = argv + argc + 1;
                if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
                    __put_user((elf_addr_t)(unsigned long)envp, sp++))
                        return -EFAULT;
        } else {
                argv = sp;
                envp = argv + argc + 1;
        }

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
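
/*
 * The resulting initial stack layout (lowest address first), as built
 * above for the usual ELF case -- shown for orientation only:
 *
 *   argc
 *   argv[0] ... argv[argc-1], NULL
 *   envp[0] ... envp[envc-1], NULL
 *   auxv pairs (AT_* id/value), terminated by AT_NULL
 *   ... string data (arguments, environment, platform string) ...
 */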

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        down_write(&current->mm->mmap_sem);
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        do_munmap(current->mm, map_addr+size, total_size-size);
        } else
                map_addr = do_mmap(filep, addr, size, prot, type, off);

        up_write(&current->mm->mmap_sem);
        return map_addr;
}

#endif /* !elf_map */
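
/*
 * Worked example of elf_map() (illustrative, ELF_MIN_ALIGN == 4096):
 * for a segment with p_vaddr == 0x08048100, p_offset == 0x100 and
 * p_filesz == 0x1f00, the page offset is 0x100, so size becomes
 * ELF_PAGEALIGN(0x1f00 + 0x100) == 0x2000 and off becomes 0x0; the
 * file is mapped at ELF_PAGESTART(addr) so that the segment contents
 * land at the requested virtual address.
 */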

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
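
/*
 * Illustrative example: for two PT_LOAD headers, one at p_vaddr 0x0000
 * with p_memsz 0x1000 and one at p_vaddr 0x5000 with p_memsz 0x800,
 * total_mapping_size() returns 0x5800 -- the span from the first load
 * address (page-aligned) to the end of the last segment, including
 * the hole between them.
 */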

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        /*
         * Now fill out the bss section.  First pad the last page up
         * to the page boundary, and then perform a mmap to make sure
         * that there are zero-mapped pages up to and including the
         * last bss page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_close;
        }

        /* What we have mapped so far */
        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

        /* Map the last of the bss segment */
        if (last_bss > elf_bss) {
                down_write(&current->mm->mmap_sem);
                error = do_brk(elf_bss, last_bss - elf_bss);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ARCH_SUPPORTS_AOUT
static unsigned long load_aout_interp(struct exec *interp_ex,
                struct file *interpreter)
{
        unsigned long text_data, elf_entry = ~0UL;
        char __user * addr;
        loff_t offset;

        current->mm->end_code = interp_ex->a_text;
        text_data = interp_ex->a_text + interp_ex->a_data;
        current->mm->end_data = text_data;
        current->mm->brk = interp_ex->a_bss + text_data;

        switch (N_MAGIC(*interp_ex)) {
        case OMAGIC:
                offset = 32;
                addr = (char __user *)0;
                break;
        case ZMAGIC:
        case QMAGIC:
                offset = N_TXTOFF(*interp_ex);
                addr = (char __user *)N_TXTADDR(*interp_ex);
                break;
        default:
                goto out;
        }

        down_write(&current->mm->mmap_sem);
        do_brk(0, text_data);
        up_write(&current->mm->mmap_sem);
        if (!interpreter->f_op || !interpreter->f_op->read)
                goto out;
        if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
                goto out;
        flush_icache_range((unsigned long)addr,
                           (unsigned long)addr + text_data);

        down_write(&current->mm->mmap_sem);
        do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
                interp_ex->a_bss);
        up_write(&current->mm->mmap_sem);
        elf_entry = interp_ex->a_entry;

out:
        return elf_entry;
}
#else
/* dummy stub - this must never be called if !CONFIG_ARCH_SUPPORTS_AOUT */
static inline unsigned long load_aout_interp(struct exec *interp_ex,
                                             struct file *interpreter)
{
        return -ELIBACC;
}
#endif

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifdef CONFIG_ARCH_SUPPORTS_AOUT
#define INTERPRETER_AOUT 1
#define IS_AOUT_INTERP(x) ((x) == INTERPRETER_AOUT)
#else
#define IS_AOUT_INTERP(x) (0)
#endif

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
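
/*
 * Illustrative example (PAGE_SHIFT == 12, stack grows down):
 * STACK_RND_MASK is 0x7ff, so random_variable is at most 0x7ff pages,
 * i.e. up to 0x7ff << 12 == 8 MB - 4 KB.  With STACK_TOP == 0xc0000000
 * the randomized stack top lands somewhere in
 * [0xbf801000, 0xc0000000].
 */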

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned int interpreter_type = INTERPRETER_NONE;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int elf_exec_fileno;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc = 0;
        char passed_fileno[6];
        struct files_struct *files;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
                struct exec interp_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        files = current->files; /* Refcounted so ok */
        retval = unshare_files();
        if (retval < 0)
                goto out_free_ph;
        if (files == current->files) {
                put_files_struct(files);
                files = NULL;
        }

        /* exec will make our files private anyway, but for the a.out
           loader stuff we need to do it earlier */
        retval = get_unused_fd();
        if (retval < 0)
                goto out_free_fh;
        get_file(bprm->file);
        fd_install(elf_exec_fileno = retval, bprm->file);

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_file;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_file;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        /*
                         * The early SET_PERSONALITY here is so that the lookup
                         * for the interpreter happens in the namespace of the
                         * to-be-execed image.  SET_PERSONALITY can select an
                         * alternate root.
                         *
                         * However, SET_PERSONALITY is NOT allowed to switch
                         * this task into the new image's memory mapping
                         * policy - that is, TASK_SIZE must still evaluate to
                         * that which is appropriate to the execing application.
                         * This is because exit_mmap() needs to have TASK_SIZE
                         * evaluate to the size of the old image.
                         *
                         * So if (say) a 64-bit application is execing a 32-bit
                         * application it is the architecture's responsibility
                         * to defer changing the value of TASK_SIZE until the
                         * switch really is going to happen - do this in
                         * flush_thread().      - akpm
                         */
                        SET_PERSONALITY(loc->elf_ex, 0);

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        if (file_permission(interpreter, MAY_READ) < 0)
                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_ex = *((struct exec *)bprm->buf);
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                static int warn;
#ifdef CONFIG_ARCH_SUPPORTS_AOUT
                interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

                /* Now figure out which format our binary is */
                if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
                    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
                    (N_MAGIC(loc->interp_ex) != QMAGIC))
                        interpreter_type = INTERPRETER_ELF;
#else
                interpreter_type = INTERPRETER_ELF;
#endif
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        interpreter_type &= ~INTERPRETER_ELF;

                if (IS_AOUT_INTERP(interpreter_type) && warn < 10) {
                        printk(KERN_WARNING "a.out ELF interpreter %s is "
                                "deprecated and will not be supported "
                                "after Linux 2.6.25\n", elf_interpreter);
                        warn++;
                }

                retval = -ELIBBAD;
                if (!interpreter_type)
                        goto out_free_dentry;

                /* Make sure only one type was selected */
                if ((interpreter_type & INTERPRETER_ELF) &&
                     interpreter_type != INTERPRETER_ELF) {
                        // FIXME - ratelimit this before re-enabling
                        // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
                        interpreter_type = INTERPRETER_ELF;
                }
                /* Verify the interpreter has a valid arch */
                if ((interpreter_type == INTERPRETER_ELF) &&
                    !elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        } else {
                /* Executables without an interpreter also need a personality */
                SET_PERSONALITY(loc->elf_ex, 0);
        }

        /* OK, we are done with that, now set up the arg stuff,
           and then start this sucker up */
        if (IS_AOUT_INTERP(interpreter_type) && !bprm->sh_bang) {
                char *passed_p = passed_fileno;
                sprintf(passed_fileno, "%d", elf_exec_fileno);

                if (elf_interpreter) {
                        retval = copy_strings_kernel(1, &passed_p, bprm);
                        if (retval)
                                goto out_free_dentry;
                        bprm->argc++;
                }
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Discard our unneeded old files struct */
        if (files) {
                put_files_struct(files);
                files = NULL;
        }

        /* OK, This is the point of no return */
        current->flags &= ~PF_FORKNOEXEC;
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex, 0);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;
        arch_pick_mmap_layout(current->mm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmaping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                          elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
                        load_bias = 0;
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                if (IS_AOUT_INTERP(interpreter_type)) {
                        elf_entry = load_aout_interp(&loc->interp_ex,
                                                     interpreter);
                } else {
                        unsigned long uninitialized_var(interp_map_addr);

                        elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                                    interpreter,
                                                    &interp_map_addr,
                                                    load_bias);
                        if (!IS_ERR((void *)elf_entry)) {
                                /*
                                 * load_elf_interp() returns relocation
                                 * adjustment
                                 */
                                interp_load_addr = elf_entry;
                                elf_entry += loc->interp_elf_ex.e_entry;
                        }
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        if (!IS_AOUT_INTERP(interpreter_type))
                sys_close(elf_exec_fileno);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        compute_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          IS_AOUT_INTERP(interpreter_type),
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        if (IS_AOUT_INTERP(interpreter_type))
                current->mm->arg_start += strlen(passed_fileno) + 1;
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                down_write(&current->mm->mmap_sem);
                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        if (unlikely(current->ptrace & PT_PTRACED)) {
                if (current->ptrace & PT_TRACE_EXEC)
                        ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
                else
                        send_sig(SIGTRAP, current, 0);
        }
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_file:
        sys_close(elf_exec_fileno);
out_free_fh:
        if (files)
                reset_files_struct(current, files);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}
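
/*
 * Illustrative example of the ET_DYN load_bias arithmetic above
 * (assuming a non-x86 arch with ELF_ET_DYN_BASE == 0x40000000): for a
 * first PT_LOAD with p_vaddr == 0, the loader requests a mapping at
 * load_bias == 0x40000000; if elf_map() actually places it at
 * 0x40001000, load_bias is corrected by the difference, so that
 * e_entry, the bss and all later segments shift consistently.
 */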

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        down_write(&current->mm->mmap_sem);
        error = do_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        up_write(&current->mm->mmap_sem);
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                down_write(&current->mm->mmap_sem);
                do_brk(len, bss - len);
                up_write(&current->mm->mmap_sem);
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
                        return 0;
        } else {
                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
                if (!buf)
                        return 0;
                while (off > 0) {
                        unsigned long n = off;
                        if (n > PAGE_SIZE)
                                n = PAGE_SIZE;
                        if (!dump_write(file, buf, n)) {
                                /* don't leak the zeroed page on a short write */
                                free_page((unsigned long)buf);
                                return 0;
                        }
                        off -= n;
                }
                free_page((unsigned long)buf);
        }
        return 1;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
        /* The vma can be set up to tell us the answer directly.  */
        if (vma->vm_flags & VM_ALWAYSDUMP)
                goto whole;

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                if (get_user(word, header) == 0 && word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}
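
/*
 * Illustrative example of the FILTER() test above: with
 * mm_flags == (1UL << MMF_DUMP_ANON_PRIVATE), a private file-backed
 * vma that has been written to (vma->anon_vma != NULL) is dumped in
 * full, while an untouched read-only mapping of the same file
 * contributes nothing (or one page, if MMF_DUMP_ELF_HEADERS is also
 * set and the mapping starts with the ELF magic).
 */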

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
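
/*
 * Illustrative size calculation: for a "CORE"/NT_PRSTATUS note,
 * notesize() is sizeof(struct elf_note) (12 bytes) plus the name
 * "CORE\0" rounded up to 8 bytes, plus sizeof(struct elf_prstatus)
 * rounded up to a 4-byte boundary.
 */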
1369
1370 #define DUMP_WRITE(addr, nr, foffset)   \
1371         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1372
1373 static int alignfile(struct file *file, loff_t *foffset)
1374 {
1375         static const char buf[4] = { 0, };
1376         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1377         return 1;
1378 }
1379
1380 static int writenote(struct memelfnote *men, struct file *file,
1381                         loff_t *foffset)
1382 {
1383         struct elf_note en;
1384         en.n_namesz = strlen(men->name) + 1;
1385         en.n_descsz = men->datasz;
1386         en.n_type = men->type;
1387
1388         DUMP_WRITE(&en, sizeof(en), foffset);
1389         DUMP_WRITE(men->name, en.n_namesz, foffset);
1390         if (!alignfile(file, foffset))
1391                 return 0;
1392         DUMP_WRITE(men->data, men->datasz, foffset);
1393         if (!alignfile(file, foffset))
1394                 return 0;
1395
1396         return 1;
1397 }
1398 #undef DUMP_WRITE
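/*
 * The bytes emitted by writenote() follow the standard ELF note layout:
 *
 *	+----------+----------+----------+
 *	| n_namesz | n_descsz | n_type   |	struct elf_note
 *	+----------+----------+----------+
 *	| name, NUL-terminated, padded to a 4-byte boundary
 *	+---------------------------------
 *	| desc (datasz bytes), padded to a 4-byte boundary
 *	+---------------------------------
 */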
1399
1400 #define DUMP_WRITE(addr, nr)    \
1401         do { if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1402                 goto end_coredump; } while (0)
1403 #define DUMP_SEEK(off)  \
1404         do { if (!dump_seek(file, (off))) \
1405                 goto end_coredump; } while (0)
1406
1407 static void fill_elf_header(struct elfhdr *elf, int segs,
1408                             u16 machine, u32 flags, u8 osabi)
1409 {
1410         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1411         elf->e_ident[EI_CLASS] = ELF_CLASS;
1412         elf->e_ident[EI_DATA] = ELF_DATA;
1413         elf->e_ident[EI_VERSION] = EV_CURRENT;
1414         elf->e_ident[EI_OSABI] = ELF_OSABI;
1415         memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1416
1417         elf->e_type = ET_CORE;
1418         elf->e_machine = machine;
1419         elf->e_version = EV_CURRENT;
1420         elf->e_entry = 0;
1421         elf->e_phoff = sizeof(struct elfhdr);
1422         elf->e_shoff = 0;
1423         elf->e_flags = flags;
1424         elf->e_ehsize = sizeof(struct elfhdr);
1425         elf->e_phentsize = sizeof(struct elf_phdr);
1426         elf->e_phnum = segs;
1427         elf->e_shentsize = 0;
1428         elf->e_shnum = 0;
1429         elf->e_shstrndx = 0;
1430         return;
1431 }
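/*
 * Note that e_shoff/e_shnum stay zero: a core dump carries no section
 * table, only program headers.  Inspecting the result with binutils
 * should therefore show, roughly:
 *
 *	$ readelf -h core
 *	  Type:                      CORE (Core file)
 *	  Start of section headers:  0 (bytes into file)
 *	  Number of program headers: <segs>
 *	  Number of section headers: 0
 */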
1432
1433 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1434 {
1435         phdr->p_type = PT_NOTE;
1436         phdr->p_offset = offset;
1437         phdr->p_vaddr = 0;
1438         phdr->p_paddr = 0;
1439         phdr->p_filesz = sz;
1440         phdr->p_memsz = 0;
1441         phdr->p_flags = 0;
1442         phdr->p_align = 0;
1443         return;
1444 }
1445
1446 static void fill_note(struct memelfnote *note, const char *name, int type, 
1447                 unsigned int sz, void *data)
1448 {
1449         note->name = name;
1450         note->type = type;
1451         note->datasz = sz;
1452         note->data = data;
1453         return;
1454 }
1455
1456 /*
1457  * Fill in all the fields in prstatus from the given task struct, except
1458  * the registers, which need to be filled in separately.
1459  */
1460 static void fill_prstatus(struct elf_prstatus *prstatus,
1461                 struct task_struct *p, long signr)
1462 {
1463         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1464         prstatus->pr_sigpend = p->pending.signal.sig[0];
1465         prstatus->pr_sighold = p->blocked.sig[0];
1466         prstatus->pr_pid = task_pid_vnr(p);
1467         prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1468         prstatus->pr_pgrp = task_pgrp_vnr(p);
1469         prstatus->pr_sid = task_session_vnr(p);
1470         if (thread_group_leader(p)) {
1471                 /*
1472                  * This is the record for the group leader.  Add in the
1473                  * cumulative times of previous dead threads.  This total
1474                  * won't include the time of each live thread whose state
1475                  * is included in the core dump.  The final total reported
1476                  * to our parent process when it calls wait4 will include
1477                  * those sums as well as the little bit more time it takes
1478                  * this and each other thread to finish dying after the
1479                  * core dump synchronization phase.
1480                  */
1481                 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1482                                    &prstatus->pr_utime);
1483                 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1484                                    &prstatus->pr_stime);
1485         } else {
1486                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1487                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1488         }
1489         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1490         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1491 }
1492
1493 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1494                        struct mm_struct *mm)
1495 {
1496         unsigned int i, len;
1497         
1498         /* first copy the parameters from user space */
1499         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1500
1501         len = mm->arg_end - mm->arg_start;
1502         if (len >= ELF_PRARGSZ)
1503                 len = ELF_PRARGSZ-1;
1504         if (copy_from_user(&psinfo->pr_psargs,
1505                            (const char __user *)mm->arg_start, len))
1506                 return -EFAULT;
1507         for (i = 0; i < len; i++)
1508                 if (psinfo->pr_psargs[i] == 0)
1509                         psinfo->pr_psargs[i] = ' ';
1510         psinfo->pr_psargs[len] = 0;
1511
1512         psinfo->pr_pid = task_pid_vnr(p);
1513         psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1514         psinfo->pr_pgrp = task_pgrp_vnr(p);
1515         psinfo->pr_sid = task_session_vnr(p);
1516
1517         i = p->state ? ffz(~p->state) + 1 : 0;
1518         psinfo->pr_state = i;
1519         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1520         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1521         psinfo->pr_nice = task_nice(p);
1522         psinfo->pr_flag = p->flags;
1523         SET_UID(psinfo->pr_uid, p->uid);
1524         SET_GID(psinfo->pr_gid, p->gid);
1525         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1526         
1527         return 0;
1528 }
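/*
 * Example of the pr_psargs rewrite above: a process started as
 * "cat /etc/hosts" has "cat\0/etc/hosts\0" between arg_start and
 * arg_end; every NUL in the copied window (the final one included)
 * becomes a space, so debuggers see the single printable string
 * "cat /etc/hosts ".
 */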
1529
1530 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1531 {
1532         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1533         int i = 0;
1534         do
1535                 i += 2;
1536         while (auxv[i - 2] != AT_NULL);
1537         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1538 }
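/*
 * saved_auxv is a flat array of (type, value) pairs terminated by an
 * AT_NULL entry, e.g. (values illustrative only):
 *
 *	{ AT_PHDR, 0x400040, AT_PHNUM, 9, ..., AT_NULL, 0 }
 *
 * The do/while above counts two entries at a time until it has stepped
 * over the terminating pair, so the AT_NULL sentinel itself is dumped
 * as part of the note.
 */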
1539
1540 #ifdef CORE_DUMP_USE_REGSET
1541 #include <linux/regset.h>
1542
1543 struct elf_thread_core_info {
1544         struct elf_thread_core_info *next;
1545         struct task_struct *task;
1546         struct elf_prstatus prstatus;
1547         struct memelfnote notes[0];
1548 };
1549
1550 struct elf_note_info {
1551         struct elf_thread_core_info *thread;
1552         struct memelfnote psinfo;
1553         struct memelfnote auxv;
1554         size_t size;
1555         int thread_notes;
1556 };
1557
1558 static int fill_thread_core_info(struct elf_thread_core_info *t,
1559                                  const struct user_regset_view *view,
1560                                  long signr, size_t *total)
1561 {
1562         unsigned int i;
1563
1564         /*
1565          * NT_PRSTATUS is the one special case, because the regset data
1566          * goes into the pr_reg field inside the note contents, rather
1567          * than being the whole note contents.  We fill the rest in here.
1568          * We assume that regset 0 is NT_PRSTATUS.
1569          */
1570         fill_prstatus(&t->prstatus, t->task, signr);
1571         (void) view->regsets[0].get(t->task, &view->regsets[0],
1572                                     0, sizeof(t->prstatus.pr_reg),
1573                                     &t->prstatus.pr_reg, NULL);
1574
1575         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1576                   sizeof(t->prstatus), &t->prstatus);
1577         *total += notesize(&t->notes[0]);
1578
1579         /*
1580          * Each other regset might generate a note too.  For each regset
1581          * that has no core_note_type or is inactive, we leave t->notes[i]
1582          * all zero and we'll know to skip writing it later.
1583          */
1584         for (i = 1; i < view->n; ++i) {
1585                 const struct user_regset *regset = &view->regsets[i];
1586                 if (regset->core_note_type &&
1587                     (!regset->active || regset->active(t->task, regset))) {
1588                         int ret;
1589                         size_t size = regset->n * regset->size;
1590                         void *data = kmalloc(size, GFP_KERNEL);
1591                         if (unlikely(!data))
1592                                 return 0;
1593                         ret = regset->get(t->task, regset,
1594                                           0, size, data, NULL);
1595                         if (unlikely(ret))
1596                                 kfree(data);
1597                         else {
1598                                 if (regset->core_note_type != NT_PRFPREG)
1599                                         fill_note(&t->notes[i], "LINUX",
1600                                                   regset->core_note_type,
1601                                                   size, data);
1602                                 else {
1603                                         t->prstatus.pr_fpvalid = 1;
1604                                         fill_note(&t->notes[i], "CORE",
1605                                                   NT_PRFPREG, size, data);
1606                                 }
1607                                 *total += notesize(&t->notes[i]);
1608                         }
1609                 }
1610         }
1611
1612         return 1;
1613 }
1614
1615 static int fill_note_info(struct elfhdr *elf, int phdrs,
1616                           struct elf_note_info *info,
1617                           long signr, struct pt_regs *regs)
1618 {
1619         struct task_struct *dump_task = current;
1620         const struct user_regset_view *view = task_user_regset_view(dump_task);
1621         struct elf_thread_core_info *t;
1622         struct elf_prpsinfo *psinfo;
1623         struct task_struct *g, *p;
1624         unsigned int i;
1625
1626         info->size = 0;
1627         info->thread = NULL;
1628
1629         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1630         if (psinfo == NULL)
1631                 return 0;
1632
1633         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1634
1635         /*
1636          * Figure out how many notes we're going to need for each thread.
1637          */
1638         info->thread_notes = 0;
1639         for (i = 0; i < view->n; ++i)
1640                 if (view->regsets[i].core_note_type != 0)
1641                         ++info->thread_notes;
1642
1643         /*
1644          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1645          * since it is our one special case.
1646          */
1647         if (unlikely(info->thread_notes == 0) ||
1648             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1649                 WARN_ON(1);
1650                 return 0;
1651         }
1652
1653         /*
1654          * Initialize the ELF file header.
1655          */
1656         fill_elf_header(elf, phdrs,
1657                         view->e_machine, view->e_flags, view->ei_osabi);
1658
1659         /*
1660          * Allocate a structure for each thread.
1661          */
1662         rcu_read_lock();
1663         do_each_thread(g, p)
1664                 if (p->mm == dump_task->mm) {
1665                         t = kzalloc(offsetof(struct elf_thread_core_info,
1666                                              notes[info->thread_notes]),
1667                                     GFP_ATOMIC);
1668                         if (unlikely(!t)) {
1669                                 rcu_read_unlock();
1670                                 return 0;
1671                         }
1672                         t->task = p;
1673                         if (p == dump_task || !info->thread) {
1674                                 t->next = info->thread;
1675                                 info->thread = t;
1676                         } else {
1677                                 /*
1678                                  * Make sure to keep the original task at
1679                                  * the head of the list.
1680                                  */
1681                                 t->next = info->thread->next;
1682                                 info->thread->next = t;
1683                         }
1684                 }
1685         while_each_thread(g, p);
1686         rcu_read_unlock();
1687
1688         /*
1689          * Now fill in each thread's information.
1690          */
1691         for (t = info->thread; t != NULL; t = t->next)
1692                 if (!fill_thread_core_info(t, view, signr, &info->size))
1693                         return 0;
1694
1695         /*
1696          * Fill in the two process-wide notes.
1697          */
1698         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1699         info->size += notesize(&info->psinfo);
1700
1701         fill_auxv_note(&info->auxv, current->mm);
1702         info->size += notesize(&info->auxv);
1703
1704         return 1;
1705 }
1706
1707 static size_t get_note_info_size(struct elf_note_info *info)
1708 {
1709         return info->size;
1710 }
1711
1712 /*
1713  * Write all the notes for each thread.  When writing the first thread, the
1714  * process-wide notes are interleaved after the first thread-specific note.
1715  */
1716 static int write_note_info(struct elf_note_info *info,
1717                            struct file *file, loff_t *foffset)
1718 {
1719         bool first = true;
1720         struct elf_thread_core_info *t = info->thread;
1721
1722         do {
1723                 int i;
1724
1725                 if (!writenote(&t->notes[0], file, foffset))
1726                         return 0;
1727
1728                 if (first && !writenote(&info->psinfo, file, foffset))
1729                         return 0;
1730                 if (first && !writenote(&info->auxv, file, foffset))
1731                         return 0;
1732
1733                 for (i = 1; i < info->thread_notes; ++i)
1734                         if (t->notes[i].data &&
1735                             !writenote(&t->notes[i], file, foffset))
1736                                 return 0;
1737
1738                 first = false;
1739                 t = t->next;
1740         } while (t);
1741
1742         return 1;
1743 }
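/*
 * For a two-thread process whose regset view provides PRSTATUS and
 * FPREGSET, the loop above emits the notes in this order:
 *
 *	PRSTATUS(T0), PRPSINFO, AUXV, FPREGSET(T0),
 *	PRSTATUS(T1), FPREGSET(T1)
 *
 * i.e. the process-wide notes ride along directly after the dumping
 * thread's PRSTATUS, as the comment above describes.
 */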
1744
1745 static void free_note_info(struct elf_note_info *info)
1746 {
1747         struct elf_thread_core_info *threads = info->thread;
1748         while (threads) {
1749                 unsigned int i;
1750                 struct elf_thread_core_info *t = threads;
1751                 threads = t->next;
1752                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1753                 for (i = 1; i < info->thread_notes; ++i)
1754                         kfree(t->notes[i].data);
1755                 kfree(t);
1756         }
1757         kfree(info->psinfo.data);
1758 }
1759
1760 #else
1761
1762 /* Here is the structure in which status of each thread is captured. */
1763 struct elf_thread_status
1764 {
1765         struct list_head list;
1766         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1767         elf_fpregset_t fpu;             /* NT_PRFPREG */
1768         struct task_struct *thread;
1769 #ifdef ELF_CORE_COPY_XFPREGS
1770         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1771 #endif
1772         struct memelfnote notes[3];
1773         int num_notes;
1774 };
1775
1776 /*
1777  * In order to add the thread-specific information to the ELF core file, we
1778  * need to keep a linked list of every thread's prstatus and then create
1779  * a single note segment covering them all in the final core file.
1780  */
1781 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1782 {
1783         int sz = 0;
1784         struct task_struct *p = t->thread;
1785         t->num_notes = 0;
1786
1787         fill_prstatus(&t->prstatus, p, signr);
1788         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1789         
1790         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1791                   &(t->prstatus));
1792         t->num_notes++;
1793         sz += notesize(&t->notes[0]);
1794
1795         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1796                                                                 &t->fpu))) {
1797                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1798                           &(t->fpu));
1799                 t->num_notes++;
1800                 sz += notesize(&t->notes[1]);
1801         }
1802
1803 #ifdef ELF_CORE_COPY_XFPREGS
1804         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1805                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1806                           sizeof(t->xfpu), &t->xfpu);
1807                 t->num_notes++;
1808                 sz += notesize(&t->notes[2]);
1809         }
1810 #endif  
1811         return sz;
1812 }
1813
1814 struct elf_note_info {
1815         struct memelfnote *notes;
1816         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1817         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1818         struct list_head thread_list;
1819         elf_fpregset_t *fpu;
1820 #ifdef ELF_CORE_COPY_XFPREGS
1821         elf_fpxregset_t *xfpu;
1822 #endif
1823         int thread_status_size;
1824         int numnote;
1825 };
1826
1827 static int fill_note_info(struct elfhdr *elf, int phdrs,
1828                           struct elf_note_info *info,
1829                           long signr, struct pt_regs *regs)
1830 {
1831 #define NUM_NOTES       6
1832         struct list_head *t;
1833         struct task_struct *g, *p;
1834
1835         info->notes = NULL;
1836         info->prstatus = NULL;
1837         info->psinfo = NULL;
1838         info->fpu = NULL;
1839 #ifdef ELF_CORE_COPY_XFPREGS
1840         info->xfpu = NULL;
1841 #endif
1842         INIT_LIST_HEAD(&info->thread_list);
1843
1844         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1845                               GFP_KERNEL);
1846         if (!info->notes)
1847                 return 0;
1848         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1849         if (!info->psinfo)
1850                 return 0;
1851         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1852         if (!info->prstatus)
1853                 return 0;
1854         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1855         if (!info->fpu)
1856                 return 0;
1857 #ifdef ELF_CORE_COPY_XFPREGS
1858         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1859         if (!info->xfpu)
1860                 return 0;
1861 #endif
1862
1863         info->thread_status_size = 0;
1864         if (signr) {
1865                 struct elf_thread_status *tmp;
1866                 rcu_read_lock();
1867                 do_each_thread(g, p)
1868                         if (current->mm == p->mm && current != p) {
1869                                 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1870                                 if (!tmp) {
1871                                         rcu_read_unlock();
1872                                         return 0;
1873                                 }
1874                                 tmp->thread = p;
1875                                 list_add(&tmp->list, &info->thread_list);
1876                         }
1877                 while_each_thread(g, p);
1878                 rcu_read_unlock();
1879                 list_for_each(t, &info->thread_list) {
1880                         struct elf_thread_status *tmp;
1881                         int sz;
1882
1883                         tmp = list_entry(t, struct elf_thread_status, list);
1884                         sz = elf_dump_thread_status(signr, tmp);
1885                         info->thread_status_size += sz;
1886                 }
1887         }
1888         /* now collect the dump for the current task */
1889         memset(info->prstatus, 0, sizeof(*info->prstatus));
1890         fill_prstatus(info->prstatus, current, signr);
1891         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1892
1893         /* Set up header */
1894         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1895
1896         /*
1897          * Set up the notes in similar form to SVR4 core dumps made
1898          * with info from their /proc.
1899          */
1900
1901         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1902                   sizeof(*info->prstatus), info->prstatus);
1903         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1904         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1905                   sizeof(*info->psinfo), info->psinfo);
1906
1907         info->numnote = 2;
1908
1909         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1910
1911         /* Try to dump the FPU. */
1912         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1913                                                                info->fpu);
1914         if (info->prstatus->pr_fpvalid)
1915                 fill_note(info->notes + info->numnote++,
1916                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1917 #ifdef ELF_CORE_COPY_XFPREGS
1918         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1919                 fill_note(info->notes + info->numnote++,
1920                           "LINUX", ELF_CORE_XFPREG_TYPE,
1921                           sizeof(*info->xfpu), info->xfpu);
1922 #endif
1923
1924         return 1;
1925
1926 #undef NUM_NOTES
1927 }
1928
1929 static size_t get_note_info_size(struct elf_note_info *info)
1930 {
1931         int sz = 0;
1932         int i;
1933
1934         for (i = 0; i < info->numnote; i++)
1935                 sz += notesize(info->notes + i);
1936
1937         sz += info->thread_status_size;
1938
1939         return sz;
1940 }
1941
1942 static int write_note_info(struct elf_note_info *info,
1943                            struct file *file, loff_t *foffset)
1944 {
1945         int i;
1946         struct list_head *t;
1947
1948         for (i = 0; i < info->numnote; i++)
1949                 if (!writenote(info->notes + i, file, foffset))
1950                         return 0;
1951
1952         /* write out the thread status notes section */
1953         list_for_each(t, &info->thread_list) {
1954                 struct elf_thread_status *tmp =
1955                                 list_entry(t, struct elf_thread_status, list);
1956
1957                 for (i = 0; i < tmp->num_notes; i++)
1958                         if (!writenote(&tmp->notes[i], file, foffset))
1959                                 return 0;
1960         }
1961
1962         return 1;
1963 }
1964
1965 static void free_note_info(struct elf_note_info *info)
1966 {
1967         while (!list_empty(&info->thread_list)) {
1968                 struct list_head *tmp = info->thread_list.next;
1969                 list_del(tmp);
1970                 kfree(list_entry(tmp, struct elf_thread_status, list));
1971         }
1972
1973         kfree(info->prstatus);
1974         kfree(info->psinfo);
1975         kfree(info->notes);
1976         kfree(info->fpu);
1977 #ifdef ELF_CORE_COPY_XFPREGS
1978         kfree(info->xfpu);
1979 #endif
1980 }
1981
1982 #endif
1983
1984 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1985                                         struct vm_area_struct *gate_vma)
1986 {
1987         struct vm_area_struct *ret = tsk->mm->mmap;
1988
1989         if (ret)
1990                 return ret;
1991         return gate_vma;
1992 }
1993 /*
1994  * Helper function for iterating across a vma list.  It ensures that the caller
1995  * will visit `gate_vma' prior to terminating the search.
1996  */
1997 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1998                                         struct vm_area_struct *gate_vma)
1999 {
2000         struct vm_area_struct *ret;
2001
2002         ret = this_vma->vm_next;
2003         if (ret)
2004                 return ret;
2005         if (this_vma == gate_vma)
2006                 return NULL;
2007         return gate_vma;
2008 }
2009
2010 /*
2011  * Actual dumper
2012  *
2013  * This is a two-pass process; first we find the offsets of the bits,
2014  * and then they are actually written out.  If we run out of core limit
2015  * we just truncate.
2016  */
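/*
 * The resulting file layout, for reference:
 *
 *	ELF header
 *	PT_NOTE program header
 *	one PT_LOAD program header per vma (plus any extra arch phdrs)
 *	note data
 *	padding up to an ELF_EXEC_PAGESIZE boundary
 *	vma contents, each truncated to vma_dump_size() bytes
 */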
2017 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
2018 {
2019         int has_dumped = 0;
2020         mm_segment_t fs;
2021         int segs;
2022         size_t size = 0;
2023         struct vm_area_struct *vma, *gate_vma;
2024         struct elfhdr *elf = NULL;
2025         loff_t offset = 0, dataoff, foffset;
2026         unsigned long mm_flags;
2027         struct elf_note_info info;
2028
2029         /*
2030          * We no longer stop all VM operations.
2031          * 
2032          * This is because any processes that could possibly change map_count
2033          * or the mmap / vma pages are now blocked in do_exit until current
2034          * has finished this core dump.
2035          *
2036          * Only ptrace can touch these memory addresses, but it doesn't change
2037          * the map_count or the pages allocated. So no possibility of crashing
2038          * exists while dumping the mm->vm_next areas to the core file.
2039          */
2040   
2041         /* alloc memory for large data structures: too large to be on stack */
2042         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2043         if (!elf)
2044                 goto cleanup;
2045         
2046         segs = current->mm->map_count;
2047 #ifdef ELF_CORE_EXTRA_PHDRS
2048         segs += ELF_CORE_EXTRA_PHDRS;
2049 #endif
2050
2051         gate_vma = get_gate_vma(current);
2052         if (gate_vma != NULL)
2053                 segs++;
2054
2055         /*
2056          * Collect all the non-memory information about the process for the
2057          * notes.  This also sets up the file header.
2058          */
2059         if (!fill_note_info(elf, segs + 1, /* including notes section */
2060                             &info, signr, regs))
2061                 goto cleanup;
2062
2063         has_dumped = 1;
2064         current->flags |= PF_DUMPCORE;
2065   
2066         fs = get_fs();
2067         set_fs(KERNEL_DS);
2068
2069         DUMP_WRITE(elf, sizeof(*elf));
2070         offset += sizeof(*elf);                         /* Elf header */
2071         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
2072         foffset = offset;
2073
2074         /* Write notes phdr entry */
2075         {
2076                 struct elf_phdr phdr;
2077                 size_t sz = get_note_info_size(&info);
2078
2079                 sz += elf_coredump_extra_notes_size();
2080
2081                 fill_elf_note_phdr(&phdr, sz, offset);
2082                 offset += sz;
2083                 DUMP_WRITE(&phdr, sizeof(phdr));
2084         }
2085
2086         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2087
2088         /*
2089          * We must use the same mm->flags while dumping core to avoid
2090          * inconsistency between the program headers and bodies, otherwise an
2091          * unusable core file can be generated.
2092          */
2093         mm_flags = current->mm->flags;
2094
2095         /* Write program headers for segments dump */
2096         for (vma = first_vma(current, gate_vma); vma != NULL;
2097                         vma = next_vma(vma, gate_vma)) {
2098                 struct elf_phdr phdr;
2099
2100                 phdr.p_type = PT_LOAD;
2101                 phdr.p_offset = offset;
2102                 phdr.p_vaddr = vma->vm_start;
2103                 phdr.p_paddr = 0;
2104                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
2105                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2106                 offset += phdr.p_filesz;
2107                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2108                 if (vma->vm_flags & VM_WRITE)
2109                         phdr.p_flags |= PF_W;
2110                 if (vma->vm_flags & VM_EXEC)
2111                         phdr.p_flags |= PF_X;
2112                 phdr.p_align = ELF_EXEC_PAGESIZE;
2113
2114                 DUMP_WRITE(&phdr, sizeof(phdr));
2115         }
2116
2117 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2118         ELF_CORE_WRITE_EXTRA_PHDRS;
2119 #endif
2120
2121         /* write out the notes section */
2122         if (!write_note_info(&info, file, &foffset))
2123                 goto end_coredump;
2124
2125         if (elf_coredump_extra_notes_write(file, &foffset))
2126                 goto end_coredump;
2127
2128         /* Align to page */
2129         DUMP_SEEK(dataoff - foffset);
2130
2131         for (vma = first_vma(current, gate_vma); vma != NULL;
2132                         vma = next_vma(vma, gate_vma)) {
2133                 unsigned long addr;
2134                 unsigned long end;
2135
2136                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2137
2138                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2139                         struct page *page;
2140                         struct vm_area_struct *page_vma; /* vma of the faulted page */
2141
2142                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2143                                                 &page, &page_vma) <= 0) {
2144                                 DUMP_SEEK(PAGE_SIZE);
2145                         } else {
2146                                 if (page == ZERO_PAGE(0)) {
2147                                         if (!dump_seek(file, PAGE_SIZE)) {
2148                                                 page_cache_release(page);
2149                                                 goto end_coredump;
2150                                         }
2151                                 } else {
2152                                         void *kaddr;
2153                                         flush_cache_page(page_vma, addr,
2154                                                          page_to_pfn(page));
2155                                         kaddr = kmap(page);
2156                                         if ((size += PAGE_SIZE) > limit ||
2157                                             !dump_write(file, kaddr,
2158                                             PAGE_SIZE)) {
2159                                                 kunmap(page);
2160                                                 page_cache_release(page);
2161                                                 goto end_coredump;
2162                                         }
2163                                         kunmap(page);
2164                                 }
2165                                 page_cache_release(page);
2166                         }
2167                 }
2168         }
2169
2170 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2171         ELF_CORE_WRITE_EXTRA_DATA;
2172 #endif
2173
2174 end_coredump:
2175         set_fs(fs);
2176
2177 cleanup:
2178         kfree(elf);
2179         free_note_info(&info);
2180         return has_dumped;
2181 }
2182
2183 #endif          /* USE_ELF_CORE_DUMP */
2184
2185 static int __init init_elf_binfmt(void)
2186 {
2187         return register_binfmt(&elf_format);
2188 }
2189
2190 static void __exit exit_elf_binfmt(void)
2191 {
2192         /* Remove the ELF loader. */
2193         unregister_binfmt(&elf_format);
2194 }
2195
2196 core_initcall(init_elf_binfmt);
2197 module_exit(exit_elf_binfmt);
2198 MODULE_LICENSE("GPL");