ftrace: trace schedule
[linux-2.6] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/fcntl.h>
25 #include <linux/ptrace.h>
26 #include <linux/slab.h>
27 #include <linux/shm.h>
28 #include <linux/personality.h>
29 #include <linux/elfcore.h>
30 #include <linux/init.h>
31 #include <linux/highuid.h>
32 #include <linux/smp.h>
33 #include <linux/compiler.h>
34 #include <linux/highmem.h>
35 #include <linux/pagemap.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/random.h>
39 #include <linux/elf.h>
40 #include <linux/utsname.h>
41 #include <asm/uaccess.h>
42 #include <asm/param.h>
43 #include <asm/page.h>
44
/* Forward declarations for the handlers referenced by elf_format below. */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
			     struct elf_phdr *eppnt, int prot, int type,
			     unsigned long total_size);
49
/*
 * Core dumping is optional.  When it is not both requested
 * (USE_ELF_CORE_DUMP) and configured (CONFIG_ELF_CORE), elf_core_dump
 * becomes NULL so that the binfmt descriptor carries no dump handler.
 */
#if !defined(USE_ELF_CORE_DUMP) || !defined(CONFIG_ELF_CORE)
#define elf_core_dump	NULL
#else
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file,
			 unsigned long limit);
#endif
59
/* Segment alignment is the larger of the ELF page size and the kernel's. */
#if ELF_EXEC_PAGESIZE <= PAGE_SIZE
#define ELF_MIN_ALIGN	PAGE_SIZE
#else
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#endif

/* Architectures may provide their own value; default to 0 otherwise. */
#if !defined(ELF_CORE_EFLAGS)
#define ELF_CORE_EFLAGS	0
#endif
69
/*
 * Address helpers working in units of ELF_MIN_ALIGN:
 *   ELF_PAGESTART  - round _v down to an alignment boundary
 *   ELF_PAGEOFFSET - offset of _v within its aligned unit
 *   ELF_PAGEALIGN  - round _v up to an alignment boundary
 *
 * The rounding masks are widened to unsigned long before being inverted,
 * so that a narrower unsigned ELF_MIN_ALIGN cannot yield a mask that
 * clears the upper bits of a wider address.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
73
74 static struct linux_binfmt elf_format = {
75                 .module         = THIS_MODULE,
76                 .load_binary    = load_elf_binary,
77                 .load_shlib     = load_elf_library,
78                 .core_dump      = elf_core_dump,
79                 .min_coredump   = ELF_EXEC_PAGESIZE,
80                 .hasvdso        = 1
81 };
82
83 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
84
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87         start = ELF_PAGEALIGN(start);
88         end = ELF_PAGEALIGN(end);
89         if (end > start) {
90                 unsigned long addr;
91                 down_write(&current->mm->mmap_sem);
92                 addr = do_brk(start, end - start);
93                 up_write(&current->mm->mmap_sem);
94                 if (BAD_ADDR(addr))
95                         return addr;
96         }
97         current->mm->start_brk = current->mm->brk = end;
98         return 0;
99 }
100
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  This would
103    contain the junk from the file that should not
104    be in memory
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108         unsigned long nbyte;
109
110         nbyte = ELF_PAGEOFFSET(elf_bss);
111         if (nbyte) {
112                 nbyte = ELF_MIN_ALIGN - nbyte;
113                 if (clear_user((void __user *) elf_bss, nbyte))
114                         return -EFAULT;
115         }
116         return 0;
117 }
118
/*
 * Stack manipulation helpers that hide the direction of stack growth.
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            direction the stack grows
 *   STACK_ROUND(sp, items) - sp moved by 'items' and rounded to a 16-byte
 *                            boundary (up when the stack grows up, down
 *                            otherwise), as an unsigned long
 *   STACK_ALLOC(sp, len)   - reserve 'len' units on the stack, updating sp
 *                            in place and yielding the start of the
 *                            reserved area
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "a + b") expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
	(sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
133
134 static int
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136                 unsigned long load_addr, unsigned long interp_load_addr)
137 {
138         unsigned long p = bprm->p;
139         int argc = bprm->argc;
140         int envc = bprm->envc;
141         elf_addr_t __user *argv;
142         elf_addr_t __user *envp;
143         elf_addr_t __user *sp;
144         elf_addr_t __user *u_platform;
145         const char *k_platform = ELF_PLATFORM;
146         int items;
147         elf_addr_t *elf_info;
148         int ei_index = 0;
149         struct task_struct *tsk = current;
150         struct vm_area_struct *vma;
151
152         /*
153          * In some cases (e.g. Hyper-Threading), we want to avoid L1
154          * evictions by the processes running on the same package. One
155          * thing we can do is to shuffle the initial stack for them.
156          */
157
158         p = arch_align_stack(p);
159
160         /*
161          * If this architecture has a platform capability string, copy it
162          * to userspace.  In some cases (Sparc), this info is impossible
163          * for userspace to get any other way, in others (i386) it is
164          * merely difficult.
165          */
166         u_platform = NULL;
167         if (k_platform) {
168                 size_t len = strlen(k_platform) + 1;
169
170                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
171                 if (__copy_to_user(u_platform, k_platform, len))
172                         return -EFAULT;
173         }
174
175         /* Create the ELF interpreter info */
176         elf_info = (elf_addr_t *)current->mm->saved_auxv;
177         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
178 #define NEW_AUX_ENT(id, val) \
179         do { \
180                 elf_info[ei_index++] = id; \
181                 elf_info[ei_index++] = val; \
182         } while (0)
183
184 #ifdef ARCH_DLINFO
185         /* 
186          * ARCH_DLINFO must come first so PPC can do its special alignment of
187          * AUXV.
188          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
189          * ARCH_DLINFO changes
190          */
191         ARCH_DLINFO;
192 #endif
193         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
194         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
195         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
196         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
197         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
198         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
199         NEW_AUX_ENT(AT_BASE, interp_load_addr);
200         NEW_AUX_ENT(AT_FLAGS, 0);
201         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
202         NEW_AUX_ENT(AT_UID, tsk->uid);
203         NEW_AUX_ENT(AT_EUID, tsk->euid);
204         NEW_AUX_ENT(AT_GID, tsk->gid);
205         NEW_AUX_ENT(AT_EGID, tsk->egid);
206         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
207         if (k_platform) {
208                 NEW_AUX_ENT(AT_PLATFORM,
209                             (elf_addr_t)(unsigned long)u_platform);
210         }
211         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
212                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
213         }
214 #undef NEW_AUX_ENT
215         /* AT_NULL is zero; clear the rest too */
216         memset(&elf_info[ei_index], 0,
217                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
218
219         /* And advance past the AT_NULL entry.  */
220         ei_index += 2;
221
222         sp = STACK_ADD(p, ei_index);
223
224         items = (argc + 1) + (envc + 1) + 1;
225         bprm->p = STACK_ROUND(sp, items);
226
227         /* Point sp at the lowest address on the stack */
228 #ifdef CONFIG_STACK_GROWSUP
229         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
230         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
231 #else
232         sp = (elf_addr_t __user *)bprm->p;
233 #endif
234
235
236         /*
237          * Grow the stack manually; some architectures have a limit on how
238          * far ahead a user-space access may be in order to grow the stack.
239          */
240         vma = find_extend_vma(current->mm, bprm->p);
241         if (!vma)
242                 return -EFAULT;
243
244         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
245         if (__put_user(argc, sp++))
246                 return -EFAULT;
247         argv = sp;
248         envp = argv + argc + 1;
249
250         /* Populate argv and envp */
251         p = current->mm->arg_end = current->mm->arg_start;
252         while (argc-- > 0) {
253                 size_t len;
254                 if (__put_user((elf_addr_t)p, argv++))
255                         return -EFAULT;
256                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
257                 if (!len || len > MAX_ARG_STRLEN)
258                         return -EINVAL;
259                 p += len;
260         }
261         if (__put_user(0, argv))
262                 return -EFAULT;
263         current->mm->arg_end = current->mm->env_start = p;
264         while (envc-- > 0) {
265                 size_t len;
266                 if (__put_user((elf_addr_t)p, envp++))
267                         return -EFAULT;
268                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
269                 if (!len || len > MAX_ARG_STRLEN)
270                         return -EINVAL;
271                 p += len;
272         }
273         if (__put_user(0, envp))
274                 return -EFAULT;
275         current->mm->env_end = p;
276
277         /* Put the elf_info on the stack in the right place.  */
278         sp = (elf_addr_t __user *)envp + 1;
279         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
280                 return -EFAULT;
281         return 0;
282 }
283
284 #ifndef elf_map
285
286 static unsigned long elf_map(struct file *filep, unsigned long addr,
287                 struct elf_phdr *eppnt, int prot, int type,
288                 unsigned long total_size)
289 {
290         unsigned long map_addr;
291         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
292         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
293         addr = ELF_PAGESTART(addr);
294         size = ELF_PAGEALIGN(size);
295
296         /* mmap() will return -EINVAL if given a zero size, but a
297          * segment with zero filesize is perfectly valid */
298         if (!size)
299                 return addr;
300
301         down_write(&current->mm->mmap_sem);
302         /*
303         * total_size is the size of the ELF (interpreter) image.
304         * The _first_ mmap needs to know the full size, otherwise
305         * randomization might put this image into an overlapping
306         * position with the ELF binary image. (since size < total_size)
307         * So we first map the 'big' image - and unmap the remainder at
308         * the end. (which unmap is needed for ELF images with holes.)
309         */
310         if (total_size) {
311                 total_size = ELF_PAGEALIGN(total_size);
312                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
313                 if (!BAD_ADDR(map_addr))
314                         do_munmap(current->mm, map_addr+size, total_size-size);
315         } else
316                 map_addr = do_mmap(filep, addr, size, prot, type, off);
317
318         up_write(&current->mm->mmap_sem);
319         return(map_addr);
320 }
321
322 #endif /* !elf_map */
323
324 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
325 {
326         int i, first_idx = -1, last_idx = -1;
327
328         for (i = 0; i < nr; i++) {
329                 if (cmds[i].p_type == PT_LOAD) {
330                         last_idx = i;
331                         if (first_idx == -1)
332                                 first_idx = i;
333                 }
334         }
335         if (first_idx == -1)
336                 return 0;
337
338         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
339                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
340 }
341
342
343 /* This is much more generalized than the library routine read function,
344    so we keep this separate.  Technically the library read function
345    is only provided so that we can read a.out libraries that have
346    an ELF header */
347
348 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
349                 struct file *interpreter, unsigned long *interp_map_addr,
350                 unsigned long no_base)
351 {
352         struct elf_phdr *elf_phdata;
353         struct elf_phdr *eppnt;
354         unsigned long load_addr = 0;
355         int load_addr_set = 0;
356         unsigned long last_bss = 0, elf_bss = 0;
357         unsigned long error = ~0UL;
358         unsigned long total_size;
359         int retval, i, size;
360
361         /* First of all, some simple consistency checks */
362         if (interp_elf_ex->e_type != ET_EXEC &&
363             interp_elf_ex->e_type != ET_DYN)
364                 goto out;
365         if (!elf_check_arch(interp_elf_ex))
366                 goto out;
367         if (!interpreter->f_op || !interpreter->f_op->mmap)
368                 goto out;
369
370         /*
371          * If the size of this structure has changed, then punt, since
372          * we will be doing the wrong thing.
373          */
374         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
375                 goto out;
376         if (interp_elf_ex->e_phnum < 1 ||
377                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
378                 goto out;
379
380         /* Now read in all of the header information */
381         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
382         if (size > ELF_MIN_ALIGN)
383                 goto out;
384         elf_phdata = kmalloc(size, GFP_KERNEL);
385         if (!elf_phdata)
386                 goto out;
387
388         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
389                              (char *)elf_phdata,size);
390         error = -EIO;
391         if (retval != size) {
392                 if (retval < 0)
393                         error = retval; 
394                 goto out_close;
395         }
396
397         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
398         if (!total_size) {
399                 error = -EINVAL;
400                 goto out_close;
401         }
402
403         eppnt = elf_phdata;
404         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
405                 if (eppnt->p_type == PT_LOAD) {
406                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
407                         int elf_prot = 0;
408                         unsigned long vaddr = 0;
409                         unsigned long k, map_addr;
410
411                         if (eppnt->p_flags & PF_R)
412                                 elf_prot = PROT_READ;
413                         if (eppnt->p_flags & PF_W)
414                                 elf_prot |= PROT_WRITE;
415                         if (eppnt->p_flags & PF_X)
416                                 elf_prot |= PROT_EXEC;
417                         vaddr = eppnt->p_vaddr;
418                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
419                                 elf_type |= MAP_FIXED;
420                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
421                                 load_addr = -vaddr;
422
423                         map_addr = elf_map(interpreter, load_addr + vaddr,
424                                         eppnt, elf_prot, elf_type, total_size);
425                         total_size = 0;
426                         if (!*interp_map_addr)
427                                 *interp_map_addr = map_addr;
428                         error = map_addr;
429                         if (BAD_ADDR(map_addr))
430                                 goto out_close;
431
432                         if (!load_addr_set &&
433                             interp_elf_ex->e_type == ET_DYN) {
434                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
435                                 load_addr_set = 1;
436                         }
437
438                         /*
439                          * Check to see if the section's size will overflow the
440                          * allowed task size. Note that p_filesz must always be
441                          * <= p_memsize so it's only necessary to check p_memsz.
442                          */
443                         k = load_addr + eppnt->p_vaddr;
444                         if (BAD_ADDR(k) ||
445                             eppnt->p_filesz > eppnt->p_memsz ||
446                             eppnt->p_memsz > TASK_SIZE ||
447                             TASK_SIZE - eppnt->p_memsz < k) {
448                                 error = -ENOMEM;
449                                 goto out_close;
450                         }
451
452                         /*
453                          * Find the end of the file mapping for this phdr, and
454                          * keep track of the largest address we see for this.
455                          */
456                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
457                         if (k > elf_bss)
458                                 elf_bss = k;
459
460                         /*
461                          * Do the same thing for the memory mapping - between
462                          * elf_bss and last_bss is the bss section.
463                          */
464                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
465                         if (k > last_bss)
466                                 last_bss = k;
467                 }
468         }
469
470         /*
471          * Now fill out the bss section.  First pad the last page up
472          * to the page boundary, and then perform a mmap to make sure
473          * that there are zero-mapped pages up to and including the 
474          * last bss page.
475          */
476         if (padzero(elf_bss)) {
477                 error = -EFAULT;
478                 goto out_close;
479         }
480
481         /* What we have mapped so far */
482         elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
483
484         /* Map the last of the bss segment */
485         if (last_bss > elf_bss) {
486                 down_write(&current->mm->mmap_sem);
487                 error = do_brk(elf_bss, last_bss - elf_bss);
488                 up_write(&current->mm->mmap_sem);
489                 if (BAD_ADDR(error))
490                         goto out_close;
491         }
492
493         error = load_addr;
494
495 out_close:
496         kfree(elf_phdata);
497 out:
498         return error;
499 }
500
501 /*
502  * These are the functions used to load ELF style executables and shared
503  * libraries.  There is no binary dependent code anywhere else.
504  */
505
506 #define INTERPRETER_NONE 0
507 #define INTERPRETER_ELF 2
508
509 #ifndef STACK_RND_MASK
510 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
511 #endif
512
513 static unsigned long randomize_stack_top(unsigned long stack_top)
514 {
515         unsigned int random_variable = 0;
516
517         if ((current->flags & PF_RANDOMIZE) &&
518                 !(current->personality & ADDR_NO_RANDOMIZE)) {
519                 random_variable = get_random_int() & STACK_RND_MASK;
520                 random_variable <<= PAGE_SHIFT;
521         }
522 #ifdef CONFIG_STACK_GROWSUP
523         return PAGE_ALIGN(stack_top) + random_variable;
524 #else
525         return PAGE_ALIGN(stack_top) - random_variable;
526 #endif
527 }
528
529 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
530 {
531         struct file *interpreter = NULL; /* to shut gcc up */
532         unsigned long load_addr = 0, load_bias = 0;
533         int load_addr_set = 0;
534         char * elf_interpreter = NULL;
535         unsigned long error;
536         struct elf_phdr *elf_ppnt, *elf_phdata;
537         unsigned long elf_bss, elf_brk;
538         int elf_exec_fileno;
539         int retval, i;
540         unsigned int size;
541         unsigned long elf_entry;
542         unsigned long interp_load_addr = 0;
543         unsigned long start_code, end_code, start_data, end_data;
544         unsigned long reloc_func_desc = 0;
545         int executable_stack = EXSTACK_DEFAULT;
546         unsigned long def_flags = 0;
547         struct {
548                 struct elfhdr elf_ex;
549                 struct elfhdr interp_elf_ex;
550         } *loc;
551
552         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
553         if (!loc) {
554                 retval = -ENOMEM;
555                 goto out_ret;
556         }
557         
558         /* Get the exec-header */
559         loc->elf_ex = *((struct elfhdr *)bprm->buf);
560
561         retval = -ENOEXEC;
562         /* First of all, some simple consistency checks */
563         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
564                 goto out;
565
566         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
567                 goto out;
568         if (!elf_check_arch(&loc->elf_ex))
569                 goto out;
570         if (!bprm->file->f_op||!bprm->file->f_op->mmap)
571                 goto out;
572
573         /* Now read in all of the header information */
574         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
575                 goto out;
576         if (loc->elf_ex.e_phnum < 1 ||
577                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
578                 goto out;
579         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
580         retval = -ENOMEM;
581         elf_phdata = kmalloc(size, GFP_KERNEL);
582         if (!elf_phdata)
583                 goto out;
584
585         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
586                              (char *)elf_phdata, size);
587         if (retval != size) {
588                 if (retval >= 0)
589                         retval = -EIO;
590                 goto out_free_ph;
591         }
592
593         retval = get_unused_fd();
594         if (retval < 0)
595                 goto out_free_ph;
596         get_file(bprm->file);
597         fd_install(elf_exec_fileno = retval, bprm->file);
598
599         elf_ppnt = elf_phdata;
600         elf_bss = 0;
601         elf_brk = 0;
602
603         start_code = ~0UL;
604         end_code = 0;
605         start_data = 0;
606         end_data = 0;
607
608         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
609                 if (elf_ppnt->p_type == PT_INTERP) {
610                         /* This is the program interpreter used for
611                          * shared libraries - for now assume that this
612                          * is an a.out format binary
613                          */
614                         retval = -ENOEXEC;
615                         if (elf_ppnt->p_filesz > PATH_MAX || 
616                             elf_ppnt->p_filesz < 2)
617                                 goto out_free_file;
618
619                         retval = -ENOMEM;
620                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
621                                                   GFP_KERNEL);
622                         if (!elf_interpreter)
623                                 goto out_free_file;
624
625                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
626                                              elf_interpreter,
627                                              elf_ppnt->p_filesz);
628                         if (retval != elf_ppnt->p_filesz) {
629                                 if (retval >= 0)
630                                         retval = -EIO;
631                                 goto out_free_interp;
632                         }
633                         /* make sure path is NULL terminated */
634                         retval = -ENOEXEC;
635                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
636                                 goto out_free_interp;
637
638                         /*
639                          * The early SET_PERSONALITY here is so that the lookup
640                          * for the interpreter happens in the namespace of the 
641                          * to-be-execed image.  SET_PERSONALITY can select an
642                          * alternate root.
643                          *
644                          * However, SET_PERSONALITY is NOT allowed to switch
645                          * this task into the new images's memory mapping
646                          * policy - that is, TASK_SIZE must still evaluate to
647                          * that which is appropriate to the execing application.
648                          * This is because exit_mmap() needs to have TASK_SIZE
649                          * evaluate to the size of the old image.
650                          *
651                          * So if (say) a 64-bit application is execing a 32-bit
652                          * application it is the architecture's responsibility
653                          * to defer changing the value of TASK_SIZE until the
654                          * switch really is going to happen - do this in
655                          * flush_thread().      - akpm
656                          */
657                         SET_PERSONALITY(loc->elf_ex, 0);
658
659                         interpreter = open_exec(elf_interpreter);
660                         retval = PTR_ERR(interpreter);
661                         if (IS_ERR(interpreter))
662                                 goto out_free_interp;
663
664                         /*
665                          * If the binary is not readable then enforce
666                          * mm->dumpable = 0 regardless of the interpreter's
667                          * permissions.
668                          */
669                         if (file_permission(interpreter, MAY_READ) < 0)
670                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
671
672                         retval = kernel_read(interpreter, 0, bprm->buf,
673                                              BINPRM_BUF_SIZE);
674                         if (retval != BINPRM_BUF_SIZE) {
675                                 if (retval >= 0)
676                                         retval = -EIO;
677                                 goto out_free_dentry;
678                         }
679
680                         /* Get the exec headers */
681                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
682                         break;
683                 }
684                 elf_ppnt++;
685         }
686
687         elf_ppnt = elf_phdata;
688         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
689                 if (elf_ppnt->p_type == PT_GNU_STACK) {
690                         if (elf_ppnt->p_flags & PF_X)
691                                 executable_stack = EXSTACK_ENABLE_X;
692                         else
693                                 executable_stack = EXSTACK_DISABLE_X;
694                         break;
695                 }
696
697         /* Some simple consistency checks for the interpreter */
698         if (elf_interpreter) {
699                 retval = -ELIBBAD;
700                 /* Not an ELF interpreter */
701                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
702                         goto out_free_dentry;
703                 /* Verify the interpreter has a valid arch */
704                 if (!elf_check_arch(&loc->interp_elf_ex))
705                         goto out_free_dentry;
706         } else {
707                 /* Executables without an interpreter also need a personality  */
708                 SET_PERSONALITY(loc->elf_ex, 0);
709         }
710
711         /* Flush all traces of the currently running executable */
712         retval = flush_old_exec(bprm);
713         if (retval)
714                 goto out_free_dentry;
715
716         /* OK, This is the point of no return */
717         current->flags &= ~PF_FORKNOEXEC;
718         current->mm->def_flags = def_flags;
719
720         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
721            may depend on the personality.  */
722         SET_PERSONALITY(loc->elf_ex, 0);
723         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
724                 current->personality |= READ_IMPLIES_EXEC;
725
726         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
727                 current->flags |= PF_RANDOMIZE;
728         arch_pick_mmap_layout(current->mm);
729
730         /* Do this so that we can load the interpreter, if need be.  We will
731            change some of these later */
732         current->mm->free_area_cache = current->mm->mmap_base;
733         current->mm->cached_hole_size = 0;
734         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
735                                  executable_stack);
736         if (retval < 0) {
737                 send_sig(SIGKILL, current, 0);
738                 goto out_free_dentry;
739         }
740         
741         current->mm->start_stack = bprm->p;
742
743         /* Now we do a little grungy work by mmaping the ELF image into
744            the correct location in memory. */
745         for(i = 0, elf_ppnt = elf_phdata;
746             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
747                 int elf_prot = 0, elf_flags;
748                 unsigned long k, vaddr;
749
750                 if (elf_ppnt->p_type != PT_LOAD)
751                         continue;
752
753                 if (unlikely (elf_brk > elf_bss)) {
754                         unsigned long nbyte;
755                     
756                         /* There was a PT_LOAD segment with p_memsz > p_filesz
757                            before this one. Map anonymous pages, if needed,
758                            and clear the area.  */
759                         retval = set_brk (elf_bss + load_bias,
760                                           elf_brk + load_bias);
761                         if (retval) {
762                                 send_sig(SIGKILL, current, 0);
763                                 goto out_free_dentry;
764                         }
765                         nbyte = ELF_PAGEOFFSET(elf_bss);
766                         if (nbyte) {
767                                 nbyte = ELF_MIN_ALIGN - nbyte;
768                                 if (nbyte > elf_brk - elf_bss)
769                                         nbyte = elf_brk - elf_bss;
770                                 if (clear_user((void __user *)elf_bss +
771                                                         load_bias, nbyte)) {
772                                         /*
773                                          * This bss-zeroing can fail if the ELF
774                                          * file specifies odd protections. So
775                                          * we don't check the return value
776                                          */
777                                 }
778                         }
779                 }
780
781                 if (elf_ppnt->p_flags & PF_R)
782                         elf_prot |= PROT_READ;
783                 if (elf_ppnt->p_flags & PF_W)
784                         elf_prot |= PROT_WRITE;
785                 if (elf_ppnt->p_flags & PF_X)
786                         elf_prot |= PROT_EXEC;
787
788                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
789
790                 vaddr = elf_ppnt->p_vaddr;
791                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
792                         elf_flags |= MAP_FIXED;
793                 } else if (loc->elf_ex.e_type == ET_DYN) {
794                         /* Try and get dynamic programs out of the way of the
795                          * default mmap base, as well as whatever program they
796                          * might try to exec.  This is because the brk will
797                          * follow the loader, and is not movable.  */
798 #ifdef CONFIG_X86
799                         load_bias = 0;
800 #else
801                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
802 #endif
803                 }
804
805                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
806                                 elf_prot, elf_flags, 0);
807                 if (BAD_ADDR(error)) {
808                         send_sig(SIGKILL, current, 0);
809                         retval = IS_ERR((void *)error) ?
810                                 PTR_ERR((void*)error) : -EINVAL;
811                         goto out_free_dentry;
812                 }
813
814                 if (!load_addr_set) {
815                         load_addr_set = 1;
816                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
817                         if (loc->elf_ex.e_type == ET_DYN) {
818                                 load_bias += error -
819                                              ELF_PAGESTART(load_bias + vaddr);
820                                 load_addr += load_bias;
821                                 reloc_func_desc = load_bias;
822                         }
823                 }
824                 k = elf_ppnt->p_vaddr;
825                 if (k < start_code)
826                         start_code = k;
827                 if (start_data < k)
828                         start_data = k;
829
830                 /*
831                  * Check to see if the section's size will overflow the
832                  * allowed task size. Note that p_filesz must always be
833                  * <= p_memsz so it is only necessary to check p_memsz.
834                  */
835                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
836                     elf_ppnt->p_memsz > TASK_SIZE ||
837                     TASK_SIZE - elf_ppnt->p_memsz < k) {
838                         /* set_brk can never work. Avoid overflows. */
839                         send_sig(SIGKILL, current, 0);
840                         retval = -EINVAL;
841                         goto out_free_dentry;
842                 }
843
844                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
845
846                 if (k > elf_bss)
847                         elf_bss = k;
848                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
849                         end_code = k;
850                 if (end_data < k)
851                         end_data = k;
852                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
853                 if (k > elf_brk)
854                         elf_brk = k;
855         }
856
857         loc->elf_ex.e_entry += load_bias;
858         elf_bss += load_bias;
859         elf_brk += load_bias;
860         start_code += load_bias;
861         end_code += load_bias;
862         start_data += load_bias;
863         end_data += load_bias;
864
865         /* Calling set_brk effectively mmaps the pages that we need
866          * for the bss and break sections.  We must do this before
867          * mapping in the interpreter, to make sure it doesn't wind
868          * up getting placed where the bss needs to go.
869          */
870         retval = set_brk(elf_bss, elf_brk);
871         if (retval) {
872                 send_sig(SIGKILL, current, 0);
873                 goto out_free_dentry;
874         }
875         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
876                 send_sig(SIGSEGV, current, 0);
877                 retval = -EFAULT; /* Nobody gets to see this, but.. */
878                 goto out_free_dentry;
879         }
880
881         if (elf_interpreter) {
882                 unsigned long uninitialized_var(interp_map_addr);
883
884                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
885                                             interpreter,
886                                             &interp_map_addr,
887                                             load_bias);
888                 if (!IS_ERR((void *)elf_entry)) {
889                         /*
890                          * load_elf_interp() returns relocation
891                          * adjustment
892                          */
893                         interp_load_addr = elf_entry;
894                         elf_entry += loc->interp_elf_ex.e_entry;
895                 }
896                 if (BAD_ADDR(elf_entry)) {
897                         force_sig(SIGSEGV, current);
898                         retval = IS_ERR((void *)elf_entry) ?
899                                         (int)elf_entry : -EINVAL;
900                         goto out_free_dentry;
901                 }
902                 reloc_func_desc = interp_load_addr;
903
904                 allow_write_access(interpreter);
905                 fput(interpreter);
906                 kfree(elf_interpreter);
907         } else {
908                 elf_entry = loc->elf_ex.e_entry;
909                 if (BAD_ADDR(elf_entry)) {
910                         force_sig(SIGSEGV, current);
911                         retval = -EINVAL;
912                         goto out_free_dentry;
913                 }
914         }
915
916         kfree(elf_phdata);
917
918         sys_close(elf_exec_fileno);
919
920         set_binfmt(&elf_format);
921
922 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
923         retval = arch_setup_additional_pages(bprm, executable_stack);
924         if (retval < 0) {
925                 send_sig(SIGKILL, current, 0);
926                 goto out;
927         }
928 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
929
930         compute_creds(bprm);
931         current->flags &= ~PF_FORKNOEXEC;
932         retval = create_elf_tables(bprm, &loc->elf_ex,
933                           load_addr, interp_load_addr);
934         if (retval < 0) {
935                 send_sig(SIGKILL, current, 0);
936                 goto out;
937         }
938         /* N.B. passed_fileno might not be initialized? */
939         current->mm->end_code = end_code;
940         current->mm->start_code = start_code;
941         current->mm->start_data = start_data;
942         current->mm->end_data = end_data;
943         current->mm->start_stack = bprm->p;
944
945 #ifdef arch_randomize_brk
946         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
947                 current->mm->brk = current->mm->start_brk =
948                         arch_randomize_brk(current->mm);
949 #endif
950
951         if (current->personality & MMAP_PAGE_ZERO) {
952                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
953                    and some applications "depend" upon this behavior.
954                    Since we do not have the power to recompile these, we
955                    emulate the SVr4 behavior. Sigh. */
956                 down_write(&current->mm->mmap_sem);
957                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
958                                 MAP_FIXED | MAP_PRIVATE, 0);
959                 up_write(&current->mm->mmap_sem);
960         }
961
962 #ifdef ELF_PLAT_INIT
963         /*
964          * The ABI may specify that certain registers be set up in special
965          * ways (on i386 %edx is the address of a DT_FINI function, for
966          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
967          * that the e_entry field is the address of the function descriptor
968          * for the startup routine, rather than the address of the startup
969          * routine itself.  This macro performs whatever initialization to
970          * the regs structure is required as well as any relocations to the
971          * function descriptor entries when executing dynamically links apps.
972          */
973         ELF_PLAT_INIT(regs, reloc_func_desc);
974 #endif
975
976         start_thread(regs, elf_entry, bprm->p);
977         if (unlikely(current->ptrace & PT_PTRACED)) {
978                 if (current->ptrace & PT_TRACE_EXEC)
979                         ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
980                 else
981                         send_sig(SIGTRAP, current, 0);
982         }
983         retval = 0;
984 out:
985         kfree(loc);
986 out_ret:
987         return retval;
988
989         /* error cleanup */
990 out_free_dentry:
991         allow_write_access(interpreter);
992         if (interpreter)
993                 fput(interpreter);
994 out_free_interp:
995         kfree(elf_interpreter);
996 out_free_file:
997         sys_close(elf_exec_fileno);
998 out_free_ph:
999         kfree(elf_phdata);
1000         goto out;
1001 }
1002
1003 /* This is really simpleminded and specialized - we are loading an
1004    a.out library that is given an ELF header. */
1005 static int load_elf_library(struct file *file)
1006 {
1007         struct elf_phdr *elf_phdata;
1008         struct elf_phdr *eppnt;
1009         unsigned long elf_bss, bss, len;
1010         int retval, error, i, j;
1011         struct elfhdr elf_ex;
1012
1013         error = -ENOEXEC;
1014         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1015         if (retval != sizeof(elf_ex))
1016                 goto out;
1017
1018         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1019                 goto out;
1020
1021         /* First of all, some simple consistency checks */
1022         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1023             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1024                 goto out;
1025
1026         /* Now read in all of the header information */
1027
1028         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1029         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1030
1031         error = -ENOMEM;
1032         elf_phdata = kmalloc(j, GFP_KERNEL);
1033         if (!elf_phdata)
1034                 goto out;
1035
1036         eppnt = elf_phdata;
1037         error = -ENOEXEC;
1038         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1039         if (retval != j)
1040                 goto out_free_ph;
1041
1042         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1043                 if ((eppnt + i)->p_type == PT_LOAD)
1044                         j++;
1045         if (j != 1)
1046                 goto out_free_ph;
1047
1048         while (eppnt->p_type != PT_LOAD)
1049                 eppnt++;
1050
1051         /* Now use mmap to map the library into memory. */
1052         down_write(&current->mm->mmap_sem);
1053         error = do_mmap(file,
1054                         ELF_PAGESTART(eppnt->p_vaddr),
1055                         (eppnt->p_filesz +
1056                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1057                         PROT_READ | PROT_WRITE | PROT_EXEC,
1058                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1059                         (eppnt->p_offset -
1060                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1061         up_write(&current->mm->mmap_sem);
1062         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1063                 goto out_free_ph;
1064
1065         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1066         if (padzero(elf_bss)) {
1067                 error = -EFAULT;
1068                 goto out_free_ph;
1069         }
1070
1071         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1072                             ELF_MIN_ALIGN - 1);
1073         bss = eppnt->p_memsz + eppnt->p_vaddr;
1074         if (bss > len) {
1075                 down_write(&current->mm->mmap_sem);
1076                 do_brk(len, bss - len);
1077                 up_write(&current->mm->mmap_sem);
1078         }
1079         error = 0;
1080
1081 out_free_ph:
1082         kfree(elf_phdata);
1083 out:
1084         return error;
1085 }
1086
1087 /*
1088  * Note that some platforms still use traditional core dumps and not
1089  * the ELF core dump.  Each platform can select it as appropriate.
1090  */
1091 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1092
1093 /*
1094  * ELF core dumper
1095  *
1096  * Modelled on fs/exec.c:aout_core_dump()
1097  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1098  */
1099 /*
1100  * These are the only things you should do on a core-file: use only these
1101  * functions to write out all the necessary info.
1102  */
1103 static int dump_write(struct file *file, const void *addr, int nr)
1104 {
1105         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1106 }
1107
1108 static int dump_seek(struct file *file, loff_t off)
1109 {
1110         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1111                 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1112                         return 0;
1113         } else {
1114                 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1115                 if (!buf)
1116                         return 0;
1117                 while (off > 0) {
1118                         unsigned long n = off;
1119                         if (n > PAGE_SIZE)
1120                                 n = PAGE_SIZE;
1121                         if (!dump_write(file, buf, n))
1122                                 return 0;
1123                         off -= n;
1124                 }
1125                 free_page((unsigned long)buf);
1126         }
1127         return 1;
1128 }
1129
1130 /*
1131  * Decide what to dump of a segment, part, all or none.
1132  */
1133 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1134                                    unsigned long mm_flags)
1135 {
1136         /* The vma can be set up to tell us the answer directly.  */
1137         if (vma->vm_flags & VM_ALWAYSDUMP)
1138                 goto whole;
1139
1140         /* Do not dump I/O mapped devices or special mappings */
1141         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1142                 return 0;
1143
1144 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1145
1146         /* By default, dump shared memory if mapped from an anonymous file. */
1147         if (vma->vm_flags & VM_SHARED) {
1148                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1149                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1150                         goto whole;
1151                 return 0;
1152         }
1153
1154         /* Dump segments that have been written to.  */
1155         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1156                 goto whole;
1157         if (vma->vm_file == NULL)
1158                 return 0;
1159
1160         if (FILTER(MAPPED_PRIVATE))
1161                 goto whole;
1162
1163         /*
1164          * If this looks like the beginning of a DSO or executable mapping,
1165          * check for an ELF header.  If we find one, dump the first page to
1166          * aid in determining what was mapped here.
1167          */
1168         if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1169                 u32 __user *header = (u32 __user *) vma->vm_start;
1170                 u32 word;
1171                 /*
1172                  * Doing it this way gets the constant folded by GCC.
1173                  */
1174                 union {
1175                         u32 cmp;
1176                         char elfmag[SELFMAG];
1177                 } magic;
1178                 BUILD_BUG_ON(SELFMAG != sizeof word);
1179                 magic.elfmag[EI_MAG0] = ELFMAG0;
1180                 magic.elfmag[EI_MAG1] = ELFMAG1;
1181                 magic.elfmag[EI_MAG2] = ELFMAG2;
1182                 magic.elfmag[EI_MAG3] = ELFMAG3;
1183                 if (get_user(word, header) == 0 && word == magic.cmp)
1184                         return PAGE_SIZE;
1185         }
1186
1187 #undef  FILTER
1188
1189         return 0;
1190
1191 whole:
1192         return vma->vm_end - vma->vm_start;
1193 }
1194
/*
 * An ELF note in memory.
 *
 * This is the in-kernel description of one note before it is serialised
 * by writenote(): the on-disk header (struct elf_note) is derived from
 * these fields at write time.
 */
struct memelfnote
{
        const char *name;       /* NUL-terminated note name; written with its terminator */
        int type;               /* becomes n_type in the on-disk header */
        unsigned int datasz;    /* payload length in bytes; becomes n_descsz */
        void *data;             /* payload, datasz bytes long */
};
1203
1204 static int notesize(struct memelfnote *en)
1205 {
1206         int sz;
1207
1208         sz = sizeof(struct elf_note);
1209         sz += roundup(strlen(en->name) + 1, 4);
1210         sz += roundup(en->datasz, 4);
1211
1212         return sz;
1213 }
1214
/*
 * Write nr bytes from addr to the core file and add nr to *foffset.
 * On a failed write the enclosing function returns 0.  Expects a
 * variable named "file" in the expanding scope; nr is evaluated twice.
 * This three-argument form is #undef'd once the note writers below
 * have been defined.
 */
#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1217
1218 static int alignfile(struct file *file, loff_t *foffset)
1219 {
1220         static const char buf[4] = { 0, };
1221         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1222         return 1;
1223 }
1224
1225 static int writenote(struct memelfnote *men, struct file *file,
1226                         loff_t *foffset)
1227 {
1228         struct elf_note en;
1229         en.n_namesz = strlen(men->name) + 1;
1230         en.n_descsz = men->datasz;
1231         en.n_type = men->type;
1232
1233         DUMP_WRITE(&en, sizeof(en), foffset);
1234         DUMP_WRITE(men->name, en.n_namesz, foffset);
1235         if (!alignfile(file, foffset))
1236                 return 0;
1237         DUMP_WRITE(men->data, men->datasz, foffset);
1238         if (!alignfile(file, foffset))
1239                 return 0;
1240
1241         return 1;
1242 }
1243 #undef DUMP_WRITE
1244
/*
 * Second, two-argument flavour of DUMP_WRITE.  It first adds nr to a
 * running total named "size" and jumps to the "end_coredump" label if
 * that total exceeds "limit" or if the write itself fails.  The names
 * size, limit, file and end_coredump must all exist where the macro is
 * expanded.
 *
 * NOTE(review): this expands to a bare "if" statement that already ends
 * in a semicolon, so it is only safe as a stand-alone statement - not as
 * the unbraced body of an if/else.
 */
#define DUMP_WRITE(addr, nr)    \
        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
                goto end_coredump;
/*
 * Skip forward off bytes in the core file via dump_seek(), jumping to
 * "end_coredump" on failure.  Same bare-"if" caveat as DUMP_WRITE above.
 */
#define DUMP_SEEK(off)  \
        if (!dump_seek(file, (off))) \
                goto end_coredump;
1251
1252 static void fill_elf_header(struct elfhdr *elf, int segs,
1253                             u16 machine, u32 flags, u8 osabi)
1254 {
1255         memset(elf, 0, sizeof(*elf));
1256
1257         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1258         elf->e_ident[EI_CLASS] = ELF_CLASS;
1259         elf->e_ident[EI_DATA] = ELF_DATA;
1260         elf->e_ident[EI_VERSION] = EV_CURRENT;
1261         elf->e_ident[EI_OSABI] = ELF_OSABI;
1262
1263         elf->e_type = ET_CORE;
1264         elf->e_machine = machine;
1265         elf->e_version = EV_CURRENT;
1266         elf->e_phoff = sizeof(struct elfhdr);
1267         elf->e_flags = flags;
1268         elf->e_ehsize = sizeof(struct elfhdr);
1269         elf->e_phentsize = sizeof(struct elf_phdr);
1270         elf->e_phnum = segs;
1271
1272         return;
1273 }
1274
1275 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1276 {
1277         phdr->p_type = PT_NOTE;
1278         phdr->p_offset = offset;
1279         phdr->p_vaddr = 0;
1280         phdr->p_paddr = 0;
1281         phdr->p_filesz = sz;
1282         phdr->p_memsz = 0;
1283         phdr->p_flags = 0;
1284         phdr->p_align = 0;
1285         return;
1286 }
1287
1288 static void fill_note(struct memelfnote *note, const char *name, int type, 
1289                 unsigned int sz, void *data)
1290 {
1291         note->name = name;
1292         note->type = type;
1293         note->datasz = sz;
1294         note->data = data;
1295         return;
1296 }
1297
1298 /*
1299  * fill up all the fields in prstatus from the given task struct, except
1300  * registers which need to be filled up separately.
1301  */
1302 static void fill_prstatus(struct elf_prstatus *prstatus,
1303                 struct task_struct *p, long signr)
1304 {
1305         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1306         prstatus->pr_sigpend = p->pending.signal.sig[0];
1307         prstatus->pr_sighold = p->blocked.sig[0];
1308         prstatus->pr_pid = task_pid_vnr(p);
1309         prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1310         prstatus->pr_pgrp = task_pgrp_vnr(p);
1311         prstatus->pr_sid = task_session_vnr(p);
1312         if (thread_group_leader(p)) {
1313                 /*
1314                  * This is the record for the group leader.  Add in the
1315                  * cumulative times of previous dead threads.  This total
1316                  * won't include the time of each live thread whose state
1317                  * is included in the core dump.  The final total reported
1318                  * to our parent process when it calls wait4 will include
1319                  * those sums as well as the little bit more time it takes
1320                  * this and each other thread to finish dying after the
1321                  * core dump synchronization phase.
1322                  */
1323                 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1324                                    &prstatus->pr_utime);
1325                 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1326                                    &prstatus->pr_stime);
1327         } else {
1328                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1329                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1330         }
1331         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1332         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1333 }
1334
1335 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1336                        struct mm_struct *mm)
1337 {
1338         unsigned int i, len;
1339         
1340         /* first copy the parameters from user space */
1341         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1342
1343         len = mm->arg_end - mm->arg_start;
1344         if (len >= ELF_PRARGSZ)
1345                 len = ELF_PRARGSZ-1;
1346         if (copy_from_user(&psinfo->pr_psargs,
1347                            (const char __user *)mm->arg_start, len))
1348                 return -EFAULT;
1349         for(i = 0; i < len; i++)
1350                 if (psinfo->pr_psargs[i] == 0)
1351                         psinfo->pr_psargs[i] = ' ';
1352         psinfo->pr_psargs[len] = 0;
1353
1354         psinfo->pr_pid = task_pid_vnr(p);
1355         psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1356         psinfo->pr_pgrp = task_pgrp_vnr(p);
1357         psinfo->pr_sid = task_session_vnr(p);
1358
1359         i = p->state ? ffz(~p->state) + 1 : 0;
1360         psinfo->pr_state = i;
1361         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1362         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1363         psinfo->pr_nice = task_nice(p);
1364         psinfo->pr_flag = p->flags;
1365         SET_UID(psinfo->pr_uid, p->uid);
1366         SET_GID(psinfo->pr_gid, p->gid);
1367         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1368         
1369         return 0;
1370 }
1371
1372 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1373 {
1374         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1375         int i = 0;
1376         do
1377                 i += 2;
1378         while (auxv[i - 2] != AT_NULL);
1379         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1380 }
1381
1382 #ifdef CORE_DUMP_USE_REGSET
1383 #include <linux/regset.h>
1384
/*
 * Per-thread state gathered for a core dump.  Instances form a singly
 * linked list hanging off struct elf_note_info, and are allocated with
 * room for a caller-chosen number of trailing notes[] entries.
 */
struct elf_thread_core_info {
        struct elf_thread_core_info *next;      /* next thread in the list, or NULL */
        struct task_struct *task;               /* the thread being described */
        struct elf_prstatus prstatus;           /* payload of this thread's NT_PRSTATUS note */
        struct memelfnote notes[0];             /* trailing array; notes[0] is NT_PRSTATUS */
};
1391
/*
 * Everything that goes into the note segment of a core file when the
 * regset-based dumper is used.
 */
struct elf_note_info {
        struct elf_thread_core_info *thread;    /* list of threads; the dumping task is kept first */
        struct memelfnote psinfo;               /* process-wide NT_PRPSINFO note; data is kmalloc'ed */
        struct memelfnote auxv;                 /* process-wide NT_AUXV note */
        size_t size;                            /* running sum of notesize() over all notes */
        int thread_notes;                       /* notes[] entries allocated per thread */
};
1399
1400 /*
1401  * When a regset has a writeback hook, we call it on each thread before
1402  * dumping user memory.  On register window machines, this makes sure the
1403  * user memory backing the register data is up to date before we read it.
1404  */
1405 static void do_thread_regset_writeback(struct task_struct *task,
1406                                        const struct user_regset *regset)
1407 {
1408         if (regset->writeback)
1409                 regset->writeback(task, regset, 1);
1410 }
1411
1412 static int fill_thread_core_info(struct elf_thread_core_info *t,
1413                                  const struct user_regset_view *view,
1414                                  long signr, size_t *total)
1415 {
1416         unsigned int i;
1417
1418         /*
1419          * NT_PRSTATUS is the one special case, because the regset data
1420          * goes into the pr_reg field inside the note contents, rather
1421          * than being the whole note contents.  We fill the reset in here.
1422          * We assume that regset 0 is NT_PRSTATUS.
1423          */
1424         fill_prstatus(&t->prstatus, t->task, signr);
1425         (void) view->regsets[0].get(t->task, &view->regsets[0],
1426                                     0, sizeof(t->prstatus.pr_reg),
1427                                     &t->prstatus.pr_reg, NULL);
1428
1429         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1430                   sizeof(t->prstatus), &t->prstatus);
1431         *total += notesize(&t->notes[0]);
1432
1433         do_thread_regset_writeback(t->task, &view->regsets[0]);
1434
1435         /*
1436          * Each other regset might generate a note too.  For each regset
1437          * that has no core_note_type or is inactive, we leave t->notes[i]
1438          * all zero and we'll know to skip writing it later.
1439          */
1440         for (i = 1; i < view->n; ++i) {
1441                 const struct user_regset *regset = &view->regsets[i];
1442                 do_thread_regset_writeback(t->task, regset);
1443                 if (regset->core_note_type &&
1444                     (!regset->active || regset->active(t->task, regset))) {
1445                         int ret;
1446                         size_t size = regset->n * regset->size;
1447                         void *data = kmalloc(size, GFP_KERNEL);
1448                         if (unlikely(!data))
1449                                 return 0;
1450                         ret = regset->get(t->task, regset,
1451                                           0, size, data, NULL);
1452                         if (unlikely(ret))
1453                                 kfree(data);
1454                         else {
1455                                 if (regset->core_note_type != NT_PRFPREG)
1456                                         fill_note(&t->notes[i], "LINUX",
1457                                                   regset->core_note_type,
1458                                                   size, data);
1459                                 else {
1460                                         t->prstatus.pr_fpvalid = 1;
1461                                         fill_note(&t->notes[i], "CORE",
1462                                                   NT_PRFPREG, size, data);
1463                                 }
1464                                 *total += notesize(&t->notes[i]);
1465                         }
1466                 }
1467         }
1468
1469         return 1;
1470 }
1471
/*
 * Gather everything needed for the note segment of a core dump of the
 * current task: the ELF file header, one record per thread sharing the
 * dumping task's mm, and the process-wide NT_PRPSINFO and NT_AUXV notes.
 *
 * @elf:   ELF header to initialise.
 * @phdrs: number of program headers to record in @elf.
 * @info:  receives the thread list, the process-wide notes and the
 *         total note size.
 * @signr: signal number passed on to each thread's prstatus.
 * @regs:  not used by this implementation.
 *
 * Returns 1 on success and 0 on failure.  Nothing is freed on the
 * failure paths; whatever was allocated so far remains reachable
 * through @info.
 * NOTE(review): presumably the caller runs free_note_info() in either
 * case - confirm at the call site.
 */
static int fill_note_info(struct elfhdr *elf, int phdrs,
                          struct elf_note_info *info,
                          long signr, struct pt_regs *regs)
{
        struct task_struct *dump_task = current;
        const struct user_regset_view *view = task_user_regset_view(dump_task);
        struct elf_thread_core_info *t;
        struct elf_prpsinfo *psinfo;
        struct task_struct *g, *p;
        unsigned int i;

        info->size = 0;
        info->thread = NULL;

        /*
         * The note is filled in before the allocation is checked so that
         * info->psinfo.data holds a defined value (possibly NULL) on
         * every return path.
         */
        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

        if (psinfo == NULL)
                return 0;

        /*
         * Figure out how many notes we're going to need for each thread.
         */
        info->thread_notes = 0;
        for (i = 0; i < view->n; ++i)
                if (view->regsets[i].core_note_type != 0)
                        ++info->thread_notes;

        /*
         * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
         * since it is our one special case.
         */
        if (unlikely(info->thread_notes == 0) ||
            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
                WARN_ON(1);
                return 0;
        }

        /*
         * Initialize the ELF file header.
         */
        fill_elf_header(elf, phdrs,
                        view->e_machine, view->e_flags, view->ei_osabi);

        /*
         * Allocate a structure for each thread.
         * The walk runs under rcu_read_lock(), and the allocations
         * inside it use GFP_ATOMIC.
         */
        rcu_read_lock();
        do_each_thread(g, p)
                if (p->mm == dump_task->mm) {
                        /* Size covers the header plus thread_notes entries of notes[]. */
                        t = kzalloc(offsetof(struct elf_thread_core_info,
                                             notes[info->thread_notes]),
                                    GFP_ATOMIC);
                        if (unlikely(!t)) {
                                rcu_read_unlock();
                                return 0;
                        }
                        t->task = p;
                        if (p == dump_task || !info->thread) {
                                t->next = info->thread;
                                info->thread = t;
                        } else {
                                /*
                                 * Make sure to keep the original task at
                                 * the head of the list.
                                 */
                                t->next = info->thread->next;
                                info->thread->next = t;
                        }
                }
        while_each_thread(g, p);
        rcu_read_unlock();

        /*
         * Now fill in each thread's information.
         */
        for (t = info->thread; t != NULL; t = t->next)
                if (!fill_thread_core_info(t, view, signr, &info->size))
                        return 0;

        /*
         * Fill in the two process-wide notes.
         * The return value of fill_psinfo() is not checked.
         */
        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
        info->size += notesize(&info->psinfo);

        fill_auxv_note(&info->auxv, current->mm);
        info->size += notesize(&info->auxv);

        return 1;
}
1563
1564 static size_t get_note_info_size(struct elf_note_info *info)
1565 {
1566         return info->size;
1567 }
1568
1569 /*
1570  * Write all the notes for each thread.  When writing the first thread, the
1571  * process-wide notes are interleaved after the first thread-specific note.
1572  */
1573 static int write_note_info(struct elf_note_info *info,
1574                            struct file *file, loff_t *foffset)
1575 {
1576         bool first = 1;
1577         struct elf_thread_core_info *t = info->thread;
1578
1579         do {
1580                 int i;
1581
1582                 if (!writenote(&t->notes[0], file, foffset))
1583                         return 0;
1584
1585                 if (first && !writenote(&info->psinfo, file, foffset))
1586                         return 0;
1587                 if (first && !writenote(&info->auxv, file, foffset))
1588                         return 0;
1589
1590                 for (i = 1; i < info->thread_notes; ++i)
1591                         if (t->notes[i].data &&
1592                             !writenote(&t->notes[i], file, foffset))
1593                                 return 0;
1594
1595                 first = 0;
1596                 t = t->next;
1597         } while (t);
1598
1599         return 1;
1600 }
1601
1602 static void free_note_info(struct elf_note_info *info)
1603 {
1604         struct elf_thread_core_info *threads = info->thread;
1605         while (threads) {
1606                 unsigned int i;
1607                 struct elf_thread_core_info *t = threads;
1608                 threads = t->next;
1609                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1610                 for (i = 1; i < info->thread_notes; ++i)
1611                         kfree(t->notes[i].data);
1612                 kfree(t);
1613         }
1614         kfree(info->psinfo.data);
1615 }
1616
1617 #else
1618
1619 /* Here is the structure in which status of each thread is captured. */
1620 struct elf_thread_status
1621 {
1622         struct list_head list;
1623         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1624         elf_fpregset_t fpu;             /* NT_PRFPREG */
1625         struct task_struct *thread;
1626 #ifdef ELF_CORE_COPY_XFPREGS
1627         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1628 #endif
1629         struct memelfnote notes[3];
1630         int num_notes;
1631 };
1632
1633 /*
1634  * In order to add the specific thread information for the elf file format,
1635  * we need to keep a linked list of every threads pr_status and then create
1636  * a single section for them in the final core file.
1637  */
1638 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1639 {
1640         int sz = 0;
1641         struct task_struct *p = t->thread;
1642         t->num_notes = 0;
1643
1644         fill_prstatus(&t->prstatus, p, signr);
1645         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1646         
1647         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1648                   &(t->prstatus));
1649         t->num_notes++;
1650         sz += notesize(&t->notes[0]);
1651
1652         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1653                                                                 &t->fpu))) {
1654                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1655                           &(t->fpu));
1656                 t->num_notes++;
1657                 sz += notesize(&t->notes[1]);
1658         }
1659
1660 #ifdef ELF_CORE_COPY_XFPREGS
1661         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1662                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1663                           sizeof(t->xfpu), &t->xfpu);
1664                 t->num_notes++;
1665                 sz += notesize(&t->notes[2]);
1666         }
1667 #endif  
1668         return sz;
1669 }
1670
1671 struct elf_note_info {
1672         struct memelfnote *notes;
1673         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1674         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1675         struct list_head thread_list;
1676         elf_fpregset_t *fpu;
1677 #ifdef ELF_CORE_COPY_XFPREGS
1678         elf_fpxregset_t *xfpu;
1679 #endif
1680         int thread_status_size;
1681         int numnote;
1682 };
1683
1684 static int fill_note_info(struct elfhdr *elf, int phdrs,
1685                           struct elf_note_info *info,
1686                           long signr, struct pt_regs *regs)
1687 {
1688 #define NUM_NOTES       6
1689         struct list_head *t;
1690         struct task_struct *g, *p;
1691
1692         info->notes = NULL;
1693         info->prstatus = NULL;
1694         info->psinfo = NULL;
1695         info->fpu = NULL;
1696 #ifdef ELF_CORE_COPY_XFPREGS
1697         info->xfpu = NULL;
1698 #endif
1699         INIT_LIST_HEAD(&info->thread_list);
1700
1701         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1702                               GFP_KERNEL);
1703         if (!info->notes)
1704                 return 0;
1705         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1706         if (!info->psinfo)
1707                 return 0;
1708         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1709         if (!info->prstatus)
1710                 return 0;
1711         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1712         if (!info->fpu)
1713                 return 0;
1714 #ifdef ELF_CORE_COPY_XFPREGS
1715         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1716         if (!info->xfpu)
1717                 return 0;
1718 #endif
1719
1720         info->thread_status_size = 0;
1721         if (signr) {
1722                 struct elf_thread_status *ets;
1723                 rcu_read_lock();
1724                 do_each_thread(g, p)
1725                         if (current->mm == p->mm && current != p) {
1726                                 ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
1727                                 if (!ets) {
1728                                         rcu_read_unlock();
1729                                         return 0;
1730                                 }
1731                                 ets->thread = p;
1732                                 list_add(&ets->list, &info->thread_list);
1733                         }
1734                 while_each_thread(g, p);
1735                 rcu_read_unlock();
1736                 list_for_each(t, &info->thread_list) {
1737                         int sz;
1738
1739                         ets = list_entry(t, struct elf_thread_status, list);
1740                         sz = elf_dump_thread_status(signr, ets);
1741                         info->thread_status_size += sz;
1742                 }
1743         }
1744         /* now collect the dump for the current */
1745         memset(info->prstatus, 0, sizeof(*info->prstatus));
1746         fill_prstatus(info->prstatus, current, signr);
1747         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1748
1749         /* Set up header */
1750         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1751
1752         /*
1753          * Set up the notes in similar form to SVR4 core dumps made
1754          * with info from their /proc.
1755          */
1756
1757         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1758                   sizeof(*info->prstatus), info->prstatus);
1759         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1760         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1761                   sizeof(*info->psinfo), info->psinfo);
1762
1763         info->numnote = 2;
1764
1765         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1766
1767         /* Try to dump the FPU. */
1768         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1769                                                                info->fpu);
1770         if (info->prstatus->pr_fpvalid)
1771                 fill_note(info->notes + info->numnote++,
1772                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1773 #ifdef ELF_CORE_COPY_XFPREGS
1774         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1775                 fill_note(info->notes + info->numnote++,
1776                           "LINUX", ELF_CORE_XFPREG_TYPE,
1777                           sizeof(*info->xfpu), info->xfpu);
1778 #endif
1779
1780         return 1;
1781
1782 #undef NUM_NOTES
1783 }
1784
1785 static size_t get_note_info_size(struct elf_note_info *info)
1786 {
1787         int sz = 0;
1788         int i;
1789
1790         for (i = 0; i < info->numnote; i++)
1791                 sz += notesize(info->notes + i);
1792
1793         sz += info->thread_status_size;
1794
1795         return sz;
1796 }
1797
1798 static int write_note_info(struct elf_note_info *info,
1799                            struct file *file, loff_t *foffset)
1800 {
1801         int i;
1802         struct list_head *t;
1803
1804         for (i = 0; i < info->numnote; i++)
1805                 if (!writenote(info->notes + i, file, foffset))
1806                         return 0;
1807
1808         /* write out the thread status notes section */
1809         list_for_each(t, &info->thread_list) {
1810                 struct elf_thread_status *tmp =
1811                                 list_entry(t, struct elf_thread_status, list);
1812
1813                 for (i = 0; i < tmp->num_notes; i++)
1814                         if (!writenote(&tmp->notes[i], file, foffset))
1815                                 return 0;
1816         }
1817
1818         return 1;
1819 }
1820
1821 static void free_note_info(struct elf_note_info *info)
1822 {
1823         while (!list_empty(&info->thread_list)) {
1824                 struct list_head *tmp = info->thread_list.next;
1825                 list_del(tmp);
1826                 kfree(list_entry(tmp, struct elf_thread_status, list));
1827         }
1828
1829         kfree(info->prstatus);
1830         kfree(info->psinfo);
1831         kfree(info->notes);
1832         kfree(info->fpu);
1833 #ifdef ELF_CORE_COPY_XFPREGS
1834         kfree(info->xfpu);
1835 #endif
1836 }
1837
1838 #endif
1839
1840 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1841                                         struct vm_area_struct *gate_vma)
1842 {
1843         struct vm_area_struct *ret = tsk->mm->mmap;
1844
1845         if (ret)
1846                 return ret;
1847         return gate_vma;
1848 }
1849 /*
1850  * Helper function for iterating across a vma list.  It ensures that the caller
1851  * will visit `gate_vma' prior to terminating the search.
1852  */
1853 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1854                                         struct vm_area_struct *gate_vma)
1855 {
1856         struct vm_area_struct *ret;
1857
1858         ret = this_vma->vm_next;
1859         if (ret)
1860                 return ret;
1861         if (this_vma == gate_vma)
1862                 return NULL;
1863         return gate_vma;
1864 }
1865
1866 /*
1867  * Actual dumper
1868  *
1869  * This is a two-pass process; first we find the offsets of the bits,
1870  * and then they are actually written out.  If we run out of core limit
1871  * we just truncate.
1872  */
1873 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1874 {
1875         int has_dumped = 0;
1876         mm_segment_t fs;
1877         int segs;
1878         size_t size = 0;
1879         struct vm_area_struct *vma, *gate_vma;
1880         struct elfhdr *elf = NULL;
1881         loff_t offset = 0, dataoff, foffset;
1882         unsigned long mm_flags;
1883         struct elf_note_info info;
1884
1885         /*
1886          * We no longer stop all VM operations.
1887          * 
1888          * This is because those proceses that could possibly change map_count
1889          * or the mmap / vma pages are now blocked in do_exit on current
1890          * finishing this core dump.
1891          *
1892          * Only ptrace can touch these memory addresses, but it doesn't change
1893          * the map_count or the pages allocated. So no possibility of crashing
1894          * exists while dumping the mm->vm_next areas to the core file.
1895          */
1896   
1897         /* alloc memory for large data structures: too large to be on stack */
1898         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1899         if (!elf)
1900                 goto out;
1901         
1902         segs = current->mm->map_count;
1903 #ifdef ELF_CORE_EXTRA_PHDRS
1904         segs += ELF_CORE_EXTRA_PHDRS;
1905 #endif
1906
1907         gate_vma = get_gate_vma(current);
1908         if (gate_vma != NULL)
1909                 segs++;
1910
1911         /*
1912          * Collect all the non-memory information about the process for the
1913          * notes.  This also sets up the file header.
1914          */
1915         if (!fill_note_info(elf, segs + 1, /* including notes section */
1916                             &info, signr, regs))
1917                 goto cleanup;
1918
1919         has_dumped = 1;
1920         current->flags |= PF_DUMPCORE;
1921   
1922         fs = get_fs();
1923         set_fs(KERNEL_DS);
1924
1925         DUMP_WRITE(elf, sizeof(*elf));
1926         offset += sizeof(*elf);                         /* Elf header */
1927         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1928         foffset = offset;
1929
1930         /* Write notes phdr entry */
1931         {
1932                 struct elf_phdr phdr;
1933                 size_t sz = get_note_info_size(&info);
1934
1935                 sz += elf_coredump_extra_notes_size();
1936
1937                 fill_elf_note_phdr(&phdr, sz, offset);
1938                 offset += sz;
1939                 DUMP_WRITE(&phdr, sizeof(phdr));
1940         }
1941
1942         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1943
1944         /*
1945          * We must use the same mm->flags while dumping core to avoid
1946          * inconsistency between the program headers and bodies, otherwise an
1947          * unusable core file can be generated.
1948          */
1949         mm_flags = current->mm->flags;
1950
1951         /* Write program headers for segments dump */
1952         for (vma = first_vma(current, gate_vma); vma != NULL;
1953                         vma = next_vma(vma, gate_vma)) {
1954                 struct elf_phdr phdr;
1955
1956                 phdr.p_type = PT_LOAD;
1957                 phdr.p_offset = offset;
1958                 phdr.p_vaddr = vma->vm_start;
1959                 phdr.p_paddr = 0;
1960                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
1961                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1962                 offset += phdr.p_filesz;
1963                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1964                 if (vma->vm_flags & VM_WRITE)
1965                         phdr.p_flags |= PF_W;
1966                 if (vma->vm_flags & VM_EXEC)
1967                         phdr.p_flags |= PF_X;
1968                 phdr.p_align = ELF_EXEC_PAGESIZE;
1969
1970                 DUMP_WRITE(&phdr, sizeof(phdr));
1971         }
1972
1973 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1974         ELF_CORE_WRITE_EXTRA_PHDRS;
1975 #endif
1976
1977         /* write out the notes section */
1978         if (!write_note_info(&info, file, &foffset))
1979                 goto end_coredump;
1980
1981         if (elf_coredump_extra_notes_write(file, &foffset))
1982                 goto end_coredump;
1983
1984         /* Align to page */
1985         DUMP_SEEK(dataoff - foffset);
1986
1987         for (vma = first_vma(current, gate_vma); vma != NULL;
1988                         vma = next_vma(vma, gate_vma)) {
1989                 unsigned long addr;
1990                 unsigned long end;
1991
1992                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
1993
1994                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
1995                         struct page *page;
1996                         struct vm_area_struct *tmp_vma;
1997
1998                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1999                                                 &page, &tmp_vma) <= 0) {
2000                                 DUMP_SEEK(PAGE_SIZE);
2001                         } else {
2002                                 if (page == ZERO_PAGE(0)) {
2003                                         if (!dump_seek(file, PAGE_SIZE)) {
2004                                                 page_cache_release(page);
2005                                                 goto end_coredump;
2006                                         }
2007                                 } else {
2008                                         void *kaddr;
2009                                         flush_cache_page(tmp_vma, addr,
2010                                                          page_to_pfn(page));
2011                                         kaddr = kmap(page);
2012                                         if ((size += PAGE_SIZE) > limit ||
2013                                             !dump_write(file, kaddr,
2014                                             PAGE_SIZE)) {
2015                                                 kunmap(page);
2016                                                 page_cache_release(page);
2017                                                 goto end_coredump;
2018                                         }
2019                                         kunmap(page);
2020                                 }
2021                                 page_cache_release(page);
2022                         }
2023                 }
2024         }
2025
2026 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2027         ELF_CORE_WRITE_EXTRA_DATA;
2028 #endif
2029
2030 end_coredump:
2031         set_fs(fs);
2032
2033 cleanup:
2034         free_note_info(&info);
2035         kfree(elf);
2036 out:
2037         return has_dumped;
2038 }
2039
2040 #endif          /* USE_ELF_CORE_DUMP */
2041
2042 static int __init init_elf_binfmt(void)
2043 {
2044         return register_binfmt(&elf_format);
2045 }
2046
2047 static void __exit exit_elf_binfmt(void)
2048 {
2049         /* Remove the COFF and ELF loaders. */
2050         unregister_binfmt(&elf_format);
2051 }
2052
2053 core_initcall(init_elf_binfmt);
2054 module_exit(exit_elf_binfmt);
2055 MODULE_LICENSE("GPL");