reiserfs: convert j_flush_sem to mutex
[linux-2.6] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/fcntl.h>
25 #include <linux/ptrace.h>
26 #include <linux/slab.h>
27 #include <linux/shm.h>
28 #include <linux/personality.h>
29 #include <linux/elfcore.h>
30 #include <linux/init.h>
31 #include <linux/highuid.h>
32 #include <linux/smp.h>
33 #include <linux/compiler.h>
34 #include <linux/highmem.h>
35 #include <linux/pagemap.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/random.h>
39 #include <linux/elf.h>
40 #include <linux/utsname.h>
41 #include <asm/uaccess.h>
42 #include <asm/param.h>
43 #include <asm/page.h>
44
45 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
46 static int load_elf_library(struct file *);
47 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
48                                 int, int, unsigned long);
49
50 /*
51  * If we don't support core dumping, then supply a NULL so we
52  * don't even try.
53  */
54 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
55 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
56 #else
57 #define elf_core_dump   NULL
58 #endif
59
60 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
61 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
62 #else
63 #define ELF_MIN_ALIGN   PAGE_SIZE
64 #endif
65
66 #ifndef ELF_CORE_EFLAGS
67 #define ELF_CORE_EFLAGS 0
68 #endif
69
70 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
71 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
72 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
73
74 static struct linux_binfmt elf_format = {
75                 .module         = THIS_MODULE,
76                 .load_binary    = load_elf_binary,
77                 .load_shlib     = load_elf_library,
78                 .core_dump      = elf_core_dump,
79                 .min_coredump   = ELF_EXEC_PAGESIZE,
80                 .hasvdso        = 1
81 };
82
83 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
84
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87         start = ELF_PAGEALIGN(start);
88         end = ELF_PAGEALIGN(end);
89         if (end > start) {
90                 unsigned long addr;
91                 down_write(&current->mm->mmap_sem);
92                 addr = do_brk(start, end - start);
93                 up_write(&current->mm->mmap_sem);
94                 if (BAD_ADDR(addr))
95                         return addr;
96         }
97         current->mm->start_brk = current->mm->brk = end;
98         return 0;
99 }
100
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  This would
103    contain the junk from the file that should not
104    be in memory
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108         unsigned long nbyte;
109
110         nbyte = ELF_PAGEOFFSET(elf_bss);
111         if (nbyte) {
112                 nbyte = ELF_MIN_ALIGN - nbyte;
113                 if (clear_user((void __user *) elf_bss, nbyte))
114                         return -EFAULT;
115         }
116         return 0;
117 }
118
119 /* Let's use some macros to make this stack manipulation a little clearer */
120 #ifdef CONFIG_STACK_GROWSUP
121 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122 #define STACK_ROUND(sp, items) \
123         ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
124 #define STACK_ALLOC(sp, len) ({ \
125         elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
126         old_sp; })
127 #else
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129 #define STACK_ROUND(sp, items) \
130         (((unsigned long) (sp - items)) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132 #endif
133
134 static int
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136                 unsigned long load_addr, unsigned long interp_load_addr)
137 {
138         unsigned long p = bprm->p;
139         int argc = bprm->argc;
140         int envc = bprm->envc;
141         elf_addr_t __user *argv;
142         elf_addr_t __user *envp;
143         elf_addr_t __user *sp;
144         elf_addr_t __user *u_platform;
145         const char *k_platform = ELF_PLATFORM;
146         int items;
147         elf_addr_t *elf_info;
148         int ei_index = 0;
149         struct task_struct *tsk = current;
150         struct vm_area_struct *vma;
151
152         /*
153          * In some cases (e.g. Hyper-Threading), we want to avoid L1
154          * evictions by the processes running on the same package. One
155          * thing we can do is to shuffle the initial stack for them.
156          */
157
158         p = arch_align_stack(p);
159
160         /*
161          * If this architecture has a platform capability string, copy it
162          * to userspace.  In some cases (Sparc), this info is impossible
163          * for userspace to get any other way, in others (i386) it is
164          * merely difficult.
165          */
166         u_platform = NULL;
167         if (k_platform) {
168                 size_t len = strlen(k_platform) + 1;
169
170                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
171                 if (__copy_to_user(u_platform, k_platform, len))
172                         return -EFAULT;
173         }
174
175         /* Create the ELF interpreter info */
176         elf_info = (elf_addr_t *)current->mm->saved_auxv;
177         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
178 #define NEW_AUX_ENT(id, val) \
179         do { \
180                 elf_info[ei_index++] = id; \
181                 elf_info[ei_index++] = val; \
182         } while (0)
183
184 #ifdef ARCH_DLINFO
185         /* 
186          * ARCH_DLINFO must come first so PPC can do its special alignment of
187          * AUXV.
188          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
189          * ARCH_DLINFO changes
190          */
191         ARCH_DLINFO;
192 #endif
193         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
194         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
195         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
196         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
197         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
198         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
199         NEW_AUX_ENT(AT_BASE, interp_load_addr);
200         NEW_AUX_ENT(AT_FLAGS, 0);
201         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
202         NEW_AUX_ENT(AT_UID, tsk->uid);
203         NEW_AUX_ENT(AT_EUID, tsk->euid);
204         NEW_AUX_ENT(AT_GID, tsk->gid);
205         NEW_AUX_ENT(AT_EGID, tsk->egid);
206         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
207         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
208         if (k_platform) {
209                 NEW_AUX_ENT(AT_PLATFORM,
210                             (elf_addr_t)(unsigned long)u_platform);
211         }
212         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
213                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
214         }
215 #undef NEW_AUX_ENT
216         /* AT_NULL is zero; clear the rest too */
217         memset(&elf_info[ei_index], 0,
218                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
219
220         /* And advance past the AT_NULL entry.  */
221         ei_index += 2;
222
223         sp = STACK_ADD(p, ei_index);
224
225         items = (argc + 1) + (envc + 1) + 1;
226         bprm->p = STACK_ROUND(sp, items);
227
228         /* Point sp at the lowest address on the stack */
229 #ifdef CONFIG_STACK_GROWSUP
230         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
231         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
232 #else
233         sp = (elf_addr_t __user *)bprm->p;
234 #endif
235
236
237         /*
238          * Grow the stack manually; some architectures have a limit on how
239          * far ahead a user-space access may be in order to grow the stack.
240          */
241         vma = find_extend_vma(current->mm, bprm->p);
242         if (!vma)
243                 return -EFAULT;
244
245         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
246         if (__put_user(argc, sp++))
247                 return -EFAULT;
248         argv = sp;
249         envp = argv + argc + 1;
250
251         /* Populate argv and envp */
252         p = current->mm->arg_end = current->mm->arg_start;
253         while (argc-- > 0) {
254                 size_t len;
255                 if (__put_user((elf_addr_t)p, argv++))
256                         return -EFAULT;
257                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
258                 if (!len || len > MAX_ARG_STRLEN)
259                         return -EINVAL;
260                 p += len;
261         }
262         if (__put_user(0, argv))
263                 return -EFAULT;
264         current->mm->arg_end = current->mm->env_start = p;
265         while (envc-- > 0) {
266                 size_t len;
267                 if (__put_user((elf_addr_t)p, envp++))
268                         return -EFAULT;
269                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
270                 if (!len || len > MAX_ARG_STRLEN)
271                         return -EINVAL;
272                 p += len;
273         }
274         if (__put_user(0, envp))
275                 return -EFAULT;
276         current->mm->env_end = p;
277
278         /* Put the elf_info on the stack in the right place.  */
279         sp = (elf_addr_t __user *)envp + 1;
280         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
281                 return -EFAULT;
282         return 0;
283 }
284
285 #ifndef elf_map
286
287 static unsigned long elf_map(struct file *filep, unsigned long addr,
288                 struct elf_phdr *eppnt, int prot, int type,
289                 unsigned long total_size)
290 {
291         unsigned long map_addr;
292         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
293         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
294         addr = ELF_PAGESTART(addr);
295         size = ELF_PAGEALIGN(size);
296
297         /* mmap() will return -EINVAL if given a zero size, but a
298          * segment with zero filesize is perfectly valid */
299         if (!size)
300                 return addr;
301
302         down_write(&current->mm->mmap_sem);
303         /*
304         * total_size is the size of the ELF (interpreter) image.
305         * The _first_ mmap needs to know the full size, otherwise
306         * randomization might put this image into an overlapping
307         * position with the ELF binary image. (since size < total_size)
308         * So we first map the 'big' image - and unmap the remainder at
309         * the end. (which unmap is needed for ELF images with holes.)
310         */
311         if (total_size) {
312                 total_size = ELF_PAGEALIGN(total_size);
313                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
314                 if (!BAD_ADDR(map_addr))
315                         do_munmap(current->mm, map_addr+size, total_size-size);
316         } else
317                 map_addr = do_mmap(filep, addr, size, prot, type, off);
318
319         up_write(&current->mm->mmap_sem);
320         return(map_addr);
321 }
322
323 #endif /* !elf_map */
324
325 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
326 {
327         int i, first_idx = -1, last_idx = -1;
328
329         for (i = 0; i < nr; i++) {
330                 if (cmds[i].p_type == PT_LOAD) {
331                         last_idx = i;
332                         if (first_idx == -1)
333                                 first_idx = i;
334                 }
335         }
336         if (first_idx == -1)
337                 return 0;
338
339         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
340                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
341 }
342
343
344 /* This is much more generalized than the library routine read function,
345    so we keep this separate.  Technically the library read function
346    is only provided so that we can read a.out libraries that have
347    an ELF header */
348
349 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
350                 struct file *interpreter, unsigned long *interp_map_addr,
351                 unsigned long no_base)
352 {
353         struct elf_phdr *elf_phdata;
354         struct elf_phdr *eppnt;
355         unsigned long load_addr = 0;
356         int load_addr_set = 0;
357         unsigned long last_bss = 0, elf_bss = 0;
358         unsigned long error = ~0UL;
359         unsigned long total_size;
360         int retval, i, size;
361
362         /* First of all, some simple consistency checks */
363         if (interp_elf_ex->e_type != ET_EXEC &&
364             interp_elf_ex->e_type != ET_DYN)
365                 goto out;
366         if (!elf_check_arch(interp_elf_ex))
367                 goto out;
368         if (!interpreter->f_op || !interpreter->f_op->mmap)
369                 goto out;
370
371         /*
372          * If the size of this structure has changed, then punt, since
373          * we will be doing the wrong thing.
374          */
375         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
376                 goto out;
377         if (interp_elf_ex->e_phnum < 1 ||
378                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
379                 goto out;
380
381         /* Now read in all of the header information */
382         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
383         if (size > ELF_MIN_ALIGN)
384                 goto out;
385         elf_phdata = kmalloc(size, GFP_KERNEL);
386         if (!elf_phdata)
387                 goto out;
388
389         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
390                              (char *)elf_phdata,size);
391         error = -EIO;
392         if (retval != size) {
393                 if (retval < 0)
394                         error = retval; 
395                 goto out_close;
396         }
397
398         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
399         if (!total_size) {
400                 error = -EINVAL;
401                 goto out_close;
402         }
403
404         eppnt = elf_phdata;
405         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
406                 if (eppnt->p_type == PT_LOAD) {
407                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
408                         int elf_prot = 0;
409                         unsigned long vaddr = 0;
410                         unsigned long k, map_addr;
411
412                         if (eppnt->p_flags & PF_R)
413                                 elf_prot = PROT_READ;
414                         if (eppnt->p_flags & PF_W)
415                                 elf_prot |= PROT_WRITE;
416                         if (eppnt->p_flags & PF_X)
417                                 elf_prot |= PROT_EXEC;
418                         vaddr = eppnt->p_vaddr;
419                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
420                                 elf_type |= MAP_FIXED;
421                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
422                                 load_addr = -vaddr;
423
424                         map_addr = elf_map(interpreter, load_addr + vaddr,
425                                         eppnt, elf_prot, elf_type, total_size);
426                         total_size = 0;
427                         if (!*interp_map_addr)
428                                 *interp_map_addr = map_addr;
429                         error = map_addr;
430                         if (BAD_ADDR(map_addr))
431                                 goto out_close;
432
433                         if (!load_addr_set &&
434                             interp_elf_ex->e_type == ET_DYN) {
435                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
436                                 load_addr_set = 1;
437                         }
438
439                         /*
440                          * Check to see if the section's size will overflow the
441                          * allowed task size. Note that p_filesz must always be
442                          * <= p_memsize so it's only necessary to check p_memsz.
443                          */
444                         k = load_addr + eppnt->p_vaddr;
445                         if (BAD_ADDR(k) ||
446                             eppnt->p_filesz > eppnt->p_memsz ||
447                             eppnt->p_memsz > TASK_SIZE ||
448                             TASK_SIZE - eppnt->p_memsz < k) {
449                                 error = -ENOMEM;
450                                 goto out_close;
451                         }
452
453                         /*
454                          * Find the end of the file mapping for this phdr, and
455                          * keep track of the largest address we see for this.
456                          */
457                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
458                         if (k > elf_bss)
459                                 elf_bss = k;
460
461                         /*
462                          * Do the same thing for the memory mapping - between
463                          * elf_bss and last_bss is the bss section.
464                          */
465                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
466                         if (k > last_bss)
467                                 last_bss = k;
468                 }
469         }
470
471         /*
472          * Now fill out the bss section.  First pad the last page up
473          * to the page boundary, and then perform a mmap to make sure
474          * that there are zero-mapped pages up to and including the 
475          * last bss page.
476          */
477         if (padzero(elf_bss)) {
478                 error = -EFAULT;
479                 goto out_close;
480         }
481
482         /* What we have mapped so far */
483         elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
484
485         /* Map the last of the bss segment */
486         if (last_bss > elf_bss) {
487                 down_write(&current->mm->mmap_sem);
488                 error = do_brk(elf_bss, last_bss - elf_bss);
489                 up_write(&current->mm->mmap_sem);
490                 if (BAD_ADDR(error))
491                         goto out_close;
492         }
493
494         error = load_addr;
495
496 out_close:
497         kfree(elf_phdata);
498 out:
499         return error;
500 }
501
502 /*
503  * These are the functions used to load ELF style executables and shared
504  * libraries.  There is no binary dependent code anywhere else.
505  */
506
507 #define INTERPRETER_NONE 0
508 #define INTERPRETER_ELF 2
509
510 #ifndef STACK_RND_MASK
511 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
512 #endif
513
514 static unsigned long randomize_stack_top(unsigned long stack_top)
515 {
516         unsigned int random_variable = 0;
517
518         if ((current->flags & PF_RANDOMIZE) &&
519                 !(current->personality & ADDR_NO_RANDOMIZE)) {
520                 random_variable = get_random_int() & STACK_RND_MASK;
521                 random_variable <<= PAGE_SHIFT;
522         }
523 #ifdef CONFIG_STACK_GROWSUP
524         return PAGE_ALIGN(stack_top) + random_variable;
525 #else
526         return PAGE_ALIGN(stack_top) - random_variable;
527 #endif
528 }
529
530 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
531 {
532         struct file *interpreter = NULL; /* to shut gcc up */
533         unsigned long load_addr = 0, load_bias = 0;
534         int load_addr_set = 0;
535         char * elf_interpreter = NULL;
536         unsigned long error;
537         struct elf_phdr *elf_ppnt, *elf_phdata;
538         unsigned long elf_bss, elf_brk;
539         int elf_exec_fileno;
540         int retval, i;
541         unsigned int size;
542         unsigned long elf_entry;
543         unsigned long interp_load_addr = 0;
544         unsigned long start_code, end_code, start_data, end_data;
545         unsigned long reloc_func_desc = 0;
546         int executable_stack = EXSTACK_DEFAULT;
547         unsigned long def_flags = 0;
548         struct {
549                 struct elfhdr elf_ex;
550                 struct elfhdr interp_elf_ex;
551         } *loc;
552
553         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
554         if (!loc) {
555                 retval = -ENOMEM;
556                 goto out_ret;
557         }
558         
559         /* Get the exec-header */
560         loc->elf_ex = *((struct elfhdr *)bprm->buf);
561
562         retval = -ENOEXEC;
563         /* First of all, some simple consistency checks */
564         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
565                 goto out;
566
567         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
568                 goto out;
569         if (!elf_check_arch(&loc->elf_ex))
570                 goto out;
571         if (!bprm->file->f_op||!bprm->file->f_op->mmap)
572                 goto out;
573
574         /* Now read in all of the header information */
575         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
576                 goto out;
577         if (loc->elf_ex.e_phnum < 1 ||
578                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
579                 goto out;
580         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
581         retval = -ENOMEM;
582         elf_phdata = kmalloc(size, GFP_KERNEL);
583         if (!elf_phdata)
584                 goto out;
585
586         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
587                              (char *)elf_phdata, size);
588         if (retval != size) {
589                 if (retval >= 0)
590                         retval = -EIO;
591                 goto out_free_ph;
592         }
593
594         retval = get_unused_fd();
595         if (retval < 0)
596                 goto out_free_ph;
597         get_file(bprm->file);
598         fd_install(elf_exec_fileno = retval, bprm->file);
599
600         elf_ppnt = elf_phdata;
601         elf_bss = 0;
602         elf_brk = 0;
603
604         start_code = ~0UL;
605         end_code = 0;
606         start_data = 0;
607         end_data = 0;
608
609         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
610                 if (elf_ppnt->p_type == PT_INTERP) {
611                         /* This is the program interpreter used for
612                          * shared libraries - for now assume that this
613                          * is an a.out format binary
614                          */
615                         retval = -ENOEXEC;
616                         if (elf_ppnt->p_filesz > PATH_MAX || 
617                             elf_ppnt->p_filesz < 2)
618                                 goto out_free_file;
619
620                         retval = -ENOMEM;
621                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
622                                                   GFP_KERNEL);
623                         if (!elf_interpreter)
624                                 goto out_free_file;
625
626                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
627                                              elf_interpreter,
628                                              elf_ppnt->p_filesz);
629                         if (retval != elf_ppnt->p_filesz) {
630                                 if (retval >= 0)
631                                         retval = -EIO;
632                                 goto out_free_interp;
633                         }
634                         /* make sure path is NULL terminated */
635                         retval = -ENOEXEC;
636                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
637                                 goto out_free_interp;
638
639                         /*
640                          * The early SET_PERSONALITY here is so that the lookup
641                          * for the interpreter happens in the namespace of the 
642                          * to-be-execed image.  SET_PERSONALITY can select an
643                          * alternate root.
644                          *
645                          * However, SET_PERSONALITY is NOT allowed to switch
646                          * this task into the new images's memory mapping
647                          * policy - that is, TASK_SIZE must still evaluate to
648                          * that which is appropriate to the execing application.
649                          * This is because exit_mmap() needs to have TASK_SIZE
650                          * evaluate to the size of the old image.
651                          *
652                          * So if (say) a 64-bit application is execing a 32-bit
653                          * application it is the architecture's responsibility
654                          * to defer changing the value of TASK_SIZE until the
655                          * switch really is going to happen - do this in
656                          * flush_thread().      - akpm
657                          */
658                         SET_PERSONALITY(loc->elf_ex, 0);
659
660                         interpreter = open_exec(elf_interpreter);
661                         retval = PTR_ERR(interpreter);
662                         if (IS_ERR(interpreter))
663                                 goto out_free_interp;
664
665                         /*
666                          * If the binary is not readable then enforce
667                          * mm->dumpable = 0 regardless of the interpreter's
668                          * permissions.
669                          */
670                         if (file_permission(interpreter, MAY_READ) < 0)
671                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
672
673                         retval = kernel_read(interpreter, 0, bprm->buf,
674                                              BINPRM_BUF_SIZE);
675                         if (retval != BINPRM_BUF_SIZE) {
676                                 if (retval >= 0)
677                                         retval = -EIO;
678                                 goto out_free_dentry;
679                         }
680
681                         /* Get the exec headers */
682                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
683                         break;
684                 }
685                 elf_ppnt++;
686         }
687
688         elf_ppnt = elf_phdata;
689         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
690                 if (elf_ppnt->p_type == PT_GNU_STACK) {
691                         if (elf_ppnt->p_flags & PF_X)
692                                 executable_stack = EXSTACK_ENABLE_X;
693                         else
694                                 executable_stack = EXSTACK_DISABLE_X;
695                         break;
696                 }
697
698         /* Some simple consistency checks for the interpreter */
699         if (elf_interpreter) {
700                 retval = -ELIBBAD;
701                 /* Not an ELF interpreter */
702                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
703                         goto out_free_dentry;
704                 /* Verify the interpreter has a valid arch */
705                 if (!elf_check_arch(&loc->interp_elf_ex))
706                         goto out_free_dentry;
707         } else {
708                 /* Executables without an interpreter also need a personality  */
709                 SET_PERSONALITY(loc->elf_ex, 0);
710         }
711
712         /* Flush all traces of the currently running executable */
713         retval = flush_old_exec(bprm);
714         if (retval)
715                 goto out_free_dentry;
716
717         /* OK, This is the point of no return */
718         current->flags &= ~PF_FORKNOEXEC;
719         current->mm->def_flags = def_flags;
720
721         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
722            may depend on the personality.  */
723         SET_PERSONALITY(loc->elf_ex, 0);
724         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
725                 current->personality |= READ_IMPLIES_EXEC;
726
727         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
728                 current->flags |= PF_RANDOMIZE;
729         arch_pick_mmap_layout(current->mm);
730
731         /* Do this so that we can load the interpreter, if need be.  We will
732            change some of these later */
733         current->mm->free_area_cache = current->mm->mmap_base;
734         current->mm->cached_hole_size = 0;
735         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
736                                  executable_stack);
737         if (retval < 0) {
738                 send_sig(SIGKILL, current, 0);
739                 goto out_free_dentry;
740         }
741         
742         current->mm->start_stack = bprm->p;
743
744         /* Now we do a little grungy work by mmaping the ELF image into
745            the correct location in memory. */
746         for(i = 0, elf_ppnt = elf_phdata;
747             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
748                 int elf_prot = 0, elf_flags;
749                 unsigned long k, vaddr;
750
751                 if (elf_ppnt->p_type != PT_LOAD)
752                         continue;
753
754                 if (unlikely (elf_brk > elf_bss)) {
755                         unsigned long nbyte;
756                     
757                         /* There was a PT_LOAD segment with p_memsz > p_filesz
758                            before this one. Map anonymous pages, if needed,
759                            and clear the area.  */
760                         retval = set_brk (elf_bss + load_bias,
761                                           elf_brk + load_bias);
762                         if (retval) {
763                                 send_sig(SIGKILL, current, 0);
764                                 goto out_free_dentry;
765                         }
766                         nbyte = ELF_PAGEOFFSET(elf_bss);
767                         if (nbyte) {
768                                 nbyte = ELF_MIN_ALIGN - nbyte;
769                                 if (nbyte > elf_brk - elf_bss)
770                                         nbyte = elf_brk - elf_bss;
771                                 if (clear_user((void __user *)elf_bss +
772                                                         load_bias, nbyte)) {
773                                         /*
774                                          * This bss-zeroing can fail if the ELF
775                                          * file specifies odd protections. So
776                                          * we don't check the return value
777                                          */
778                                 }
779                         }
780                 }
781
782                 if (elf_ppnt->p_flags & PF_R)
783                         elf_prot |= PROT_READ;
784                 if (elf_ppnt->p_flags & PF_W)
785                         elf_prot |= PROT_WRITE;
786                 if (elf_ppnt->p_flags & PF_X)
787                         elf_prot |= PROT_EXEC;
788
789                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
790
791                 vaddr = elf_ppnt->p_vaddr;
792                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
793                         elf_flags |= MAP_FIXED;
794                 } else if (loc->elf_ex.e_type == ET_DYN) {
795                         /* Try and get dynamic programs out of the way of the
796                          * default mmap base, as well as whatever program they
797                          * might try to exec.  This is because the brk will
798                          * follow the loader, and is not movable.  */
799 #ifdef CONFIG_X86
800                         load_bias = 0;
801 #else
802                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
803 #endif
804                 }
805
806                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
807                                 elf_prot, elf_flags, 0);
808                 if (BAD_ADDR(error)) {
809                         send_sig(SIGKILL, current, 0);
810                         retval = IS_ERR((void *)error) ?
811                                 PTR_ERR((void*)error) : -EINVAL;
812                         goto out_free_dentry;
813                 }
814
815                 if (!load_addr_set) {
816                         load_addr_set = 1;
817                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
818                         if (loc->elf_ex.e_type == ET_DYN) {
819                                 load_bias += error -
820                                              ELF_PAGESTART(load_bias + vaddr);
821                                 load_addr += load_bias;
822                                 reloc_func_desc = load_bias;
823                         }
824                 }
825                 k = elf_ppnt->p_vaddr;
826                 if (k < start_code)
827                         start_code = k;
828                 if (start_data < k)
829                         start_data = k;
830
831                 /*
832                  * Check to see if the section's size will overflow the
833                  * allowed task size. Note that p_filesz must always be
834                  * <= p_memsz so it is only necessary to check p_memsz.
835                  */
836                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
837                     elf_ppnt->p_memsz > TASK_SIZE ||
838                     TASK_SIZE - elf_ppnt->p_memsz < k) {
839                         /* set_brk can never work. Avoid overflows. */
840                         send_sig(SIGKILL, current, 0);
841                         retval = -EINVAL;
842                         goto out_free_dentry;
843                 }
844
845                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
846
847                 if (k > elf_bss)
848                         elf_bss = k;
849                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
850                         end_code = k;
851                 if (end_data < k)
852                         end_data = k;
853                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
854                 if (k > elf_brk)
855                         elf_brk = k;
856         }
857
858         loc->elf_ex.e_entry += load_bias;
859         elf_bss += load_bias;
860         elf_brk += load_bias;
861         start_code += load_bias;
862         end_code += load_bias;
863         start_data += load_bias;
864         end_data += load_bias;
865
866         /* Calling set_brk effectively mmaps the pages that we need
867          * for the bss and break sections.  We must do this before
868          * mapping in the interpreter, to make sure it doesn't wind
869          * up getting placed where the bss needs to go.
870          */
871         retval = set_brk(elf_bss, elf_brk);
872         if (retval) {
873                 send_sig(SIGKILL, current, 0);
874                 goto out_free_dentry;
875         }
876         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
877                 send_sig(SIGSEGV, current, 0);
878                 retval = -EFAULT; /* Nobody gets to see this, but.. */
879                 goto out_free_dentry;
880         }
881
882         if (elf_interpreter) {
883                 unsigned long uninitialized_var(interp_map_addr);
884
885                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
886                                             interpreter,
887                                             &interp_map_addr,
888                                             load_bias);
889                 if (!IS_ERR((void *)elf_entry)) {
890                         /*
891                          * load_elf_interp() returns relocation
892                          * adjustment
893                          */
894                         interp_load_addr = elf_entry;
895                         elf_entry += loc->interp_elf_ex.e_entry;
896                 }
897                 if (BAD_ADDR(elf_entry)) {
898                         force_sig(SIGSEGV, current);
899                         retval = IS_ERR((void *)elf_entry) ?
900                                         (int)elf_entry : -EINVAL;
901                         goto out_free_dentry;
902                 }
903                 reloc_func_desc = interp_load_addr;
904
905                 allow_write_access(interpreter);
906                 fput(interpreter);
907                 kfree(elf_interpreter);
908         } else {
909                 elf_entry = loc->elf_ex.e_entry;
910                 if (BAD_ADDR(elf_entry)) {
911                         force_sig(SIGSEGV, current);
912                         retval = -EINVAL;
913                         goto out_free_dentry;
914                 }
915         }
916
917         kfree(elf_phdata);
918
919         sys_close(elf_exec_fileno);
920
921         set_binfmt(&elf_format);
922
923 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
924         retval = arch_setup_additional_pages(bprm, executable_stack);
925         if (retval < 0) {
926                 send_sig(SIGKILL, current, 0);
927                 goto out;
928         }
929 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
930
931         compute_creds(bprm);
932         current->flags &= ~PF_FORKNOEXEC;
933         retval = create_elf_tables(bprm, &loc->elf_ex,
934                           load_addr, interp_load_addr);
935         if (retval < 0) {
936                 send_sig(SIGKILL, current, 0);
937                 goto out;
938         }
939         /* N.B. passed_fileno might not be initialized? */
940         current->mm->end_code = end_code;
941         current->mm->start_code = start_code;
942         current->mm->start_data = start_data;
943         current->mm->end_data = end_data;
944         current->mm->start_stack = bprm->p;
945
946 #ifdef arch_randomize_brk
947         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
948                 current->mm->brk = current->mm->start_brk =
949                         arch_randomize_brk(current->mm);
950 #endif
951
952         if (current->personality & MMAP_PAGE_ZERO) {
953                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
954                    and some applications "depend" upon this behavior.
955                    Since we do not have the power to recompile these, we
956                    emulate the SVr4 behavior. Sigh. */
957                 down_write(&current->mm->mmap_sem);
958                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
959                                 MAP_FIXED | MAP_PRIVATE, 0);
960                 up_write(&current->mm->mmap_sem);
961         }
962
963 #ifdef ELF_PLAT_INIT
964         /*
965          * The ABI may specify that certain registers be set up in special
966          * ways (on i386 %edx is the address of a DT_FINI function, for
967          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
968          * that the e_entry field is the address of the function descriptor
969          * for the startup routine, rather than the address of the startup
970          * routine itself.  This macro performs whatever initialization to
971          * the regs structure is required as well as any relocations to the
972          * function descriptor entries when executing dynamically links apps.
973          */
974         ELF_PLAT_INIT(regs, reloc_func_desc);
975 #endif
976
977         start_thread(regs, elf_entry, bprm->p);
978         if (unlikely(current->ptrace & PT_PTRACED)) {
979                 if (current->ptrace & PT_TRACE_EXEC)
980                         ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
981                 else
982                         send_sig(SIGTRAP, current, 0);
983         }
984         retval = 0;
985 out:
986         kfree(loc);
987 out_ret:
988         return retval;
989
990         /* error cleanup */
991 out_free_dentry:
992         allow_write_access(interpreter);
993         if (interpreter)
994                 fput(interpreter);
995 out_free_interp:
996         kfree(elf_interpreter);
997 out_free_file:
998         sys_close(elf_exec_fileno);
999 out_free_ph:
1000         kfree(elf_phdata);
1001         goto out;
1002 }
1003
1004 /* This is really simpleminded and specialized - we are loading an
1005    a.out library that is given an ELF header. */
1006 static int load_elf_library(struct file *file)
1007 {
1008         struct elf_phdr *elf_phdata;
1009         struct elf_phdr *eppnt;
1010         unsigned long elf_bss, bss, len;
1011         int retval, error, i, j;
1012         struct elfhdr elf_ex;
1013
1014         error = -ENOEXEC;
1015         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1016         if (retval != sizeof(elf_ex))
1017                 goto out;
1018
1019         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1020                 goto out;
1021
1022         /* First of all, some simple consistency checks */
1023         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1024             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1025                 goto out;
1026
1027         /* Now read in all of the header information */
1028
1029         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1030         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1031
1032         error = -ENOMEM;
1033         elf_phdata = kmalloc(j, GFP_KERNEL);
1034         if (!elf_phdata)
1035                 goto out;
1036
1037         eppnt = elf_phdata;
1038         error = -ENOEXEC;
1039         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1040         if (retval != j)
1041                 goto out_free_ph;
1042
1043         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1044                 if ((eppnt + i)->p_type == PT_LOAD)
1045                         j++;
1046         if (j != 1)
1047                 goto out_free_ph;
1048
1049         while (eppnt->p_type != PT_LOAD)
1050                 eppnt++;
1051
1052         /* Now use mmap to map the library into memory. */
1053         down_write(&current->mm->mmap_sem);
1054         error = do_mmap(file,
1055                         ELF_PAGESTART(eppnt->p_vaddr),
1056                         (eppnt->p_filesz +
1057                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1058                         PROT_READ | PROT_WRITE | PROT_EXEC,
1059                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1060                         (eppnt->p_offset -
1061                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1062         up_write(&current->mm->mmap_sem);
1063         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1064                 goto out_free_ph;
1065
1066         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1067         if (padzero(elf_bss)) {
1068                 error = -EFAULT;
1069                 goto out_free_ph;
1070         }
1071
1072         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1073                             ELF_MIN_ALIGN - 1);
1074         bss = eppnt->p_memsz + eppnt->p_vaddr;
1075         if (bss > len) {
1076                 down_write(&current->mm->mmap_sem);
1077                 do_brk(len, bss - len);
1078                 up_write(&current->mm->mmap_sem);
1079         }
1080         error = 0;
1081
1082 out_free_ph:
1083         kfree(elf_phdata);
1084 out:
1085         return error;
1086 }
1087
1088 /*
1089  * Note that some platforms still use traditional core dumps and not
1090  * the ELF core dump.  Each platform can select it as appropriate.
1091  */
1092 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1093
1094 /*
1095  * ELF core dumper
1096  *
1097  * Modelled on fs/exec.c:aout_core_dump()
1098  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1099  */
1100 /*
1101  * These are the only things you should do on a core-file: use only these
1102  * functions to write out all the necessary info.
1103  */
1104 static int dump_write(struct file *file, const void *addr, int nr)
1105 {
1106         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1107 }
1108
1109 static int dump_seek(struct file *file, loff_t off)
1110 {
1111         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1112                 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1113                         return 0;
1114         } else {
1115                 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1116                 if (!buf)
1117                         return 0;
1118                 while (off > 0) {
1119                         unsigned long n = off;
1120                         if (n > PAGE_SIZE)
1121                                 n = PAGE_SIZE;
1122                         if (!dump_write(file, buf, n))
1123                                 return 0;
1124                         off -= n;
1125                 }
1126                 free_page((unsigned long)buf);
1127         }
1128         return 1;
1129 }
1130
1131 /*
1132  * Decide what to dump of a segment, part, all or none.
1133  */
1134 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1135                                    unsigned long mm_flags)
1136 {
1137         /* The vma can be set up to tell us the answer directly.  */
1138         if (vma->vm_flags & VM_ALWAYSDUMP)
1139                 goto whole;
1140
1141         /* Do not dump I/O mapped devices or special mappings */
1142         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1143                 return 0;
1144
1145 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1146
1147         /* By default, dump shared memory if mapped from an anonymous file. */
1148         if (vma->vm_flags & VM_SHARED) {
1149                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1150                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1151                         goto whole;
1152                 return 0;
1153         }
1154
1155         /* Dump segments that have been written to.  */
1156         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1157                 goto whole;
1158         if (vma->vm_file == NULL)
1159                 return 0;
1160
1161         if (FILTER(MAPPED_PRIVATE))
1162                 goto whole;
1163
1164         /*
1165          * If this looks like the beginning of a DSO or executable mapping,
1166          * check for an ELF header.  If we find one, dump the first page to
1167          * aid in determining what was mapped here.
1168          */
1169         if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1170                 u32 __user *header = (u32 __user *) vma->vm_start;
1171                 u32 word;
1172                 /*
1173                  * Doing it this way gets the constant folded by GCC.
1174                  */
1175                 union {
1176                         u32 cmp;
1177                         char elfmag[SELFMAG];
1178                 } magic;
1179                 BUILD_BUG_ON(SELFMAG != sizeof word);
1180                 magic.elfmag[EI_MAG0] = ELFMAG0;
1181                 magic.elfmag[EI_MAG1] = ELFMAG1;
1182                 magic.elfmag[EI_MAG2] = ELFMAG2;
1183                 magic.elfmag[EI_MAG3] = ELFMAG3;
1184                 if (get_user(word, header) == 0 && word == magic.cmp)
1185                         return PAGE_SIZE;
1186         }
1187
1188 #undef  FILTER
1189
1190         return 0;
1191
1192 whole:
1193         return vma->vm_end - vma->vm_start;
1194 }
1195
1196 /* An ELF note in memory */
1197 struct memelfnote
1198 {
1199         const char *name;
1200         int type;
1201         unsigned int datasz;
1202         void *data;
1203 };
1204
1205 static int notesize(struct memelfnote *en)
1206 {
1207         int sz;
1208
1209         sz = sizeof(struct elf_note);
1210         sz += roundup(strlen(en->name) + 1, 4);
1211         sz += roundup(en->datasz, 4);
1212
1213         return sz;
1214 }
1215
1216 #define DUMP_WRITE(addr, nr, foffset)   \
1217         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1218
1219 static int alignfile(struct file *file, loff_t *foffset)
1220 {
1221         static const char buf[4] = { 0, };
1222         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1223         return 1;
1224 }
1225
1226 static int writenote(struct memelfnote *men, struct file *file,
1227                         loff_t *foffset)
1228 {
1229         struct elf_note en;
1230         en.n_namesz = strlen(men->name) + 1;
1231         en.n_descsz = men->datasz;
1232         en.n_type = men->type;
1233
1234         DUMP_WRITE(&en, sizeof(en), foffset);
1235         DUMP_WRITE(men->name, en.n_namesz, foffset);
1236         if (!alignfile(file, foffset))
1237                 return 0;
1238         DUMP_WRITE(men->data, men->datasz, foffset);
1239         if (!alignfile(file, foffset))
1240                 return 0;
1241
1242         return 1;
1243 }
1244 #undef DUMP_WRITE
1245
1246 #define DUMP_WRITE(addr, nr)    \
1247         if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1248                 goto end_coredump;
1249 #define DUMP_SEEK(off)  \
1250         if (!dump_seek(file, (off))) \
1251                 goto end_coredump;
1252
1253 static void fill_elf_header(struct elfhdr *elf, int segs,
1254                             u16 machine, u32 flags, u8 osabi)
1255 {
1256         memset(elf, 0, sizeof(*elf));
1257
1258         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1259         elf->e_ident[EI_CLASS] = ELF_CLASS;
1260         elf->e_ident[EI_DATA] = ELF_DATA;
1261         elf->e_ident[EI_VERSION] = EV_CURRENT;
1262         elf->e_ident[EI_OSABI] = ELF_OSABI;
1263
1264         elf->e_type = ET_CORE;
1265         elf->e_machine = machine;
1266         elf->e_version = EV_CURRENT;
1267         elf->e_phoff = sizeof(struct elfhdr);
1268         elf->e_flags = flags;
1269         elf->e_ehsize = sizeof(struct elfhdr);
1270         elf->e_phentsize = sizeof(struct elf_phdr);
1271         elf->e_phnum = segs;
1272
1273         return;
1274 }
1275
1276 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1277 {
1278         phdr->p_type = PT_NOTE;
1279         phdr->p_offset = offset;
1280         phdr->p_vaddr = 0;
1281         phdr->p_paddr = 0;
1282         phdr->p_filesz = sz;
1283         phdr->p_memsz = 0;
1284         phdr->p_flags = 0;
1285         phdr->p_align = 0;
1286         return;
1287 }
1288
1289 static void fill_note(struct memelfnote *note, const char *name, int type, 
1290                 unsigned int sz, void *data)
1291 {
1292         note->name = name;
1293         note->type = type;
1294         note->datasz = sz;
1295         note->data = data;
1296         return;
1297 }
1298
1299 /*
1300  * fill up all the fields in prstatus from the given task struct, except
1301  * registers which need to be filled up separately.
1302  */
1303 static void fill_prstatus(struct elf_prstatus *prstatus,
1304                 struct task_struct *p, long signr)
1305 {
1306         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1307         prstatus->pr_sigpend = p->pending.signal.sig[0];
1308         prstatus->pr_sighold = p->blocked.sig[0];
1309         prstatus->pr_pid = task_pid_vnr(p);
1310         prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1311         prstatus->pr_pgrp = task_pgrp_vnr(p);
1312         prstatus->pr_sid = task_session_vnr(p);
1313         if (thread_group_leader(p)) {
1314                 /*
1315                  * This is the record for the group leader.  Add in the
1316                  * cumulative times of previous dead threads.  This total
1317                  * won't include the time of each live thread whose state
1318                  * is included in the core dump.  The final total reported
1319                  * to our parent process when it calls wait4 will include
1320                  * those sums as well as the little bit more time it takes
1321                  * this and each other thread to finish dying after the
1322                  * core dump synchronization phase.
1323                  */
1324                 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1325                                    &prstatus->pr_utime);
1326                 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1327                                    &prstatus->pr_stime);
1328         } else {
1329                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1330                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1331         }
1332         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1333         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1334 }
1335
1336 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1337                        struct mm_struct *mm)
1338 {
1339         unsigned int i, len;
1340         
1341         /* first copy the parameters from user space */
1342         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1343
1344         len = mm->arg_end - mm->arg_start;
1345         if (len >= ELF_PRARGSZ)
1346                 len = ELF_PRARGSZ-1;
1347         if (copy_from_user(&psinfo->pr_psargs,
1348                            (const char __user *)mm->arg_start, len))
1349                 return -EFAULT;
1350         for(i = 0; i < len; i++)
1351                 if (psinfo->pr_psargs[i] == 0)
1352                         psinfo->pr_psargs[i] = ' ';
1353         psinfo->pr_psargs[len] = 0;
1354
1355         psinfo->pr_pid = task_pid_vnr(p);
1356         psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1357         psinfo->pr_pgrp = task_pgrp_vnr(p);
1358         psinfo->pr_sid = task_session_vnr(p);
1359
1360         i = p->state ? ffz(~p->state) + 1 : 0;
1361         psinfo->pr_state = i;
1362         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1363         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1364         psinfo->pr_nice = task_nice(p);
1365         psinfo->pr_flag = p->flags;
1366         SET_UID(psinfo->pr_uid, p->uid);
1367         SET_GID(psinfo->pr_gid, p->gid);
1368         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1369         
1370         return 0;
1371 }
1372
1373 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1374 {
1375         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1376         int i = 0;
1377         do
1378                 i += 2;
1379         while (auxv[i - 2] != AT_NULL);
1380         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1381 }
1382
1383 #ifdef CORE_DUMP_USE_REGSET
1384 #include <linux/regset.h>
1385
1386 struct elf_thread_core_info {
1387         struct elf_thread_core_info *next;
1388         struct task_struct *task;
1389         struct elf_prstatus prstatus;
1390         struct memelfnote notes[0];
1391 };
1392
1393 struct elf_note_info {
1394         struct elf_thread_core_info *thread;
1395         struct memelfnote psinfo;
1396         struct memelfnote auxv;
1397         size_t size;
1398         int thread_notes;
1399 };
1400
1401 /*
1402  * When a regset has a writeback hook, we call it on each thread before
1403  * dumping user memory.  On register window machines, this makes sure the
1404  * user memory backing the register data is up to date before we read it.
1405  */
1406 static void do_thread_regset_writeback(struct task_struct *task,
1407                                        const struct user_regset *regset)
1408 {
1409         if (regset->writeback)
1410                 regset->writeback(task, regset, 1);
1411 }
1412
1413 static int fill_thread_core_info(struct elf_thread_core_info *t,
1414                                  const struct user_regset_view *view,
1415                                  long signr, size_t *total)
1416 {
1417         unsigned int i;
1418
1419         /*
1420          * NT_PRSTATUS is the one special case, because the regset data
1421          * goes into the pr_reg field inside the note contents, rather
1422          * than being the whole note contents.  We fill the reset in here.
1423          * We assume that regset 0 is NT_PRSTATUS.
1424          */
1425         fill_prstatus(&t->prstatus, t->task, signr);
1426         (void) view->regsets[0].get(t->task, &view->regsets[0],
1427                                     0, sizeof(t->prstatus.pr_reg),
1428                                     &t->prstatus.pr_reg, NULL);
1429
1430         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1431                   sizeof(t->prstatus), &t->prstatus);
1432         *total += notesize(&t->notes[0]);
1433
1434         do_thread_regset_writeback(t->task, &view->regsets[0]);
1435
1436         /*
1437          * Each other regset might generate a note too.  For each regset
1438          * that has no core_note_type or is inactive, we leave t->notes[i]
1439          * all zero and we'll know to skip writing it later.
1440          */
1441         for (i = 1; i < view->n; ++i) {
1442                 const struct user_regset *regset = &view->regsets[i];
1443                 do_thread_regset_writeback(t->task, regset);
1444                 if (regset->core_note_type &&
1445                     (!regset->active || regset->active(t->task, regset))) {
1446                         int ret;
1447                         size_t size = regset->n * regset->size;
1448                         void *data = kmalloc(size, GFP_KERNEL);
1449                         if (unlikely(!data))
1450                                 return 0;
1451                         ret = regset->get(t->task, regset,
1452                                           0, size, data, NULL);
1453                         if (unlikely(ret))
1454                                 kfree(data);
1455                         else {
1456                                 if (regset->core_note_type != NT_PRFPREG)
1457                                         fill_note(&t->notes[i], "LINUX",
1458                                                   regset->core_note_type,
1459                                                   size, data);
1460                                 else {
1461                                         t->prstatus.pr_fpvalid = 1;
1462                                         fill_note(&t->notes[i], "CORE",
1463                                                   NT_PRFPREG, size, data);
1464                                 }
1465                                 *total += notesize(&t->notes[i]);
1466                         }
1467                 }
1468         }
1469
1470         return 1;
1471 }
1472
1473 static int fill_note_info(struct elfhdr *elf, int phdrs,
1474                           struct elf_note_info *info,
1475                           long signr, struct pt_regs *regs)
1476 {
1477         struct task_struct *dump_task = current;
1478         const struct user_regset_view *view = task_user_regset_view(dump_task);
1479         struct elf_thread_core_info *t;
1480         struct elf_prpsinfo *psinfo;
1481         struct task_struct *g, *p;
1482         unsigned int i;
1483
1484         info->size = 0;
1485         info->thread = NULL;
1486
1487         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1488         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1489
1490         if (psinfo == NULL)
1491                 return 0;
1492
1493         /*
1494          * Figure out how many notes we're going to need for each thread.
1495          */
1496         info->thread_notes = 0;
1497         for (i = 0; i < view->n; ++i)
1498                 if (view->regsets[i].core_note_type != 0)
1499                         ++info->thread_notes;
1500
1501         /*
1502          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1503          * since it is our one special case.
1504          */
1505         if (unlikely(info->thread_notes == 0) ||
1506             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1507                 WARN_ON(1);
1508                 return 0;
1509         }
1510
1511         /*
1512          * Initialize the ELF file header.
1513          */
1514         fill_elf_header(elf, phdrs,
1515                         view->e_machine, view->e_flags, view->ei_osabi);
1516
1517         /*
1518          * Allocate a structure for each thread.
1519          */
1520         rcu_read_lock();
1521         do_each_thread(g, p)
1522                 if (p->mm == dump_task->mm) {
1523                         t = kzalloc(offsetof(struct elf_thread_core_info,
1524                                              notes[info->thread_notes]),
1525                                     GFP_ATOMIC);
1526                         if (unlikely(!t)) {
1527                                 rcu_read_unlock();
1528                                 return 0;
1529                         }
1530                         t->task = p;
1531                         if (p == dump_task || !info->thread) {
1532                                 t->next = info->thread;
1533                                 info->thread = t;
1534                         } else {
1535                                 /*
1536                                  * Make sure to keep the original task at
1537                                  * the head of the list.
1538                                  */
1539                                 t->next = info->thread->next;
1540                                 info->thread->next = t;
1541                         }
1542                 }
1543         while_each_thread(g, p);
1544         rcu_read_unlock();
1545
1546         /*
1547          * Now fill in each thread's information.
1548          */
1549         for (t = info->thread; t != NULL; t = t->next)
1550                 if (!fill_thread_core_info(t, view, signr, &info->size))
1551                         return 0;
1552
1553         /*
1554          * Fill in the two process-wide notes.
1555          */
1556         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1557         info->size += notesize(&info->psinfo);
1558
1559         fill_auxv_note(&info->auxv, current->mm);
1560         info->size += notesize(&info->auxv);
1561
1562         return 1;
1563 }
1564
1565 static size_t get_note_info_size(struct elf_note_info *info)
1566 {
1567         return info->size;
1568 }
1569
1570 /*
1571  * Write all the notes for each thread.  When writing the first thread, the
1572  * process-wide notes are interleaved after the first thread-specific note.
1573  */
1574 static int write_note_info(struct elf_note_info *info,
1575                            struct file *file, loff_t *foffset)
1576 {
1577         bool first = 1;
1578         struct elf_thread_core_info *t = info->thread;
1579
1580         do {
1581                 int i;
1582
1583                 if (!writenote(&t->notes[0], file, foffset))
1584                         return 0;
1585
1586                 if (first && !writenote(&info->psinfo, file, foffset))
1587                         return 0;
1588                 if (first && !writenote(&info->auxv, file, foffset))
1589                         return 0;
1590
1591                 for (i = 1; i < info->thread_notes; ++i)
1592                         if (t->notes[i].data &&
1593                             !writenote(&t->notes[i], file, foffset))
1594                                 return 0;
1595
1596                 first = 0;
1597                 t = t->next;
1598         } while (t);
1599
1600         return 1;
1601 }
1602
1603 static void free_note_info(struct elf_note_info *info)
1604 {
1605         struct elf_thread_core_info *threads = info->thread;
1606         while (threads) {
1607                 unsigned int i;
1608                 struct elf_thread_core_info *t = threads;
1609                 threads = t->next;
1610                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1611                 for (i = 1; i < info->thread_notes; ++i)
1612                         kfree(t->notes[i].data);
1613                 kfree(t);
1614         }
1615         kfree(info->psinfo.data);
1616 }
1617
1618 #else
1619
1620 /* Here is the structure in which status of each thread is captured. */
1621 struct elf_thread_status
1622 {
1623         struct list_head list;
1624         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1625         elf_fpregset_t fpu;             /* NT_PRFPREG */
1626         struct task_struct *thread;
1627 #ifdef ELF_CORE_COPY_XFPREGS
1628         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1629 #endif
1630         struct memelfnote notes[3];
1631         int num_notes;
1632 };
1633
1634 /*
1635  * In order to add the specific thread information for the elf file format,
1636  * we need to keep a linked list of every threads pr_status and then create
1637  * a single section for them in the final core file.
1638  */
1639 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1640 {
1641         int sz = 0;
1642         struct task_struct *p = t->thread;
1643         t->num_notes = 0;
1644
1645         fill_prstatus(&t->prstatus, p, signr);
1646         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1647         
1648         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1649                   &(t->prstatus));
1650         t->num_notes++;
1651         sz += notesize(&t->notes[0]);
1652
1653         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1654                                                                 &t->fpu))) {
1655                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1656                           &(t->fpu));
1657                 t->num_notes++;
1658                 sz += notesize(&t->notes[1]);
1659         }
1660
1661 #ifdef ELF_CORE_COPY_XFPREGS
1662         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1663                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1664                           sizeof(t->xfpu), &t->xfpu);
1665                 t->num_notes++;
1666                 sz += notesize(&t->notes[2]);
1667         }
1668 #endif  
1669         return sz;
1670 }
1671
1672 struct elf_note_info {
1673         struct memelfnote *notes;
1674         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1675         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1676         struct list_head thread_list;
1677         elf_fpregset_t *fpu;
1678 #ifdef ELF_CORE_COPY_XFPREGS
1679         elf_fpxregset_t *xfpu;
1680 #endif
1681         int thread_status_size;
1682         int numnote;
1683 };
1684
1685 static int fill_note_info(struct elfhdr *elf, int phdrs,
1686                           struct elf_note_info *info,
1687                           long signr, struct pt_regs *regs)
1688 {
1689 #define NUM_NOTES       6
1690         struct list_head *t;
1691         struct task_struct *g, *p;
1692
1693         info->notes = NULL;
1694         info->prstatus = NULL;
1695         info->psinfo = NULL;
1696         info->fpu = NULL;
1697 #ifdef ELF_CORE_COPY_XFPREGS
1698         info->xfpu = NULL;
1699 #endif
1700         INIT_LIST_HEAD(&info->thread_list);
1701
1702         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1703                               GFP_KERNEL);
1704         if (!info->notes)
1705                 return 0;
1706         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1707         if (!info->psinfo)
1708                 return 0;
1709         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1710         if (!info->prstatus)
1711                 return 0;
1712         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1713         if (!info->fpu)
1714                 return 0;
1715 #ifdef ELF_CORE_COPY_XFPREGS
1716         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1717         if (!info->xfpu)
1718                 return 0;
1719 #endif
1720
1721         info->thread_status_size = 0;
1722         if (signr) {
1723                 struct elf_thread_status *ets;
1724                 rcu_read_lock();
1725                 do_each_thread(g, p)
1726                         if (current->mm == p->mm && current != p) {
1727                                 ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
1728                                 if (!ets) {
1729                                         rcu_read_unlock();
1730                                         return 0;
1731                                 }
1732                                 ets->thread = p;
1733                                 list_add(&ets->list, &info->thread_list);
1734                         }
1735                 while_each_thread(g, p);
1736                 rcu_read_unlock();
1737                 list_for_each(t, &info->thread_list) {
1738                         int sz;
1739
1740                         ets = list_entry(t, struct elf_thread_status, list);
1741                         sz = elf_dump_thread_status(signr, ets);
1742                         info->thread_status_size += sz;
1743                 }
1744         }
1745         /* now collect the dump for the current */
1746         memset(info->prstatus, 0, sizeof(*info->prstatus));
1747         fill_prstatus(info->prstatus, current, signr);
1748         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1749
1750         /* Set up header */
1751         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1752
1753         /*
1754          * Set up the notes in similar form to SVR4 core dumps made
1755          * with info from their /proc.
1756          */
1757
1758         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1759                   sizeof(*info->prstatus), info->prstatus);
1760         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1761         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1762                   sizeof(*info->psinfo), info->psinfo);
1763
1764         info->numnote = 2;
1765
1766         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1767
1768         /* Try to dump the FPU. */
1769         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1770                                                                info->fpu);
1771         if (info->prstatus->pr_fpvalid)
1772                 fill_note(info->notes + info->numnote++,
1773                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1774 #ifdef ELF_CORE_COPY_XFPREGS
1775         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1776                 fill_note(info->notes + info->numnote++,
1777                           "LINUX", ELF_CORE_XFPREG_TYPE,
1778                           sizeof(*info->xfpu), info->xfpu);
1779 #endif
1780
1781         return 1;
1782
1783 #undef NUM_NOTES
1784 }
1785
1786 static size_t get_note_info_size(struct elf_note_info *info)
1787 {
1788         int sz = 0;
1789         int i;
1790
1791         for (i = 0; i < info->numnote; i++)
1792                 sz += notesize(info->notes + i);
1793
1794         sz += info->thread_status_size;
1795
1796         return sz;
1797 }
1798
1799 static int write_note_info(struct elf_note_info *info,
1800                            struct file *file, loff_t *foffset)
1801 {
1802         int i;
1803         struct list_head *t;
1804
1805         for (i = 0; i < info->numnote; i++)
1806                 if (!writenote(info->notes + i, file, foffset))
1807                         return 0;
1808
1809         /* write out the thread status notes section */
1810         list_for_each(t, &info->thread_list) {
1811                 struct elf_thread_status *tmp =
1812                                 list_entry(t, struct elf_thread_status, list);
1813
1814                 for (i = 0; i < tmp->num_notes; i++)
1815                         if (!writenote(&tmp->notes[i], file, foffset))
1816                                 return 0;
1817         }
1818
1819         return 1;
1820 }
1821
1822 static void free_note_info(struct elf_note_info *info)
1823 {
1824         while (!list_empty(&info->thread_list)) {
1825                 struct list_head *tmp = info->thread_list.next;
1826                 list_del(tmp);
1827                 kfree(list_entry(tmp, struct elf_thread_status, list));
1828         }
1829
1830         kfree(info->prstatus);
1831         kfree(info->psinfo);
1832         kfree(info->notes);
1833         kfree(info->fpu);
1834 #ifdef ELF_CORE_COPY_XFPREGS
1835         kfree(info->xfpu);
1836 #endif
1837 }
1838
1839 #endif
1840
1841 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1842                                         struct vm_area_struct *gate_vma)
1843 {
1844         struct vm_area_struct *ret = tsk->mm->mmap;
1845
1846         if (ret)
1847                 return ret;
1848         return gate_vma;
1849 }
1850 /*
1851  * Helper function for iterating across a vma list.  It ensures that the caller
1852  * will visit `gate_vma' prior to terminating the search.
1853  */
1854 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1855                                         struct vm_area_struct *gate_vma)
1856 {
1857         struct vm_area_struct *ret;
1858
1859         ret = this_vma->vm_next;
1860         if (ret)
1861                 return ret;
1862         if (this_vma == gate_vma)
1863                 return NULL;
1864         return gate_vma;
1865 }
1866
1867 /*
1868  * Actual dumper
1869  *
1870  * This is a two-pass process; first we find the offsets of the bits,
1871  * and then they are actually written out.  If we run out of core limit
1872  * we just truncate.
1873  */
1874 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1875 {
1876         int has_dumped = 0;
1877         mm_segment_t fs;
1878         int segs;
1879         size_t size = 0;
1880         struct vm_area_struct *vma, *gate_vma;
1881         struct elfhdr *elf = NULL;
1882         loff_t offset = 0, dataoff, foffset;
1883         unsigned long mm_flags;
1884         struct elf_note_info info;
1885
1886         /*
1887          * We no longer stop all VM operations.
1888          * 
1889          * This is because those proceses that could possibly change map_count
1890          * or the mmap / vma pages are now blocked in do_exit on current
1891          * finishing this core dump.
1892          *
1893          * Only ptrace can touch these memory addresses, but it doesn't change
1894          * the map_count or the pages allocated. So no possibility of crashing
1895          * exists while dumping the mm->vm_next areas to the core file.
1896          */
1897   
1898         /* alloc memory for large data structures: too large to be on stack */
1899         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1900         if (!elf)
1901                 goto out;
1902         
1903         segs = current->mm->map_count;
1904 #ifdef ELF_CORE_EXTRA_PHDRS
1905         segs += ELF_CORE_EXTRA_PHDRS;
1906 #endif
1907
1908         gate_vma = get_gate_vma(current);
1909         if (gate_vma != NULL)
1910                 segs++;
1911
1912         /*
1913          * Collect all the non-memory information about the process for the
1914          * notes.  This also sets up the file header.
1915          */
1916         if (!fill_note_info(elf, segs + 1, /* including notes section */
1917                             &info, signr, regs))
1918                 goto cleanup;
1919
1920         has_dumped = 1;
1921         current->flags |= PF_DUMPCORE;
1922   
1923         fs = get_fs();
1924         set_fs(KERNEL_DS);
1925
1926         DUMP_WRITE(elf, sizeof(*elf));
1927         offset += sizeof(*elf);                         /* Elf header */
1928         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1929         foffset = offset;
1930
1931         /* Write notes phdr entry */
1932         {
1933                 struct elf_phdr phdr;
1934                 size_t sz = get_note_info_size(&info);
1935
1936                 sz += elf_coredump_extra_notes_size();
1937
1938                 fill_elf_note_phdr(&phdr, sz, offset);
1939                 offset += sz;
1940                 DUMP_WRITE(&phdr, sizeof(phdr));
1941         }
1942
1943         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1944
1945         /*
1946          * We must use the same mm->flags while dumping core to avoid
1947          * inconsistency between the program headers and bodies, otherwise an
1948          * unusable core file can be generated.
1949          */
1950         mm_flags = current->mm->flags;
1951
1952         /* Write program headers for segments dump */
1953         for (vma = first_vma(current, gate_vma); vma != NULL;
1954                         vma = next_vma(vma, gate_vma)) {
1955                 struct elf_phdr phdr;
1956
1957                 phdr.p_type = PT_LOAD;
1958                 phdr.p_offset = offset;
1959                 phdr.p_vaddr = vma->vm_start;
1960                 phdr.p_paddr = 0;
1961                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
1962                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1963                 offset += phdr.p_filesz;
1964                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1965                 if (vma->vm_flags & VM_WRITE)
1966                         phdr.p_flags |= PF_W;
1967                 if (vma->vm_flags & VM_EXEC)
1968                         phdr.p_flags |= PF_X;
1969                 phdr.p_align = ELF_EXEC_PAGESIZE;
1970
1971                 DUMP_WRITE(&phdr, sizeof(phdr));
1972         }
1973
1974 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1975         ELF_CORE_WRITE_EXTRA_PHDRS;
1976 #endif
1977
1978         /* write out the notes section */
1979         if (!write_note_info(&info, file, &foffset))
1980                 goto end_coredump;
1981
1982         if (elf_coredump_extra_notes_write(file, &foffset))
1983                 goto end_coredump;
1984
1985         /* Align to page */
1986         DUMP_SEEK(dataoff - foffset);
1987
1988         for (vma = first_vma(current, gate_vma); vma != NULL;
1989                         vma = next_vma(vma, gate_vma)) {
1990                 unsigned long addr;
1991                 unsigned long end;
1992
1993                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
1994
1995                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
1996                         struct page *page;
1997                         struct vm_area_struct *tmp_vma;
1998
1999                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2000                                                 &page, &tmp_vma) <= 0) {
2001                                 DUMP_SEEK(PAGE_SIZE);
2002                         } else {
2003                                 if (page == ZERO_PAGE(0)) {
2004                                         if (!dump_seek(file, PAGE_SIZE)) {
2005                                                 page_cache_release(page);
2006                                                 goto end_coredump;
2007                                         }
2008                                 } else {
2009                                         void *kaddr;
2010                                         flush_cache_page(tmp_vma, addr,
2011                                                          page_to_pfn(page));
2012                                         kaddr = kmap(page);
2013                                         if ((size += PAGE_SIZE) > limit ||
2014                                             !dump_write(file, kaddr,
2015                                             PAGE_SIZE)) {
2016                                                 kunmap(page);
2017                                                 page_cache_release(page);
2018                                                 goto end_coredump;
2019                                         }
2020                                         kunmap(page);
2021                                 }
2022                                 page_cache_release(page);
2023                         }
2024                 }
2025         }
2026
2027 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2028         ELF_CORE_WRITE_EXTRA_DATA;
2029 #endif
2030
2031 end_coredump:
2032         set_fs(fs);
2033
2034 cleanup:
2035         free_note_info(&info);
2036         kfree(elf);
2037 out:
2038         return has_dumped;
2039 }
2040
2041 #endif          /* USE_ELF_CORE_DUMP */
2042
2043 static int __init init_elf_binfmt(void)
2044 {
2045         return register_binfmt(&elf_format);
2046 }
2047
2048 static void __exit exit_elf_binfmt(void)
2049 {
2050         /* Remove the COFF and ELF loaders. */
2051         unregister_binfmt(&elf_format);
2052 }
2053
2054 core_initcall(init_elf_binfmt);
2055 module_exit(exit_elf_binfmt);
2056 MODULE_LICENSE("GPL");