Merge commit 'v2.6.30-rc1' into core/urgent
[linux-2.6] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <asm/uaccess.h>
35 #include <asm/param.h>
36 #include <asm/page.h>
37
/* Entry points referenced by the elf_format table before their definitions. */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
			     struct elf_phdr *eppnt, int prot, int type,
			     unsigned long total_size);

/*
 * Core dumping is optional.  When it is not configured in, the
 * core-dump hook is a NULL pointer so that nobody tries to call it.
 */
#if !defined(USE_ELF_CORE_DUMP) || !defined(CONFIG_ELF_CORE)
#define elf_core_dump	NULL
#else
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file,
			 unsigned long limit);
#endif
52
/*
 * ELF_MIN_ALIGN is the granularity used for all segment rounding below:
 * the larger of the ELF executable page size and the kernel page size.
 */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

/* Default value for architectures that do not supply their own. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/*
 * Address rounding helpers, all in units of ELF_MIN_ALIGN:
 *   ELF_PAGESTART  - round down to the start of the containing page
 *   ELF_PAGEOFFSET - offset of the address inside its page
 *   ELF_PAGEALIGN  - round up to the next page boundary
 *
 * Both masking macros widen (ELF_MIN_ALIGN - 1) to unsigned long *before*
 * complementing it.  Complementing a narrower unsigned value first would
 * zero-extend the mask and silently clear the upper address bits.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
66
67 static struct linux_binfmt elf_format = {
68                 .module         = THIS_MODULE,
69                 .load_binary    = load_elf_binary,
70                 .load_shlib     = load_elf_library,
71                 .core_dump      = elf_core_dump,
72                 .min_coredump   = ELF_EXEC_PAGESIZE,
73                 .hasvdso        = 1
74 };
75
76 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Stack manipulation helpers, with one variant per stack growth direction.
 *
 *   STACK_ADD(sp, items)   - address 'items' elf_addr_t slots further into
 *                            the stack than sp
 *   STACK_ROUND(sp, items) - like STACK_ADD, then rounded to a 16-byte
 *                            boundary in the growth direction
 *   STACK_ALLOC(sp, len)   - reserve len bytes; updates sp in place and
 *                            yields the start of the reserved area
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as STACK_ROUND(sp, a + b) are treated as a single operand.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif
135
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
254         ei_index += 2;
255
256         sp = STACK_ADD(p, ei_index);
257
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
295         if (__put_user(0, argv))
296                 return -EFAULT;
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 #ifndef elf_map
319
320 static unsigned long elf_map(struct file *filep, unsigned long addr,
321                 struct elf_phdr *eppnt, int prot, int type,
322                 unsigned long total_size)
323 {
324         unsigned long map_addr;
325         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
326         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
327         addr = ELF_PAGESTART(addr);
328         size = ELF_PAGEALIGN(size);
329
330         /* mmap() will return -EINVAL if given a zero size, but a
331          * segment with zero filesize is perfectly valid */
332         if (!size)
333                 return addr;
334
335         down_write(&current->mm->mmap_sem);
336         /*
337         * total_size is the size of the ELF (interpreter) image.
338         * The _first_ mmap needs to know the full size, otherwise
339         * randomization might put this image into an overlapping
340         * position with the ELF binary image. (since size < total_size)
341         * So we first map the 'big' image - and unmap the remainder at
342         * the end. (which unmap is needed for ELF images with holes.)
343         */
344         if (total_size) {
345                 total_size = ELF_PAGEALIGN(total_size);
346                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
347                 if (!BAD_ADDR(map_addr))
348                         do_munmap(current->mm, map_addr+size, total_size-size);
349         } else
350                 map_addr = do_mmap(filep, addr, size, prot, type, off);
351
352         up_write(&current->mm->mmap_sem);
353         return(map_addr);
354 }
355
356 #endif /* !elf_map */
357
358 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
359 {
360         int i, first_idx = -1, last_idx = -1;
361
362         for (i = 0; i < nr; i++) {
363                 if (cmds[i].p_type == PT_LOAD) {
364                         last_idx = i;
365                         if (first_idx == -1)
366                                 first_idx = i;
367                 }
368         }
369         if (first_idx == -1)
370                 return 0;
371
372         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
373                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
374 }
375
376
377 /* This is much more generalized than the library routine read function,
378    so we keep this separate.  Technically the library read function
379    is only provided so that we can read a.out libraries that have
380    an ELF header */
381
382 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
383                 struct file *interpreter, unsigned long *interp_map_addr,
384                 unsigned long no_base)
385 {
386         struct elf_phdr *elf_phdata;
387         struct elf_phdr *eppnt;
388         unsigned long load_addr = 0;
389         int load_addr_set = 0;
390         unsigned long last_bss = 0, elf_bss = 0;
391         unsigned long error = ~0UL;
392         unsigned long total_size;
393         int retval, i, size;
394
395         /* First of all, some simple consistency checks */
396         if (interp_elf_ex->e_type != ET_EXEC &&
397             interp_elf_ex->e_type != ET_DYN)
398                 goto out;
399         if (!elf_check_arch(interp_elf_ex))
400                 goto out;
401         if (!interpreter->f_op || !interpreter->f_op->mmap)
402                 goto out;
403
404         /*
405          * If the size of this structure has changed, then punt, since
406          * we will be doing the wrong thing.
407          */
408         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
409                 goto out;
410         if (interp_elf_ex->e_phnum < 1 ||
411                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
412                 goto out;
413
414         /* Now read in all of the header information */
415         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
416         if (size > ELF_MIN_ALIGN)
417                 goto out;
418         elf_phdata = kmalloc(size, GFP_KERNEL);
419         if (!elf_phdata)
420                 goto out;
421
422         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
423                              (char *)elf_phdata,size);
424         error = -EIO;
425         if (retval != size) {
426                 if (retval < 0)
427                         error = retval; 
428                 goto out_close;
429         }
430
431         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
432         if (!total_size) {
433                 error = -EINVAL;
434                 goto out_close;
435         }
436
437         eppnt = elf_phdata;
438         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
439                 if (eppnt->p_type == PT_LOAD) {
440                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
441                         int elf_prot = 0;
442                         unsigned long vaddr = 0;
443                         unsigned long k, map_addr;
444
445                         if (eppnt->p_flags & PF_R)
446                                 elf_prot = PROT_READ;
447                         if (eppnt->p_flags & PF_W)
448                                 elf_prot |= PROT_WRITE;
449                         if (eppnt->p_flags & PF_X)
450                                 elf_prot |= PROT_EXEC;
451                         vaddr = eppnt->p_vaddr;
452                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
453                                 elf_type |= MAP_FIXED;
454                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
455                                 load_addr = -vaddr;
456
457                         map_addr = elf_map(interpreter, load_addr + vaddr,
458                                         eppnt, elf_prot, elf_type, total_size);
459                         total_size = 0;
460                         if (!*interp_map_addr)
461                                 *interp_map_addr = map_addr;
462                         error = map_addr;
463                         if (BAD_ADDR(map_addr))
464                                 goto out_close;
465
466                         if (!load_addr_set &&
467                             interp_elf_ex->e_type == ET_DYN) {
468                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
469                                 load_addr_set = 1;
470                         }
471
472                         /*
473                          * Check to see if the section's size will overflow the
474                          * allowed task size. Note that p_filesz must always be
475                          * <= p_memsize so it's only necessary to check p_memsz.
476                          */
477                         k = load_addr + eppnt->p_vaddr;
478                         if (BAD_ADDR(k) ||
479                             eppnt->p_filesz > eppnt->p_memsz ||
480                             eppnt->p_memsz > TASK_SIZE ||
481                             TASK_SIZE - eppnt->p_memsz < k) {
482                                 error = -ENOMEM;
483                                 goto out_close;
484                         }
485
486                         /*
487                          * Find the end of the file mapping for this phdr, and
488                          * keep track of the largest address we see for this.
489                          */
490                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
491                         if (k > elf_bss)
492                                 elf_bss = k;
493
494                         /*
495                          * Do the same thing for the memory mapping - between
496                          * elf_bss and last_bss is the bss section.
497                          */
498                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
499                         if (k > last_bss)
500                                 last_bss = k;
501                 }
502         }
503
504         /*
505          * Now fill out the bss section.  First pad the last page up
506          * to the page boundary, and then perform a mmap to make sure
507          * that there are zero-mapped pages up to and including the 
508          * last bss page.
509          */
510         if (padzero(elf_bss)) {
511                 error = -EFAULT;
512                 goto out_close;
513         }
514
515         /* What we have mapped so far */
516         elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
517
518         /* Map the last of the bss segment */
519         if (last_bss > elf_bss) {
520                 down_write(&current->mm->mmap_sem);
521                 error = do_brk(elf_bss, last_bss - elf_bss);
522                 up_write(&current->mm->mmap_sem);
523                 if (BAD_ADDR(error))
524                         goto out_close;
525         }
526
527         error = load_addr;
528
529 out_close:
530         kfree(elf_phdata);
531 out:
532         return error;
533 }
534
535 /*
536  * These are the functions used to load ELF style executables and shared
537  * libraries.  There is no binary dependent code anywhere else.
538  */
539
540 #define INTERPRETER_NONE 0
541 #define INTERPRETER_ELF 2
542
543 #ifndef STACK_RND_MASK
544 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
545 #endif
546
547 static unsigned long randomize_stack_top(unsigned long stack_top)
548 {
549         unsigned int random_variable = 0;
550
551         if ((current->flags & PF_RANDOMIZE) &&
552                 !(current->personality & ADDR_NO_RANDOMIZE)) {
553                 random_variable = get_random_int() & STACK_RND_MASK;
554                 random_variable <<= PAGE_SHIFT;
555         }
556 #ifdef CONFIG_STACK_GROWSUP
557         return PAGE_ALIGN(stack_top) + random_variable;
558 #else
559         return PAGE_ALIGN(stack_top) - random_variable;
560 #endif
561 }
562
563 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
564 {
565         struct file *interpreter = NULL; /* to shut gcc up */
566         unsigned long load_addr = 0, load_bias = 0;
567         int load_addr_set = 0;
568         char * elf_interpreter = NULL;
569         unsigned long error;
570         struct elf_phdr *elf_ppnt, *elf_phdata;
571         unsigned long elf_bss, elf_brk;
572         int retval, i;
573         unsigned int size;
574         unsigned long elf_entry;
575         unsigned long interp_load_addr = 0;
576         unsigned long start_code, end_code, start_data, end_data;
577         unsigned long reloc_func_desc = 0;
578         int executable_stack = EXSTACK_DEFAULT;
579         unsigned long def_flags = 0;
580         struct {
581                 struct elfhdr elf_ex;
582                 struct elfhdr interp_elf_ex;
583         } *loc;
584
585         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
586         if (!loc) {
587                 retval = -ENOMEM;
588                 goto out_ret;
589         }
590         
591         /* Get the exec-header */
592         loc->elf_ex = *((struct elfhdr *)bprm->buf);
593
594         retval = -ENOEXEC;
595         /* First of all, some simple consistency checks */
596         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
597                 goto out;
598
599         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
600                 goto out;
601         if (!elf_check_arch(&loc->elf_ex))
602                 goto out;
603         if (!bprm->file->f_op||!bprm->file->f_op->mmap)
604                 goto out;
605
606         /* Now read in all of the header information */
607         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
608                 goto out;
609         if (loc->elf_ex.e_phnum < 1 ||
610                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
611                 goto out;
612         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
613         retval = -ENOMEM;
614         elf_phdata = kmalloc(size, GFP_KERNEL);
615         if (!elf_phdata)
616                 goto out;
617
618         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
619                              (char *)elf_phdata, size);
620         if (retval != size) {
621                 if (retval >= 0)
622                         retval = -EIO;
623                 goto out_free_ph;
624         }
625
626         elf_ppnt = elf_phdata;
627         elf_bss = 0;
628         elf_brk = 0;
629
630         start_code = ~0UL;
631         end_code = 0;
632         start_data = 0;
633         end_data = 0;
634
635         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
636                 if (elf_ppnt->p_type == PT_INTERP) {
637                         /* This is the program interpreter used for
638                          * shared libraries - for now assume that this
639                          * is an a.out format binary
640                          */
641                         retval = -ENOEXEC;
642                         if (elf_ppnt->p_filesz > PATH_MAX || 
643                             elf_ppnt->p_filesz < 2)
644                                 goto out_free_ph;
645
646                         retval = -ENOMEM;
647                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
648                                                   GFP_KERNEL);
649                         if (!elf_interpreter)
650                                 goto out_free_ph;
651
652                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
653                                              elf_interpreter,
654                                              elf_ppnt->p_filesz);
655                         if (retval != elf_ppnt->p_filesz) {
656                                 if (retval >= 0)
657                                         retval = -EIO;
658                                 goto out_free_interp;
659                         }
660                         /* make sure path is NULL terminated */
661                         retval = -ENOEXEC;
662                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
663                                 goto out_free_interp;
664
665                         /*
666                          * The early SET_PERSONALITY here is so that the lookup
667                          * for the interpreter happens in the namespace of the 
668                          * to-be-execed image.  SET_PERSONALITY can select an
669                          * alternate root.
670                          *
671                          * However, SET_PERSONALITY is NOT allowed to switch
672                          * this task into the new images's memory mapping
673                          * policy - that is, TASK_SIZE must still evaluate to
674                          * that which is appropriate to the execing application.
675                          * This is because exit_mmap() needs to have TASK_SIZE
676                          * evaluate to the size of the old image.
677                          *
678                          * So if (say) a 64-bit application is execing a 32-bit
679                          * application it is the architecture's responsibility
680                          * to defer changing the value of TASK_SIZE until the
681                          * switch really is going to happen - do this in
682                          * flush_thread().      - akpm
683                          */
684                         SET_PERSONALITY(loc->elf_ex);
685
686                         interpreter = open_exec(elf_interpreter);
687                         retval = PTR_ERR(interpreter);
688                         if (IS_ERR(interpreter))
689                                 goto out_free_interp;
690
691                         /*
692                          * If the binary is not readable then enforce
693                          * mm->dumpable = 0 regardless of the interpreter's
694                          * permissions.
695                          */
696                         if (file_permission(interpreter, MAY_READ) < 0)
697                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
698
699                         retval = kernel_read(interpreter, 0, bprm->buf,
700                                              BINPRM_BUF_SIZE);
701                         if (retval != BINPRM_BUF_SIZE) {
702                                 if (retval >= 0)
703                                         retval = -EIO;
704                                 goto out_free_dentry;
705                         }
706
707                         /* Get the exec headers */
708                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
709                         break;
710                 }
711                 elf_ppnt++;
712         }
713
714         elf_ppnt = elf_phdata;
715         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
716                 if (elf_ppnt->p_type == PT_GNU_STACK) {
717                         if (elf_ppnt->p_flags & PF_X)
718                                 executable_stack = EXSTACK_ENABLE_X;
719                         else
720                                 executable_stack = EXSTACK_DISABLE_X;
721                         break;
722                 }
723
724         /* Some simple consistency checks for the interpreter */
725         if (elf_interpreter) {
726                 retval = -ELIBBAD;
727                 /* Not an ELF interpreter */
728                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
729                         goto out_free_dentry;
730                 /* Verify the interpreter has a valid arch */
731                 if (!elf_check_arch(&loc->interp_elf_ex))
732                         goto out_free_dentry;
733         } else {
734                 /* Executables without an interpreter also need a personality  */
735                 SET_PERSONALITY(loc->elf_ex);
736         }
737
738         /* Flush all traces of the currently running executable */
739         retval = flush_old_exec(bprm);
740         if (retval)
741                 goto out_free_dentry;
742
743         /* OK, This is the point of no return */
744         current->flags &= ~PF_FORKNOEXEC;
745         current->mm->def_flags = def_flags;
746
747         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
748            may depend on the personality.  */
749         SET_PERSONALITY(loc->elf_ex);
750         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
751                 current->personality |= READ_IMPLIES_EXEC;
752
753         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
754                 current->flags |= PF_RANDOMIZE;
755         arch_pick_mmap_layout(current->mm);
756
757         /* Do this so that we can load the interpreter, if need be.  We will
758            change some of these later */
759         current->mm->free_area_cache = current->mm->mmap_base;
760         current->mm->cached_hole_size = 0;
761         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
762                                  executable_stack);
763         if (retval < 0) {
764                 send_sig(SIGKILL, current, 0);
765                 goto out_free_dentry;
766         }
767         
768         current->mm->start_stack = bprm->p;
769
770         /* Now we do a little grungy work by mmaping the ELF image into
771            the correct location in memory. */
772         for(i = 0, elf_ppnt = elf_phdata;
773             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
774                 int elf_prot = 0, elf_flags;
775                 unsigned long k, vaddr;
776
777                 if (elf_ppnt->p_type != PT_LOAD)
778                         continue;
779
780                 if (unlikely (elf_brk > elf_bss)) {
781                         unsigned long nbyte;
782                     
783                         /* There was a PT_LOAD segment with p_memsz > p_filesz
784                            before this one. Map anonymous pages, if needed,
785                            and clear the area.  */
786                         retval = set_brk (elf_bss + load_bias,
787                                           elf_brk + load_bias);
788                         if (retval) {
789                                 send_sig(SIGKILL, current, 0);
790                                 goto out_free_dentry;
791                         }
792                         nbyte = ELF_PAGEOFFSET(elf_bss);
793                         if (nbyte) {
794                                 nbyte = ELF_MIN_ALIGN - nbyte;
795                                 if (nbyte > elf_brk - elf_bss)
796                                         nbyte = elf_brk - elf_bss;
797                                 if (clear_user((void __user *)elf_bss +
798                                                         load_bias, nbyte)) {
799                                         /*
800                                          * This bss-zeroing can fail if the ELF
801                                          * file specifies odd protections. So
802                                          * we don't check the return value
803                                          */
804                                 }
805                         }
806                 }
807
808                 if (elf_ppnt->p_flags & PF_R)
809                         elf_prot |= PROT_READ;
810                 if (elf_ppnt->p_flags & PF_W)
811                         elf_prot |= PROT_WRITE;
812                 if (elf_ppnt->p_flags & PF_X)
813                         elf_prot |= PROT_EXEC;
814
815                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
816
817                 vaddr = elf_ppnt->p_vaddr;
818                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
819                         elf_flags |= MAP_FIXED;
820                 } else if (loc->elf_ex.e_type == ET_DYN) {
821                         /* Try and get dynamic programs out of the way of the
822                          * default mmap base, as well as whatever program they
823                          * might try to exec.  This is because the brk will
824                          * follow the loader, and is not movable.  */
825 #ifdef CONFIG_X86
826                         load_bias = 0;
827 #else
828                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
829 #endif
830                 }
831
832                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
833                                 elf_prot, elf_flags, 0);
834                 if (BAD_ADDR(error)) {
835                         send_sig(SIGKILL, current, 0);
836                         retval = IS_ERR((void *)error) ?
837                                 PTR_ERR((void*)error) : -EINVAL;
838                         goto out_free_dentry;
839                 }
840
841                 if (!load_addr_set) {
842                         load_addr_set = 1;
843                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
844                         if (loc->elf_ex.e_type == ET_DYN) {
845                                 load_bias += error -
846                                              ELF_PAGESTART(load_bias + vaddr);
847                                 load_addr += load_bias;
848                                 reloc_func_desc = load_bias;
849                         }
850                 }
851                 k = elf_ppnt->p_vaddr;
852                 if (k < start_code)
853                         start_code = k;
854                 if (start_data < k)
855                         start_data = k;
856
857                 /*
858                  * Check to see if the section's size will overflow the
859                  * allowed task size. Note that p_filesz must always be
860                  * <= p_memsz so it is only necessary to check p_memsz.
861                  */
862                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
863                     elf_ppnt->p_memsz > TASK_SIZE ||
864                     TASK_SIZE - elf_ppnt->p_memsz < k) {
865                         /* set_brk can never work. Avoid overflows. */
866                         send_sig(SIGKILL, current, 0);
867                         retval = -EINVAL;
868                         goto out_free_dentry;
869                 }
870
871                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
872
873                 if (k > elf_bss)
874                         elf_bss = k;
875                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
876                         end_code = k;
877                 if (end_data < k)
878                         end_data = k;
879                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
880                 if (k > elf_brk)
881                         elf_brk = k;
882         }
883
884         loc->elf_ex.e_entry += load_bias;
885         elf_bss += load_bias;
886         elf_brk += load_bias;
887         start_code += load_bias;
888         end_code += load_bias;
889         start_data += load_bias;
890         end_data += load_bias;
891
892         /* Calling set_brk effectively mmaps the pages that we need
893          * for the bss and break sections.  We must do this before
894          * mapping in the interpreter, to make sure it doesn't wind
895          * up getting placed where the bss needs to go.
896          */
897         retval = set_brk(elf_bss, elf_brk);
898         if (retval) {
899                 send_sig(SIGKILL, current, 0);
900                 goto out_free_dentry;
901         }
902         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
903                 send_sig(SIGSEGV, current, 0);
904                 retval = -EFAULT; /* Nobody gets to see this, but.. */
905                 goto out_free_dentry;
906         }
907
908         if (elf_interpreter) {
909                 unsigned long uninitialized_var(interp_map_addr);
910
911                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
912                                             interpreter,
913                                             &interp_map_addr,
914                                             load_bias);
915                 if (!IS_ERR((void *)elf_entry)) {
916                         /*
917                          * load_elf_interp() returns relocation
918                          * adjustment
919                          */
920                         interp_load_addr = elf_entry;
921                         elf_entry += loc->interp_elf_ex.e_entry;
922                 }
923                 if (BAD_ADDR(elf_entry)) {
924                         force_sig(SIGSEGV, current);
925                         retval = IS_ERR((void *)elf_entry) ?
926                                         (int)elf_entry : -EINVAL;
927                         goto out_free_dentry;
928                 }
929                 reloc_func_desc = interp_load_addr;
930
931                 allow_write_access(interpreter);
932                 fput(interpreter);
933                 kfree(elf_interpreter);
934         } else {
935                 elf_entry = loc->elf_ex.e_entry;
936                 if (BAD_ADDR(elf_entry)) {
937                         force_sig(SIGSEGV, current);
938                         retval = -EINVAL;
939                         goto out_free_dentry;
940                 }
941         }
942
943         kfree(elf_phdata);
944
945         set_binfmt(&elf_format);
946
947 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
948         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
949         if (retval < 0) {
950                 send_sig(SIGKILL, current, 0);
951                 goto out;
952         }
953 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
954
955         install_exec_creds(bprm);
956         current->flags &= ~PF_FORKNOEXEC;
957         retval = create_elf_tables(bprm, &loc->elf_ex,
958                           load_addr, interp_load_addr);
959         if (retval < 0) {
960                 send_sig(SIGKILL, current, 0);
961                 goto out;
962         }
963         /* N.B. passed_fileno might not be initialized? */
964         current->mm->end_code = end_code;
965         current->mm->start_code = start_code;
966         current->mm->start_data = start_data;
967         current->mm->end_data = end_data;
968         current->mm->start_stack = bprm->p;
969
970 #ifdef arch_randomize_brk
971         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
972                 current->mm->brk = current->mm->start_brk =
973                         arch_randomize_brk(current->mm);
974 #endif
975
976         if (current->personality & MMAP_PAGE_ZERO) {
977                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
978                    and some applications "depend" upon this behavior.
979                    Since we do not have the power to recompile these, we
980                    emulate the SVr4 behavior. Sigh. */
981                 down_write(&current->mm->mmap_sem);
982                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
983                                 MAP_FIXED | MAP_PRIVATE, 0);
984                 up_write(&current->mm->mmap_sem);
985         }
986
987 #ifdef ELF_PLAT_INIT
988         /*
989          * The ABI may specify that certain registers be set up in special
990          * ways (on i386 %edx is the address of a DT_FINI function, for
991          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
992          * that the e_entry field is the address of the function descriptor
993          * for the startup routine, rather than the address of the startup
994          * routine itself.  This macro performs whatever initialization to
995          * the regs structure is required as well as any relocations to the
996          * function descriptor entries when executing dynamically linked apps.
997          */
998         ELF_PLAT_INIT(regs, reloc_func_desc);
999 #endif
1000
1001         start_thread(regs, elf_entry, bprm->p);
1002         retval = 0;
1003 out:
1004         kfree(loc);
1005 out_ret:
1006         return retval;
1007
1008         /* error cleanup */
1009 out_free_dentry:
1010         allow_write_access(interpreter);
1011         if (interpreter)
1012                 fput(interpreter);
1013 out_free_interp:
1014         kfree(elf_interpreter);
1015 out_free_ph:
1016         kfree(elf_phdata);
1017         goto out;
1018 }
1019
1020 /* This is really simpleminded and specialized - we are loading an
1021    a.out library that is given an ELF header. */
1022 static int load_elf_library(struct file *file)
1023 {
1024         struct elf_phdr *elf_phdata;
1025         struct elf_phdr *eppnt;
1026         unsigned long elf_bss, bss, len;
1027         int retval, error, i, j;
1028         struct elfhdr elf_ex;
1029
1030         error = -ENOEXEC;
1031         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1032         if (retval != sizeof(elf_ex))
1033                 goto out;
1034
1035         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1036                 goto out;
1037
1038         /* First of all, some simple consistency checks */
1039         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1040             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1041                 goto out;
1042
1043         /* Now read in all of the header information */
1044
1045         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1046         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1047
1048         error = -ENOMEM;
1049         elf_phdata = kmalloc(j, GFP_KERNEL);
1050         if (!elf_phdata)
1051                 goto out;
1052
1053         eppnt = elf_phdata;
1054         error = -ENOEXEC;
1055         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1056         if (retval != j)
1057                 goto out_free_ph;
1058
1059         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1060                 if ((eppnt + i)->p_type == PT_LOAD)
1061                         j++;
1062         if (j != 1)
1063                 goto out_free_ph;
1064
1065         while (eppnt->p_type != PT_LOAD)
1066                 eppnt++;
1067
1068         /* Now use mmap to map the library into memory. */
1069         down_write(&current->mm->mmap_sem);
1070         error = do_mmap(file,
1071                         ELF_PAGESTART(eppnt->p_vaddr),
1072                         (eppnt->p_filesz +
1073                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1074                         PROT_READ | PROT_WRITE | PROT_EXEC,
1075                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1076                         (eppnt->p_offset -
1077                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1078         up_write(&current->mm->mmap_sem);
1079         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1080                 goto out_free_ph;
1081
1082         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1083         if (padzero(elf_bss)) {
1084                 error = -EFAULT;
1085                 goto out_free_ph;
1086         }
1087
1088         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1089                             ELF_MIN_ALIGN - 1);
1090         bss = eppnt->p_memsz + eppnt->p_vaddr;
1091         if (bss > len) {
1092                 down_write(&current->mm->mmap_sem);
1093                 do_brk(len, bss - len);
1094                 up_write(&current->mm->mmap_sem);
1095         }
1096         error = 0;
1097
1098 out_free_ph:
1099         kfree(elf_phdata);
1100 out:
1101         return error;
1102 }
1103
1104 /*
1105  * Note that some platforms still use traditional core dumps and not
1106  * the ELF core dump.  Each platform can select it as appropriate.
1107  */
1108 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1109
1110 /*
1111  * ELF core dumper
1112  *
1113  * Modelled on fs/exec.c:aout_core_dump()
1114  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1115  */
1116 /*
1117  * These are the only things you should do on a core-file: use only these
1118  * functions to write out all the necessary info.
1119  */
1120 static int dump_write(struct file *file, const void *addr, int nr)
1121 {
1122         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1123 }
1124
1125 static int dump_seek(struct file *file, loff_t off)
1126 {
1127         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1128                 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1129                         return 0;
1130         } else {
1131                 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1132                 if (!buf)
1133                         return 0;
1134                 while (off > 0) {
1135                         unsigned long n = off;
1136                         if (n > PAGE_SIZE)
1137                                 n = PAGE_SIZE;
1138                         if (!dump_write(file, buf, n))
1139                                 return 0;
1140                         off -= n;
1141                 }
1142                 free_page((unsigned long)buf);
1143         }
1144         return 1;
1145 }
1146
1147 /*
1148  * Decide what to dump of a segment, part, all or none.
1149  */
1150 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1151                                    unsigned long mm_flags)
1152 {
1153 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1154
1155         /* The vma can be set up to tell us the answer directly.  */
1156         if (vma->vm_flags & VM_ALWAYSDUMP)
1157                 goto whole;
1158
1159         /* Hugetlb memory check */
1160         if (vma->vm_flags & VM_HUGETLB) {
1161                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1162                         goto whole;
1163                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1164                         goto whole;
1165         }
1166
1167         /* Do not dump I/O mapped devices or special mappings */
1168         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1169                 return 0;
1170
1171         /* By default, dump shared memory if mapped from an anonymous file. */
1172         if (vma->vm_flags & VM_SHARED) {
1173                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1174                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1175                         goto whole;
1176                 return 0;
1177         }
1178
1179         /* Dump segments that have been written to.  */
1180         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1181                 goto whole;
1182         if (vma->vm_file == NULL)
1183                 return 0;
1184
1185         if (FILTER(MAPPED_PRIVATE))
1186                 goto whole;
1187
1188         /*
1189          * If this looks like the beginning of a DSO or executable mapping,
1190          * check for an ELF header.  If we find one, dump the first page to
1191          * aid in determining what was mapped here.
1192          */
1193         if (FILTER(ELF_HEADERS) &&
1194             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1195                 u32 __user *header = (u32 __user *) vma->vm_start;
1196                 u32 word;
1197                 mm_segment_t fs = get_fs();
1198                 /*
1199                  * Doing it this way gets the constant folded by GCC.
1200                  */
1201                 union {
1202                         u32 cmp;
1203                         char elfmag[SELFMAG];
1204                 } magic;
1205                 BUILD_BUG_ON(SELFMAG != sizeof word);
1206                 magic.elfmag[EI_MAG0] = ELFMAG0;
1207                 magic.elfmag[EI_MAG1] = ELFMAG1;
1208                 magic.elfmag[EI_MAG2] = ELFMAG2;
1209                 magic.elfmag[EI_MAG3] = ELFMAG3;
1210                 /*
1211                  * Switch to the user "segment" for get_user(),
1212                  * then put back what elf_core_dump() had in place.
1213                  */
1214                 set_fs(USER_DS);
1215                 if (unlikely(get_user(word, header)))
1216                         word = 0;
1217                 set_fs(fs);
1218                 if (word == magic.cmp)
1219                         return PAGE_SIZE;
1220         }
1221
1222 #undef  FILTER
1223
1224         return 0;
1225
1226 whole:
1227         return vma->vm_end - vma->vm_start;
1228 }
1229
/*
 * In-memory description of one ELF note that is waiting to be written
 * to a core file.
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type, stored as n_type */
	unsigned int datasz;	/* number of bytes at @data */
	void *data;		/* note payload */
};
1238
1239 static int notesize(struct memelfnote *en)
1240 {
1241         int sz;
1242
1243         sz = sizeof(struct elf_note);
1244         sz += roundup(strlen(en->name) + 1, 4);
1245         sz += roundup(en->datasz, 4);
1246
1247         return sz;
1248 }
1249
1250 #define DUMP_WRITE(addr, nr, foffset)   \
1251         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1252
1253 static int alignfile(struct file *file, loff_t *foffset)
1254 {
1255         static const char buf[4] = { 0, };
1256         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1257         return 1;
1258 }
1259
1260 static int writenote(struct memelfnote *men, struct file *file,
1261                         loff_t *foffset)
1262 {
1263         struct elf_note en;
1264         en.n_namesz = strlen(men->name) + 1;
1265         en.n_descsz = men->datasz;
1266         en.n_type = men->type;
1267
1268         DUMP_WRITE(&en, sizeof(en), foffset);
1269         DUMP_WRITE(men->name, en.n_namesz, foffset);
1270         if (!alignfile(file, foffset))
1271                 return 0;
1272         DUMP_WRITE(men->data, men->datasz, foffset);
1273         if (!alignfile(file, foffset))
1274                 return 0;
1275
1276         return 1;
1277 }
1278 #undef DUMP_WRITE
1279
/*
 * Shorthands for the core dump writer.  Both macros pick up names from
 * the scope they are expanded in: `file` (the core file), and for
 * DUMP_WRITE also `size` (running byte total) and `limit` (the maximum
 * allowed).  On failure they jump to the local label `end_coredump`.
 *
 * DUMP_WRITE adds (nr) to `size` first and bails out if that exceeds
 * `limit` or if dump_write() fails; DUMP_SEEK bails out if dump_seek()
 * fails.
 *
 * Each one expands to a bare `if` statement that already ends in `;`,
 * so it must not be used as the body of an if/else without braces.
 */
1280 #define DUMP_WRITE(addr, nr)    \
1281         if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1282                 goto end_coredump;
1283 #define DUMP_SEEK(off)  \
1284         if (!dump_seek(file, (off))) \
1285                 goto end_coredump;
1286
1287 static void fill_elf_header(struct elfhdr *elf, int segs,
1288                             u16 machine, u32 flags, u8 osabi)
1289 {
1290         memset(elf, 0, sizeof(*elf));
1291
1292         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1293         elf->e_ident[EI_CLASS] = ELF_CLASS;
1294         elf->e_ident[EI_DATA] = ELF_DATA;
1295         elf->e_ident[EI_VERSION] = EV_CURRENT;
1296         elf->e_ident[EI_OSABI] = ELF_OSABI;
1297
1298         elf->e_type = ET_CORE;
1299         elf->e_machine = machine;
1300         elf->e_version = EV_CURRENT;
1301         elf->e_phoff = sizeof(struct elfhdr);
1302         elf->e_flags = flags;
1303         elf->e_ehsize = sizeof(struct elfhdr);
1304         elf->e_phentsize = sizeof(struct elf_phdr);
1305         elf->e_phnum = segs;
1306
1307         return;
1308 }
1309
1310 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1311 {
1312         phdr->p_type = PT_NOTE;
1313         phdr->p_offset = offset;
1314         phdr->p_vaddr = 0;
1315         phdr->p_paddr = 0;
1316         phdr->p_filesz = sz;
1317         phdr->p_memsz = 0;
1318         phdr->p_flags = 0;
1319         phdr->p_align = 0;
1320         return;
1321 }
1322
1323 static void fill_note(struct memelfnote *note, const char *name, int type, 
1324                 unsigned int sz, void *data)
1325 {
1326         note->name = name;
1327         note->type = type;
1328         note->datasz = sz;
1329         note->data = data;
1330         return;
1331 }
1332
1333 /*
1334  * fill up all the fields in prstatus from the given task struct, except
1335  * registers which need to be filled up separately.
1336  */
1337 static void fill_prstatus(struct elf_prstatus *prstatus,
1338                 struct task_struct *p, long signr)
1339 {
1340         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1341         prstatus->pr_sigpend = p->pending.signal.sig[0];
1342         prstatus->pr_sighold = p->blocked.sig[0];
1343         prstatus->pr_pid = task_pid_vnr(p);
1344         prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1345         prstatus->pr_pgrp = task_pgrp_vnr(p);
1346         prstatus->pr_sid = task_session_vnr(p);
1347         if (thread_group_leader(p)) {
1348                 struct task_cputime cputime;
1349
1350                 /*
1351                  * This is the record for the group leader.  It shows the
1352                  * group-wide total, not its individual thread total.
1353                  */
1354                 thread_group_cputime(p, &cputime);
1355                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1356                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1357         } else {
1358                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1359                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1360         }
1361         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1362         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1363 }
1364
1365 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1366                        struct mm_struct *mm)
1367 {
1368         const struct cred *cred;
1369         unsigned int i, len;
1370         
1371         /* first copy the parameters from user space */
1372         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1373
1374         len = mm->arg_end - mm->arg_start;
1375         if (len >= ELF_PRARGSZ)
1376                 len = ELF_PRARGSZ-1;
1377         if (copy_from_user(&psinfo->pr_psargs,
1378                            (const char __user *)mm->arg_start, len))
1379                 return -EFAULT;
1380         for(i = 0; i < len; i++)
1381                 if (psinfo->pr_psargs[i] == 0)
1382                         psinfo->pr_psargs[i] = ' ';
1383         psinfo->pr_psargs[len] = 0;
1384
1385         psinfo->pr_pid = task_pid_vnr(p);
1386         psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1387         psinfo->pr_pgrp = task_pgrp_vnr(p);
1388         psinfo->pr_sid = task_session_vnr(p);
1389
1390         i = p->state ? ffz(~p->state) + 1 : 0;
1391         psinfo->pr_state = i;
1392         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1393         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1394         psinfo->pr_nice = task_nice(p);
1395         psinfo->pr_flag = p->flags;
1396         rcu_read_lock();
1397         cred = __task_cred(p);
1398         SET_UID(psinfo->pr_uid, cred->uid);
1399         SET_GID(psinfo->pr_gid, cred->gid);
1400         rcu_read_unlock();
1401         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1402         
1403         return 0;
1404 }
1405
1406 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1407 {
1408         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1409         int i = 0;
1410         do
1411                 i += 2;
1412         while (auxv[i - 2] != AT_NULL);
1413         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1414 }
1415
1416 #ifdef CORE_DUMP_USE_REGSET
1417 #include <linux/regset.h>
1418
1419 struct elf_thread_core_info {
1420         struct elf_thread_core_info *next;
1421         struct task_struct *task;
1422         struct elf_prstatus prstatus;
1423         struct memelfnote notes[0];
1424 };
1425
1426 struct elf_note_info {
1427         struct elf_thread_core_info *thread;
1428         struct memelfnote psinfo;
1429         struct memelfnote auxv;
1430         size_t size;
1431         int thread_notes;
1432 };
1433
1434 /*
1435  * When a regset has a writeback hook, we call it on each thread before
1436  * dumping user memory.  On register window machines, this makes sure the
1437  * user memory backing the register data is up to date before we read it.
1438  */
1439 static void do_thread_regset_writeback(struct task_struct *task,
1440                                        const struct user_regset *regset)
1441 {
1442         if (regset->writeback)
1443                 regset->writeback(task, regset, 1);
1444 }
1445
1446 static int fill_thread_core_info(struct elf_thread_core_info *t,
1447                                  const struct user_regset_view *view,
1448                                  long signr, size_t *total)
1449 {
1450         unsigned int i;
1451
1452         /*
1453          * NT_PRSTATUS is the one special case, because the regset data
1454          * goes into the pr_reg field inside the note contents, rather
1455          * than being the whole note contents.  We fill the reset in here.
1456          * We assume that regset 0 is NT_PRSTATUS.
1457          */
1458         fill_prstatus(&t->prstatus, t->task, signr);
1459         (void) view->regsets[0].get(t->task, &view->regsets[0],
1460                                     0, sizeof(t->prstatus.pr_reg),
1461                                     &t->prstatus.pr_reg, NULL);
1462
1463         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1464                   sizeof(t->prstatus), &t->prstatus);
1465         *total += notesize(&t->notes[0]);
1466
1467         do_thread_regset_writeback(t->task, &view->regsets[0]);
1468
1469         /*
1470          * Each other regset might generate a note too.  For each regset
1471          * that has no core_note_type or is inactive, we leave t->notes[i]
1472          * all zero and we'll know to skip writing it later.
1473          */
1474         for (i = 1; i < view->n; ++i) {
1475                 const struct user_regset *regset = &view->regsets[i];
1476                 do_thread_regset_writeback(t->task, regset);
1477                 if (regset->core_note_type &&
1478                     (!regset->active || regset->active(t->task, regset))) {
1479                         int ret;
1480                         size_t size = regset->n * regset->size;
1481                         void *data = kmalloc(size, GFP_KERNEL);
1482                         if (unlikely(!data))
1483                                 return 0;
1484                         ret = regset->get(t->task, regset,
1485                                           0, size, data, NULL);
1486                         if (unlikely(ret))
1487                                 kfree(data);
1488                         else {
1489                                 if (regset->core_note_type != NT_PRFPREG)
1490                                         fill_note(&t->notes[i], "LINUX",
1491                                                   regset->core_note_type,
1492                                                   size, data);
1493                                 else {
1494                                         t->prstatus.pr_fpvalid = 1;
1495                                         fill_note(&t->notes[i], "CORE",
1496                                                   NT_PRFPREG, size, data);
1497                                 }
1498                                 *total += notesize(&t->notes[i]);
1499                         }
1500                 }
1501         }
1502
1503         return 1;
1504 }
1505
1506 static int fill_note_info(struct elfhdr *elf, int phdrs,
1507                           struct elf_note_info *info,
1508                           long signr, struct pt_regs *regs)
1509 {
1510         struct task_struct *dump_task = current;
1511         const struct user_regset_view *view = task_user_regset_view(dump_task);
1512         struct elf_thread_core_info *t;
1513         struct elf_prpsinfo *psinfo;
1514         struct core_thread *ct;
1515         unsigned int i;
1516
1517         info->size = 0;
1518         info->thread = NULL;
1519
1520         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1521         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1522
1523         if (psinfo == NULL)
1524                 return 0;
1525
1526         /*
1527          * Figure out how many notes we're going to need for each thread.
1528          */
1529         info->thread_notes = 0;
1530         for (i = 0; i < view->n; ++i)
1531                 if (view->regsets[i].core_note_type != 0)
1532                         ++info->thread_notes;
1533
1534         /*
1535          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1536          * since it is our one special case.
1537          */
1538         if (unlikely(info->thread_notes == 0) ||
1539             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1540                 WARN_ON(1);
1541                 return 0;
1542         }
1543
1544         /*
1545          * Initialize the ELF file header.
1546          */
1547         fill_elf_header(elf, phdrs,
1548                         view->e_machine, view->e_flags, view->ei_osabi);
1549
1550         /*
1551          * Allocate a structure for each thread.
1552          */
1553         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1554                 t = kzalloc(offsetof(struct elf_thread_core_info,
1555                                      notes[info->thread_notes]),
1556                             GFP_KERNEL);
1557                 if (unlikely(!t))
1558                         return 0;
1559
1560                 t->task = ct->task;
1561                 if (ct->task == dump_task || !info->thread) {
1562                         t->next = info->thread;
1563                         info->thread = t;
1564                 } else {
1565                         /*
1566                          * Make sure to keep the original task at
1567                          * the head of the list.
1568                          */
1569                         t->next = info->thread->next;
1570                         info->thread->next = t;
1571                 }
1572         }
1573
1574         /*
1575          * Now fill in each thread's information.
1576          */
1577         for (t = info->thread; t != NULL; t = t->next)
1578                 if (!fill_thread_core_info(t, view, signr, &info->size))
1579                         return 0;
1580
1581         /*
1582          * Fill in the two process-wide notes.
1583          */
1584         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1585         info->size += notesize(&info->psinfo);
1586
1587         fill_auxv_note(&info->auxv, current->mm);
1588         info->size += notesize(&info->auxv);
1589
1590         return 1;
1591 }
1592
1593 static size_t get_note_info_size(struct elf_note_info *info)
1594 {
1595         return info->size;
1596 }
1597
1598 /*
1599  * Write all the notes for each thread.  When writing the first thread, the
1600  * process-wide notes are interleaved after the first thread-specific note.
1601  */
1602 static int write_note_info(struct elf_note_info *info,
1603                            struct file *file, loff_t *foffset)
1604 {
1605         bool first = 1;
1606         struct elf_thread_core_info *t = info->thread;
1607
1608         do {
1609                 int i;
1610
1611                 if (!writenote(&t->notes[0], file, foffset))
1612                         return 0;
1613
1614                 if (first && !writenote(&info->psinfo, file, foffset))
1615                         return 0;
1616                 if (first && !writenote(&info->auxv, file, foffset))
1617                         return 0;
1618
1619                 for (i = 1; i < info->thread_notes; ++i)
1620                         if (t->notes[i].data &&
1621                             !writenote(&t->notes[i], file, foffset))
1622                                 return 0;
1623
1624                 first = 0;
1625                 t = t->next;
1626         } while (t);
1627
1628         return 1;
1629 }
1630
1631 static void free_note_info(struct elf_note_info *info)
1632 {
1633         struct elf_thread_core_info *threads = info->thread;
1634         while (threads) {
1635                 unsigned int i;
1636                 struct elf_thread_core_info *t = threads;
1637                 threads = t->next;
1638                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1639                 for (i = 1; i < info->thread_notes; ++i)
1640                         kfree(t->notes[i].data);
1641                 kfree(t);
1642         }
1643         kfree(info->psinfo.data);
1644 }
1645
1646 #else
1647
1648 /* Here is the structure in which status of each thread is captured. */
1649 struct elf_thread_status
1650 {
1651         struct list_head list;
1652         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1653         elf_fpregset_t fpu;             /* NT_PRFPREG */
1654         struct task_struct *thread;
1655 #ifdef ELF_CORE_COPY_XFPREGS
1656         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1657 #endif
1658         struct memelfnote notes[3];
1659         int num_notes;
1660 };
1661
1662 /*
1663  * In order to add the specific thread information for the elf file format,
1664  * we need to keep a linked list of every threads pr_status and then create
1665  * a single section for them in the final core file.
1666  */
1667 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1668 {
1669         int sz = 0;
1670         struct task_struct *p = t->thread;
1671         t->num_notes = 0;
1672
1673         fill_prstatus(&t->prstatus, p, signr);
1674         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1675         
1676         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1677                   &(t->prstatus));
1678         t->num_notes++;
1679         sz += notesize(&t->notes[0]);
1680
1681         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1682                                                                 &t->fpu))) {
1683                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1684                           &(t->fpu));
1685                 t->num_notes++;
1686                 sz += notesize(&t->notes[1]);
1687         }
1688
1689 #ifdef ELF_CORE_COPY_XFPREGS
1690         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1691                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1692                           sizeof(t->xfpu), &t->xfpu);
1693                 t->num_notes++;
1694                 sz += notesize(&t->notes[2]);
1695         }
1696 #endif  
1697         return sz;
1698 }
1699
1700 struct elf_note_info {
1701         struct memelfnote *notes;
1702         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1703         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1704         struct list_head thread_list;
1705         elf_fpregset_t *fpu;
1706 #ifdef ELF_CORE_COPY_XFPREGS
1707         elf_fpxregset_t *xfpu;
1708 #endif
1709         int thread_status_size;
1710         int numnote;
1711 };
1712
1713 static int fill_note_info(struct elfhdr *elf, int phdrs,
1714                           struct elf_note_info *info,
1715                           long signr, struct pt_regs *regs)
1716 {
1717 #define NUM_NOTES       6
1718         struct list_head *t;
1719
1720         info->notes = NULL;
1721         info->prstatus = NULL;
1722         info->psinfo = NULL;
1723         info->fpu = NULL;
1724 #ifdef ELF_CORE_COPY_XFPREGS
1725         info->xfpu = NULL;
1726 #endif
1727         INIT_LIST_HEAD(&info->thread_list);
1728
1729         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1730                               GFP_KERNEL);
1731         if (!info->notes)
1732                 return 0;
1733         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1734         if (!info->psinfo)
1735                 return 0;
1736         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1737         if (!info->prstatus)
1738                 return 0;
1739         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1740         if (!info->fpu)
1741                 return 0;
1742 #ifdef ELF_CORE_COPY_XFPREGS
1743         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1744         if (!info->xfpu)
1745                 return 0;
1746 #endif
1747
1748         info->thread_status_size = 0;
1749         if (signr) {
1750                 struct core_thread *ct;
1751                 struct elf_thread_status *ets;
1752
1753                 for (ct = current->mm->core_state->dumper.next;
1754                                                 ct; ct = ct->next) {
1755                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1756                         if (!ets)
1757                                 return 0;
1758
1759                         ets->thread = ct->task;
1760                         list_add(&ets->list, &info->thread_list);
1761                 }
1762
1763                 list_for_each(t, &info->thread_list) {
1764                         int sz;
1765
1766                         ets = list_entry(t, struct elf_thread_status, list);
1767                         sz = elf_dump_thread_status(signr, ets);
1768                         info->thread_status_size += sz;
1769                 }
1770         }
1771         /* now collect the dump for the current */
1772         memset(info->prstatus, 0, sizeof(*info->prstatus));
1773         fill_prstatus(info->prstatus, current, signr);
1774         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1775
1776         /* Set up header */
1777         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1778
1779         /*
1780          * Set up the notes in similar form to SVR4 core dumps made
1781          * with info from their /proc.
1782          */
1783
1784         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1785                   sizeof(*info->prstatus), info->prstatus);
1786         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1787         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1788                   sizeof(*info->psinfo), info->psinfo);
1789
1790         info->numnote = 2;
1791
1792         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1793
1794         /* Try to dump the FPU. */
1795         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1796                                                                info->fpu);
1797         if (info->prstatus->pr_fpvalid)
1798                 fill_note(info->notes + info->numnote++,
1799                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1800 #ifdef ELF_CORE_COPY_XFPREGS
1801         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1802                 fill_note(info->notes + info->numnote++,
1803                           "LINUX", ELF_CORE_XFPREG_TYPE,
1804                           sizeof(*info->xfpu), info->xfpu);
1805 #endif
1806
1807         return 1;
1808
1809 #undef NUM_NOTES
1810 }
1811
1812 static size_t get_note_info_size(struct elf_note_info *info)
1813 {
1814         int sz = 0;
1815         int i;
1816
1817         for (i = 0; i < info->numnote; i++)
1818                 sz += notesize(info->notes + i);
1819
1820         sz += info->thread_status_size;
1821
1822         return sz;
1823 }
1824
1825 static int write_note_info(struct elf_note_info *info,
1826                            struct file *file, loff_t *foffset)
1827 {
1828         int i;
1829         struct list_head *t;
1830
1831         for (i = 0; i < info->numnote; i++)
1832                 if (!writenote(info->notes + i, file, foffset))
1833                         return 0;
1834
1835         /* write out the thread status notes section */
1836         list_for_each(t, &info->thread_list) {
1837                 struct elf_thread_status *tmp =
1838                                 list_entry(t, struct elf_thread_status, list);
1839
1840                 for (i = 0; i < tmp->num_notes; i++)
1841                         if (!writenote(&tmp->notes[i], file, foffset))
1842                                 return 0;
1843         }
1844
1845         return 1;
1846 }
1847
1848 static void free_note_info(struct elf_note_info *info)
1849 {
1850         while (!list_empty(&info->thread_list)) {
1851                 struct list_head *tmp = info->thread_list.next;
1852                 list_del(tmp);
1853                 kfree(list_entry(tmp, struct elf_thread_status, list));
1854         }
1855
1856         kfree(info->prstatus);
1857         kfree(info->psinfo);
1858         kfree(info->notes);
1859         kfree(info->fpu);
1860 #ifdef ELF_CORE_COPY_XFPREGS
1861         kfree(info->xfpu);
1862 #endif
1863 }
1864
1865 #endif
1866
1867 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1868                                         struct vm_area_struct *gate_vma)
1869 {
1870         struct vm_area_struct *ret = tsk->mm->mmap;
1871
1872         if (ret)
1873                 return ret;
1874         return gate_vma;
1875 }
1876 /*
1877  * Helper function for iterating across a vma list.  It ensures that the caller
1878  * will visit `gate_vma' prior to terminating the search.
1879  */
1880 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1881                                         struct vm_area_struct *gate_vma)
1882 {
1883         struct vm_area_struct *ret;
1884
1885         ret = this_vma->vm_next;
1886         if (ret)
1887                 return ret;
1888         if (this_vma == gate_vma)
1889                 return NULL;
1890         return gate_vma;
1891 }
1892
1893 /*
1894  * Actual dumper
1895  *
1896  * This is a two-pass process; first we find the offsets of the bits,
1897  * and then they are actually written out.  If we run out of core limit
1898  * we just truncate.
      *
      * Order of what gets written: the ELF header, the program header for the
      * notes, one PT_LOAD program header per vma (the gate vma included when
      * there is one), any arch extras, the notes themselves, padding up to
      * `dataoff', and finally the contents of each vma.
      *
      * Returns 0 when the ELF header cannot be allocated or fill_note_info()
      * fails; otherwise 1, even if writing stops early at end_coredump.
      *
      * NOTE(review): DUMP_WRITE and DUMP_SEEK are defined outside this chunk;
      * the end_coredump label suggests they jump there on failure - confirm.
1899  */
1900 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1901 {
1902         int has_dumped = 0;
1903         mm_segment_t fs;
1904         int segs;
1905         size_t size = 0;        /* running byte count, compared against limit in the page loop */
1906         struct vm_area_struct *vma, *gate_vma;
1907         struct elfhdr *elf = NULL;
1908         loff_t offset = 0, dataoff, foffset;    /* layout cursor, start of vma data, cursor for the notes */
1909         unsigned long mm_flags;
1910         struct elf_note_info info;
1911
1912         /*
1913          * We no longer stop all VM operations.
1914          * 
1915          * This is because those processes that could possibly change map_count
1916          * or the mmap / vma pages are now blocked in do_exit on current
1917          * finishing this core dump.
1918          *
1919          * Only ptrace can touch these memory addresses, but it doesn't change
1920          * the map_count or the pages allocated. So no possibility of crashing
1921          * exists while dumping the mm->vm_next areas to the core file.
1922          */
1923   
1924         /* alloc memory for large data structures: too large to be on stack */
1925         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1926         if (!elf)
1927                 goto out;
1928         
             /* One segment per vma, plus arch extras and the gate vma if present. */
1929         segs = current->mm->map_count;
1930 #ifdef ELF_CORE_EXTRA_PHDRS
1931         segs += ELF_CORE_EXTRA_PHDRS;
1932 #endif
1933
1934         gate_vma = get_gate_vma(current);
1935         if (gate_vma != NULL)
1936                 segs++;
1937
1938         /*
1939          * Collect all the non-memory information about the process for the
1940          * notes.  This also sets up the file header.
1941          */
1942         if (!fill_note_info(elf, segs + 1, /* including notes section */
1943                             &info, signr, regs))
1944                 goto cleanup;
1945
             /* From here on a dump is reported even if a later write fails. */
1946         has_dumped = 1;
1947         current->flags |= PF_DUMPCORE;
1948   
             /* NOTE(review): presumably lets the write helpers take kernel buffers - confirm. */
1949         fs = get_fs();
1950         set_fs(KERNEL_DS);
1951
1952         DUMP_WRITE(elf, sizeof(*elf));
1953         offset += sizeof(*elf);                         /* Elf header */
1954         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
             /* The notes start right after the program headers. */
1955         foffset = offset;
1956
1957         /* Write notes phdr entry */
1958         {
1959                 struct elf_phdr phdr;
1960                 size_t sz = get_note_info_size(&info);
1961
1962                 sz += elf_coredump_extra_notes_size();
1963
1964                 fill_elf_note_phdr(&phdr, sz, offset);
1965                 offset += sz;
1966                 DUMP_WRITE(&phdr, sizeof(phdr));
1967         }
1968
             /* vma contents begin at the next ELF_EXEC_PAGESIZE boundary. */
1969         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1970
1971         /*
1972          * We must use the same mm->flags while dumping core to avoid
1973          * inconsistency between the program headers and bodies, otherwise an
1974          * unusable core file can be generated.
1975          */
1976         mm_flags = current->mm->flags;
1977
1978         /* Write program headers for segments dump */
1979         for (vma = first_vma(current, gate_vma); vma != NULL;
1980                         vma = next_vma(vma, gate_vma)) {
1981                 struct elf_phdr phdr;
1982
1983                 phdr.p_type = PT_LOAD;
1984                 phdr.p_offset = offset;
1985                 phdr.p_vaddr = vma->vm_start;
1986                 phdr.p_paddr = 0;
                     /* File size is what vma_dump_size() reports; memory size is the full vma. */
1987                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
1988                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1989                 offset += phdr.p_filesz;
1990                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1991                 if (vma->vm_flags & VM_WRITE)
1992                         phdr.p_flags |= PF_W;
1993                 if (vma->vm_flags & VM_EXEC)
1994                         phdr.p_flags |= PF_X;
1995                 phdr.p_align = ELF_EXEC_PAGESIZE;
1996
1997                 DUMP_WRITE(&phdr, sizeof(phdr));
1998         }
1999
2000 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2001         ELF_CORE_WRITE_EXTRA_PHDRS;
2002 #endif
2003
2004         /* write out the notes section */
2005         if (!write_note_info(&info, file, &foffset))
2006                 goto end_coredump;
2007
2008         if (elf_coredump_extra_notes_write(file, &foffset))
2009                 goto end_coredump;
2010
2011         /* Align to page */
2012         DUMP_SEEK(dataoff - foffset);
2013
             /* Second pass: dump the contents of every vma, one page at a time. */
2014         for (vma = first_vma(current, gate_vma); vma != NULL;
2015                         vma = next_vma(vma, gate_vma)) {
2016                 unsigned long addr;
2017                 unsigned long end;
2018
                     /* Same mm_flags as the header pass, so the sizes agree. */
2019                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2020
2021                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2022                         struct page *page;
2023                         struct vm_area_struct *tmp_vma;
2024
                             /* No page obtained: seek over one page instead of writing it. */
2025                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2026                                                 &page, &tmp_vma) <= 0) {
2027                                 DUMP_SEEK(PAGE_SIZE);
2028                         } else {
                                     /* The zero page is skipped with a seek as well. */
2029                                 if (page == ZERO_PAGE(0)) {
2030                                         if (!dump_seek(file, PAGE_SIZE)) {
2031                                                 page_cache_release(page);
2032                                                 goto end_coredump;
2033                                         }
2034                                 } else {
2035                                         void *kaddr;
2036                                         flush_cache_page(tmp_vma, addr,
2037                                                          page_to_pfn(page));
2038                                         kaddr = kmap(page);
                                             /* Stop when the page would exceed limit or the write fails. */
2039                                         if ((size += PAGE_SIZE) > limit ||
2040                                             !dump_write(file, kaddr,
2041                                             PAGE_SIZE)) {
2042                                                 kunmap(page);
2043                                                 page_cache_release(page);
2044                                                 goto end_coredump;
2045                                         }
2046                                         kunmap(page);
2047                                 }
2048                                 page_cache_release(page);
2049                         }
2050                 }
2051         }
2052
2053 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2054         ELF_CORE_WRITE_EXTRA_DATA;
2055 #endif
2056
     /* Reached on success and on any failure after set_fs(KERNEL_DS). */
2057 end_coredump:
2058         set_fs(fs);
2059
     /* Also reached directly when fill_note_info() fails. */
2060 cleanup:
2061         free_note_info(&info);
2062         kfree(elf);
2063 out:
2064         return has_dumped;
2065 }
2066
2067 #endif          /* USE_ELF_CORE_DUMP */
2068
2069 static int __init init_elf_binfmt(void)
2070 {
2071         return register_binfmt(&elf_format);
2072 }
2073
2074 static void __exit exit_elf_binfmt(void)
2075 {
2076         /* Remove the COFF and ELF loaders. */
2077         unregister_binfmt(&elf_format);
2078 }
2079
2080 core_initcall(init_elf_binfmt);    /* init_elf_binfmt() is the init hook, at core-initcall level */
2081 module_exit(exit_elf_binfmt);      /* exit_elf_binfmt() is the exit hook */
2082 MODULE_LICENSE("GPL");