Fix performance regression on lmbench select benchmark
[linux-2.6] / fs / binfmt_aout.c
1 /*
2  *  linux/fs/binfmt_aout.c
3  *
4  *  Copyright (C) 1991, 1992, 1996  Linus Torvalds
5  */
6
7 #include <linux/module.h>
8
9 #include <linux/time.h>
10 #include <linux/kernel.h>
11 #include <linux/mm.h>
12 #include <linux/mman.h>
13 #include <linux/a.out.h>
14 #include <linux/errno.h>
15 #include <linux/signal.h>
16 #include <linux/string.h>
17 #include <linux/fs.h>
18 #include <linux/file.h>
19 #include <linux/stat.h>
20 #include <linux/fcntl.h>
21 #include <linux/ptrace.h>
22 #include <linux/user.h>
23 #include <linux/slab.h>
24 #include <linux/binfmts.h>
25 #include <linux/personality.h>
26 #include <linux/init.h>
27
28 #include <asm/system.h>
29 #include <asm/uaccess.h>
30 #include <asm/cacheflush.h>
31 #include <asm/a.out-core.h>
32
33 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
34 static int load_aout_library(struct file*);
35 static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
36
37 static struct linux_binfmt aout_format = {
38         .module         = THIS_MODULE,
39         .load_binary    = load_aout_binary,
40         .load_shlib     = load_aout_library,
41         .core_dump      = aout_core_dump,
42         .min_coredump   = PAGE_SIZE
43 };
44
/* True when x, viewed as an unsigned address, lies at or above TASK_SIZE. */
#define BAD_ADDR(x)     (TASK_SIZE <= (unsigned long)(x))
46
47 static int set_brk(unsigned long start, unsigned long end)
48 {
49         start = PAGE_ALIGN(start);
50         end = PAGE_ALIGN(end);
51         if (end > start) {
52                 unsigned long addr;
53                 down_write(&current->mm->mmap_sem);
54                 addr = do_brk(start, end - start);
55                 up_write(&current->mm->mmap_sem);
56                 if (BAD_ADDR(addr))
57                         return addr;
58         }
59         return 0;
60 }
61
62 /*
63  * These are the only things you should do on a core-file: use only these
64  * macros to write out all the necessary info.
65  */
66
67 static int dump_write(struct file *file, const void *addr, int nr)
68 {
69         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
70 }
71
/*
 * DUMP_WRITE() - write nr bytes at addr to the core file, bailing out of the
 * dump on a short or failed write.
 *
 * Expects a local `file` and an `end_coredump` label in the calling function.
 * Wrapped in do { } while (0) so it behaves as a single statement and is safe
 * inside an unbraced if/else; callers terminate it with their own semicolon.
 */
#define DUMP_WRITE(addr, nr)    \
        do { \
                if (!dump_write(file, (void *)(addr), (nr))) \
                        goto end_coredump; \
        } while (0)
75
/*
 * DUMP_SEEK() - move the core file position to an absolute offset.
 *
 * Uses the file's llseek method when it has one (whence 0, i.e. from the
 * start of the file) and jumps to `end_coredump` if the resulting position
 * is not the requested one; otherwise f_pos is set directly.  Expects a
 * local `file` and an `end_coredump` label in the calling function.
 * Wrapped in do { } while (0) so it is a single statement; note that
 * `offset` is evaluated more than once.
 */
#define DUMP_SEEK(offset) \
        do { \
                if (file->f_op->llseek) { \
                        if (file->f_op->llseek(file, (offset), 0) != (offset)) \
                                goto end_coredump; \
                } else { \
                        file->f_pos = (offset); \
                } \
        } while (0)
81
82 /*
83  * Routine writes a core dump image in the current directory.
84  * Currently only a stub-function.
85  *
86  * Note that setuid/setgid files won't make a core-dump if the uid/gid
87  * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable"
88  * field, which also makes sure the core-dumps won't be recursive if the
89  * dumping of the process results in another error..
90  */
91
92 static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
93 {
94         mm_segment_t fs;
95         int has_dumped = 0;
96         unsigned long dump_start, dump_size;
97         struct user dump;
98 #if defined(__alpha__)
99 #       define START_DATA(u)    (u.start_data)
100 #elif defined(__arm__)
101 #       define START_DATA(u)    ((u.u_tsize << PAGE_SHIFT) + u.start_code)
102 #elif defined(__sparc__)
103 #       define START_DATA(u)    (u.u_tsize)
104 #elif defined(__i386__) || defined(__mc68000__) || defined(__arch_um__)
105 #       define START_DATA(u)    (u.u_tsize << PAGE_SHIFT)
106 #endif
107 #ifdef __sparc__
108 #       define START_STACK(u)   ((regs->u_regs[UREG_FP]) & ~(PAGE_SIZE - 1))
109 #else
110 #       define START_STACK(u)   (u.start_stack)
111 #endif
112
113         fs = get_fs();
114         set_fs(KERNEL_DS);
115         has_dumped = 1;
116         current->flags |= PF_DUMPCORE;
117         strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
118 #ifndef __sparc__
119         dump.u_ar0 = offsetof(struct user, regs);
120 #endif
121         dump.signal = signr;
122         aout_dump_thread(regs, &dump);
123
124 /* If the size of the dump file exceeds the rlimit, then see what would happen
125    if we wrote the stack, but not the data area.  */
126 #ifdef __sparc__
127         if ((dump.u_dsize + dump.u_ssize) > limit)
128                 dump.u_dsize = 0;
129 #else
130         if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
131                 dump.u_dsize = 0;
132 #endif
133
134 /* Make sure we have enough room to write the stack and data areas. */
135 #ifdef __sparc__
136         if (dump.u_ssize > limit)
137                 dump.u_ssize = 0;
138 #else
139         if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
140                 dump.u_ssize = 0;
141 #endif
142
143 /* make sure we actually have a data and stack area to dump */
144         set_fs(USER_DS);
145 #ifdef __sparc__
146         if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize))
147                 dump.u_dsize = 0;
148         if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize))
149                 dump.u_ssize = 0;
150 #else
151         if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
152                 dump.u_dsize = 0;
153         if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
154                 dump.u_ssize = 0;
155 #endif
156
157         set_fs(KERNEL_DS);
158 /* struct user */
159         DUMP_WRITE(&dump,sizeof(dump));
160 /* Now dump all of the user data.  Include malloced stuff as well */
161 #ifndef __sparc__
162         DUMP_SEEK(PAGE_SIZE);
163 #endif
164 /* now we start writing out the user space info */
165         set_fs(USER_DS);
166 /* Dump the data area */
167         if (dump.u_dsize != 0) {
168                 dump_start = START_DATA(dump);
169 #ifdef __sparc__
170                 dump_size = dump.u_dsize;
171 #else
172                 dump_size = dump.u_dsize << PAGE_SHIFT;
173 #endif
174                 DUMP_WRITE(dump_start,dump_size);
175         }
176 /* Now prepare to dump the stack area */
177         if (dump.u_ssize != 0) {
178                 dump_start = START_STACK(dump);
179 #ifdef __sparc__
180                 dump_size = dump.u_ssize;
181 #else
182                 dump_size = dump.u_ssize << PAGE_SHIFT;
183 #endif
184                 DUMP_WRITE(dump_start,dump_size);
185         }
186 /* Finally dump the task struct.  Not be used by gdb, but could be useful */
187         set_fs(KERNEL_DS);
188         DUMP_WRITE(current,sizeof(*current));
189 end_coredump:
190         set_fs(fs);
191         return has_dumped;
192 }
193
194 /*
195  * create_aout_tables() parses the env- and arg-strings in new user
196  * memory and creates the pointer tables from them, and puts their
197  * addresses on the "stack", returning the new stack pointer value.
198  */
199 static unsigned long __user *create_aout_tables(char __user *p, struct linux_binprm * bprm)
200 {
201         char __user * __user *argv;
202         char __user * __user *envp;
203         unsigned long __user *sp;
204         int argc = bprm->argc;
205         int envc = bprm->envc;
206
207         sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
208 #ifdef __sparc__
209         /* This imposes the proper stack alignment for a new process. */
210         sp = (void __user *) (((unsigned long) sp) & ~7);
211         if ((envc+argc+3)&1) --sp;
212 #endif
213 #ifdef __alpha__
214 /* whee.. test-programs are so much fun. */
215         put_user(0, --sp);
216         put_user(0, --sp);
217         if (bprm->loader) {
218                 put_user(0, --sp);
219                 put_user(0x3eb, --sp);
220                 put_user(bprm->loader, --sp);
221                 put_user(0x3ea, --sp);
222         }
223         put_user(bprm->exec, --sp);
224         put_user(0x3e9, --sp);
225 #endif
226         sp -= envc+1;
227         envp = (char __user * __user *) sp;
228         sp -= argc+1;
229         argv = (char __user * __user *) sp;
230 #if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__)
231         put_user((unsigned long) envp,--sp);
232         put_user((unsigned long) argv,--sp);
233 #endif
234         put_user(argc,--sp);
235         current->mm->arg_start = (unsigned long) p;
236         while (argc-->0) {
237                 char c;
238                 put_user(p,argv++);
239                 do {
240                         get_user(c,p++);
241                 } while (c);
242         }
243         put_user(NULL,argv);
244         current->mm->arg_end = current->mm->env_start = (unsigned long) p;
245         while (envc-->0) {
246                 char c;
247                 put_user(p,envp++);
248                 do {
249                         get_user(c,p++);
250                 } while (c);
251         }
252         put_user(NULL,envp);
253         current->mm->env_end = (unsigned long) p;
254         return sp;
255 }
256
257 /*
258  * These are the functions used to load a.out style executables and shared
259  * libraries.  There is no binary dependent code anywhere else.
260  */
261
262 static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
263 {
264         struct exec ex;
265         unsigned long error;
266         unsigned long fd_offset;
267         unsigned long rlim;
268         int retval;
269
270         ex = *((struct exec *) bprm->buf);              /* exec-header */
271         if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
272              N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
273             N_TRSIZE(ex) || N_DRSIZE(ex) ||
274             i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
275                 return -ENOEXEC;
276         }
277
278         /*
279          * Requires a mmap handler. This prevents people from using a.out
280          * as part of an exploit attack against /proc-related vulnerabilities.
281          */
282         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
283                 return -ENOEXEC;
284
285         fd_offset = N_TXTOFF(ex);
286
287         /* Check initial limits. This avoids letting people circumvent
288          * size limits imposed on them by creating programs with large
289          * arrays in the data or bss.
290          */
291         rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
292         if (rlim >= RLIM_INFINITY)
293                 rlim = ~0;
294         if (ex.a_data + ex.a_bss > rlim)
295                 return -ENOMEM;
296
297         /* Flush all traces of the currently running executable */
298         retval = flush_old_exec(bprm);
299         if (retval)
300                 return retval;
301
302         /* OK, This is the point of no return */
303 #if defined(__alpha__)
304         SET_AOUT_PERSONALITY(bprm, ex);
305 #elif defined(__sparc__)
306         set_personality(PER_SUNOS);
307 #if !defined(__sparc_v9__)
308         memcpy(&current->thread.core_exec, &ex, sizeof(struct exec));
309 #endif
310 #else
311         set_personality(PER_LINUX);
312 #endif
313
314         current->mm->end_code = ex.a_text +
315                 (current->mm->start_code = N_TXTADDR(ex));
316         current->mm->end_data = ex.a_data +
317                 (current->mm->start_data = N_DATADDR(ex));
318         current->mm->brk = ex.a_bss +
319                 (current->mm->start_brk = N_BSSADDR(ex));
320         current->mm->free_area_cache = current->mm->mmap_base;
321         current->mm->cached_hole_size = 0;
322
323         compute_creds(bprm);
324         current->flags &= ~PF_FORKNOEXEC;
325 #ifdef __sparc__
326         if (N_MAGIC(ex) == NMAGIC) {
327                 loff_t pos = fd_offset;
328                 /* Fuck me plenty... */
329                 /* <AOL></AOL> */
330                 down_write(&current->mm->mmap_sem);     
331                 error = do_brk(N_TXTADDR(ex), ex.a_text);
332                 up_write(&current->mm->mmap_sem);
333                 bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
334                           ex.a_text, &pos);
335                 down_write(&current->mm->mmap_sem);
336                 error = do_brk(N_DATADDR(ex), ex.a_data);
337                 up_write(&current->mm->mmap_sem);
338                 bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
339                           ex.a_data, &pos);
340                 goto beyond_if;
341         }
342 #endif
343
344         if (N_MAGIC(ex) == OMAGIC) {
345                 unsigned long text_addr, map_size;
346                 loff_t pos;
347
348                 text_addr = N_TXTADDR(ex);
349
350 #if defined(__alpha__) || defined(__sparc__)
351                 pos = fd_offset;
352                 map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
353 #else
354                 pos = 32;
355                 map_size = ex.a_text+ex.a_data;
356 #endif
357                 down_write(&current->mm->mmap_sem);
358                 error = do_brk(text_addr & PAGE_MASK, map_size);
359                 up_write(&current->mm->mmap_sem);
360                 if (error != (text_addr & PAGE_MASK)) {
361                         send_sig(SIGKILL, current, 0);
362                         return error;
363                 }
364
365                 error = bprm->file->f_op->read(bprm->file,
366                           (char __user *)text_addr,
367                           ex.a_text+ex.a_data, &pos);
368                 if ((signed long)error < 0) {
369                         send_sig(SIGKILL, current, 0);
370                         return error;
371                 }
372                          
373                 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
374         } else {
375                 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
376                     (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
377                 {
378                         printk(KERN_NOTICE "executable not page aligned\n");
379                 }
380
381                 if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
382                 {
383                         printk(KERN_WARNING 
384                                "fd_offset is not page aligned. Please convert program: %s\n",
385                                bprm->file->f_path.dentry->d_name.name);
386                 }
387
388                 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
389                         loff_t pos = fd_offset;
390                         down_write(&current->mm->mmap_sem);
391                         do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
392                         up_write(&current->mm->mmap_sem);
393                         bprm->file->f_op->read(bprm->file,
394                                         (char __user *)N_TXTADDR(ex),
395                                         ex.a_text+ex.a_data, &pos);
396                         flush_icache_range((unsigned long) N_TXTADDR(ex),
397                                            (unsigned long) N_TXTADDR(ex) +
398                                            ex.a_text+ex.a_data);
399                         goto beyond_if;
400                 }
401
402                 down_write(&current->mm->mmap_sem);
403                 error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
404                         PROT_READ | PROT_EXEC,
405                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
406                         fd_offset);
407                 up_write(&current->mm->mmap_sem);
408
409                 if (error != N_TXTADDR(ex)) {
410                         send_sig(SIGKILL, current, 0);
411                         return error;
412                 }
413
414                 down_write(&current->mm->mmap_sem);
415                 error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
416                                 PROT_READ | PROT_WRITE | PROT_EXEC,
417                                 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
418                                 fd_offset + ex.a_text);
419                 up_write(&current->mm->mmap_sem);
420                 if (error != N_DATADDR(ex)) {
421                         send_sig(SIGKILL, current, 0);
422                         return error;
423                 }
424         }
425 beyond_if:
426         set_binfmt(&aout_format);
427
428         retval = set_brk(current->mm->start_brk, current->mm->brk);
429         if (retval < 0) {
430                 send_sig(SIGKILL, current, 0);
431                 return retval;
432         }
433
434         retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
435         if (retval < 0) { 
436                 /* Someone check-me: is this error path enough? */ 
437                 send_sig(SIGKILL, current, 0); 
438                 return retval;
439         }
440
441         current->mm->start_stack =
442                 (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
443 #ifdef __alpha__
444         regs->gp = ex.a_gpvalue;
445 #endif
446         start_thread(regs, ex.a_entry, current->mm->start_stack);
447         if (unlikely(current->ptrace & PT_PTRACED)) {
448                 if (current->ptrace & PT_TRACE_EXEC)
449                         ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
450                 else
451                         send_sig(SIGTRAP, current, 0);
452         }
453         return 0;
454 }
455
456 static int load_aout_library(struct file *file)
457 {
458         struct inode * inode;
459         unsigned long bss, start_addr, len;
460         unsigned long error;
461         int retval;
462         struct exec ex;
463
464         inode = file->f_path.dentry->d_inode;
465
466         retval = -ENOEXEC;
467         error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
468         if (error != sizeof(ex))
469                 goto out;
470
471         /* We come in here for the regular a.out style of shared libraries */
472         if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
473             N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
474             i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
475                 goto out;
476         }
477
478         /*
479          * Requires a mmap handler. This prevents people from using a.out
480          * as part of an exploit attack against /proc-related vulnerabilities.
481          */
482         if (!file->f_op || !file->f_op->mmap)
483                 goto out;
484
485         if (N_FLAGS(ex))
486                 goto out;
487
488         /* For  QMAGIC, the starting address is 0x20 into the page.  We mask
489            this off to get the starting address for the page */
490
491         start_addr =  ex.a_entry & 0xfffff000;
492
493         if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
494                 loff_t pos = N_TXTOFF(ex);
495
496                 if (printk_ratelimit())
497                 {
498                         printk(KERN_WARNING 
499                                "N_TXTOFF is not page aligned. Please convert library: %s\n",
500                                file->f_path.dentry->d_name.name);
501                 }
502                 down_write(&current->mm->mmap_sem);
503                 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
504                 up_write(&current->mm->mmap_sem);
505                 
506                 file->f_op->read(file, (char __user *)start_addr,
507                         ex.a_text + ex.a_data, &pos);
508                 flush_icache_range((unsigned long) start_addr,
509                                    (unsigned long) start_addr + ex.a_text + ex.a_data);
510
511                 retval = 0;
512                 goto out;
513         }
514         /* Now use mmap to map the library into memory. */
515         down_write(&current->mm->mmap_sem);
516         error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
517                         PROT_READ | PROT_WRITE | PROT_EXEC,
518                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
519                         N_TXTOFF(ex));
520         up_write(&current->mm->mmap_sem);
521         retval = error;
522         if (error != start_addr)
523                 goto out;
524
525         len = PAGE_ALIGN(ex.a_text + ex.a_data);
526         bss = ex.a_text + ex.a_data + ex.a_bss;
527         if (bss > len) {
528                 down_write(&current->mm->mmap_sem);
529                 error = do_brk(start_addr + len, bss - len);
530                 up_write(&current->mm->mmap_sem);
531                 retval = error;
532                 if (error != start_addr + len)
533                         goto out;
534         }
535         retval = 0;
536 out:
537         return retval;
538 }
539
540 static int __init init_aout_binfmt(void)
541 {
542         return register_binfmt(&aout_format);
543 }
544
545 static void __exit exit_aout_binfmt(void)
546 {
547         unregister_binfmt(&aout_format);
548 }
549
550 core_initcall(init_aout_binfmt);
551 module_exit(exit_aout_binfmt);
552 MODULE_LICENSE("GPL");