NFS: "open code" the NFS direct write rescheduler
[linux-2.6] / fs / binfmt_aout.c
1 /*
2  *  linux/fs/binfmt_aout.c
3  *
4  *  Copyright (C) 1991, 1992, 1996  Linus Torvalds
5  */
6
7 #include <linux/module.h>
8
9 #include <linux/time.h>
10 #include <linux/kernel.h>
11 #include <linux/mm.h>
12 #include <linux/mman.h>
13 #include <linux/a.out.h>
14 #include <linux/errno.h>
15 #include <linux/signal.h>
16 #include <linux/string.h>
17 #include <linux/fs.h>
18 #include <linux/file.h>
19 #include <linux/stat.h>
20 #include <linux/fcntl.h>
21 #include <linux/ptrace.h>
22 #include <linux/user.h>
23 #include <linux/slab.h>
24 #include <linux/binfmts.h>
25 #include <linux/personality.h>
26 #include <linux/init.h>
27
28 #include <asm/system.h>
29 #include <asm/uaccess.h>
30 #include <asm/cacheflush.h>
31
32 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
33 static int load_aout_library(struct file*);
34 static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file);
35
36 static struct linux_binfmt aout_format = {
37         .module         = THIS_MODULE,
38         .load_binary    = load_aout_binary,
39         .load_shlib     = load_aout_library,
40         .core_dump      = aout_core_dump,
41         .min_coredump   = PAGE_SIZE
42 };
43
44 #define BAD_ADDR(x)     ((unsigned long)(x) >= TASK_SIZE)
45
46 static int set_brk(unsigned long start, unsigned long end)
47 {
48         start = PAGE_ALIGN(start);
49         end = PAGE_ALIGN(end);
50         if (end > start) {
51                 unsigned long addr;
52                 down_write(&current->mm->mmap_sem);
53                 addr = do_brk(start, end - start);
54                 up_write(&current->mm->mmap_sem);
55                 if (BAD_ADDR(addr))
56                         return addr;
57         }
58         return 0;
59 }
60
61 /*
62  * These are the only things you should do on a core-file: use only these
63  * macros to write out all the necessary info.
64  */
65
66 static int dump_write(struct file *file, const void *addr, int nr)
67 {
68         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
69 }
70
/*
 * DUMP_WRITE() - write @nr bytes at @addr to the core file, bailing out
 * to the caller's "end_coredump" label on a short write.
 *
 * Expects a "struct file *file" and an "end_coredump" label in the
 * enclosing function.  Wrapped in do { } while (0) so that the macro plus
 * its trailing semicolon forms exactly one statement and is safe inside
 * an unbraced if/else.
 */
#define DUMP_WRITE(addr, nr)					\
	do {							\
		if (!dump_write(file, (void *)(addr), (nr)))	\
			goto end_coredump;			\
	} while (0)

/*
 * DUMP_SEEK() - move the core file position to @offset.
 *
 * Uses the file's ->llseek() method when there is one (jumping to
 * "end_coredump" if it does not report @offset back); otherwise f_pos is
 * set directly.  Same do { } while (0) wrapping as DUMP_WRITE().
 */
#define DUMP_SEEK(offset)						\
	do {								\
		if (file->f_op->llseek) {				\
			if (file->f_op->llseek(file, (offset), 0) != (offset)) \
				goto end_coredump;			\
		} else {						\
			file->f_pos = (offset);				\
		}							\
	} while (0)
80
81 /*
82  * Routine writes a core dump image in the current directory.
83  * Currently only a stub-function.
84  *
85  * Note that setuid/setgid files won't make a core-dump if the uid/gid
86  * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable"
87  * field, which also makes sure the core-dumps won't be recursive if the
88  * dumping of the process results in another error..
89  */
90
91 static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file)
92 {
93         mm_segment_t fs;
94         int has_dumped = 0;
95         unsigned long dump_start, dump_size;
96         struct user dump;
97 #if defined(__alpha__)
98 #       define START_DATA(u)    (u.start_data)
99 #elif defined(__arm__)
100 #       define START_DATA(u)    ((u.u_tsize << PAGE_SHIFT) + u.start_code)
101 #elif defined(__sparc__)
102 #       define START_DATA(u)    (u.u_tsize)
103 #elif defined(__i386__) || defined(__mc68000__) || defined(__arch_um__)
104 #       define START_DATA(u)    (u.u_tsize << PAGE_SHIFT)
105 #endif
106 #ifdef __sparc__
107 #       define START_STACK(u)   ((regs->u_regs[UREG_FP]) & ~(PAGE_SIZE - 1))
108 #else
109 #       define START_STACK(u)   (u.start_stack)
110 #endif
111
112         fs = get_fs();
113         set_fs(KERNEL_DS);
114         has_dumped = 1;
115         current->flags |= PF_DUMPCORE;
116         strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
117 #ifndef __sparc__
118         dump.u_ar0 = (void *)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump)));
119 #endif
120         dump.signal = signr;
121         dump_thread(regs, &dump);
122
123 /* If the size of the dump file exceeds the rlimit, then see what would happen
124    if we wrote the stack, but not the data area.  */
125 #ifdef __sparc__
126         if ((dump.u_dsize+dump.u_ssize) >
127             current->signal->rlim[RLIMIT_CORE].rlim_cur)
128                 dump.u_dsize = 0;
129 #else
130         if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE >
131             current->signal->rlim[RLIMIT_CORE].rlim_cur)
132                 dump.u_dsize = 0;
133 #endif
134
135 /* Make sure we have enough room to write the stack and data areas. */
136 #ifdef __sparc__
137         if ((dump.u_ssize) >
138             current->signal->rlim[RLIMIT_CORE].rlim_cur)
139                 dump.u_ssize = 0;
140 #else
141         if ((dump.u_ssize+1) * PAGE_SIZE >
142             current->signal->rlim[RLIMIT_CORE].rlim_cur)
143                 dump.u_ssize = 0;
144 #endif
145
146 /* make sure we actually have a data and stack area to dump */
147         set_fs(USER_DS);
148 #ifdef __sparc__
149         if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize))
150                 dump.u_dsize = 0;
151         if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize))
152                 dump.u_ssize = 0;
153 #else
154         if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
155                 dump.u_dsize = 0;
156         if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
157                 dump.u_ssize = 0;
158 #endif
159
160         set_fs(KERNEL_DS);
161 /* struct user */
162         DUMP_WRITE(&dump,sizeof(dump));
163 /* Now dump all of the user data.  Include malloced stuff as well */
164 #ifndef __sparc__
165         DUMP_SEEK(PAGE_SIZE);
166 #endif
167 /* now we start writing out the user space info */
168         set_fs(USER_DS);
169 /* Dump the data area */
170         if (dump.u_dsize != 0) {
171                 dump_start = START_DATA(dump);
172 #ifdef __sparc__
173                 dump_size = dump.u_dsize;
174 #else
175                 dump_size = dump.u_dsize << PAGE_SHIFT;
176 #endif
177                 DUMP_WRITE(dump_start,dump_size);
178         }
179 /* Now prepare to dump the stack area */
180         if (dump.u_ssize != 0) {
181                 dump_start = START_STACK(dump);
182 #ifdef __sparc__
183                 dump_size = dump.u_ssize;
184 #else
185                 dump_size = dump.u_ssize << PAGE_SHIFT;
186 #endif
187                 DUMP_WRITE(dump_start,dump_size);
188         }
189 /* Finally dump the task struct.  Not be used by gdb, but could be useful */
190         set_fs(KERNEL_DS);
191         DUMP_WRITE(current,sizeof(*current));
192 end_coredump:
193         set_fs(fs);
194         return has_dumped;
195 }
196
197 /*
198  * create_aout_tables() parses the env- and arg-strings in new user
199  * memory and creates the pointer tables from them, and puts their
200  * addresses on the "stack", returning the new stack pointer value.
201  */
202 static unsigned long __user *create_aout_tables(char __user *p, struct linux_binprm * bprm)
203 {
204         char __user * __user *argv;
205         char __user * __user *envp;
206         unsigned long __user *sp;
207         int argc = bprm->argc;
208         int envc = bprm->envc;
209
210         sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
211 #ifdef __sparc__
212         /* This imposes the proper stack alignment for a new process. */
213         sp = (void __user *) (((unsigned long) sp) & ~7);
214         if ((envc+argc+3)&1) --sp;
215 #endif
216 #ifdef __alpha__
217 /* whee.. test-programs are so much fun. */
218         put_user(0, --sp);
219         put_user(0, --sp);
220         if (bprm->loader) {
221                 put_user(0, --sp);
222                 put_user(0x3eb, --sp);
223                 put_user(bprm->loader, --sp);
224                 put_user(0x3ea, --sp);
225         }
226         put_user(bprm->exec, --sp);
227         put_user(0x3e9, --sp);
228 #endif
229         sp -= envc+1;
230         envp = (char __user * __user *) sp;
231         sp -= argc+1;
232         argv = (char __user * __user *) sp;
233 #if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__)
234         put_user((unsigned long) envp,--sp);
235         put_user((unsigned long) argv,--sp);
236 #endif
237         put_user(argc,--sp);
238         current->mm->arg_start = (unsigned long) p;
239         while (argc-->0) {
240                 char c;
241                 put_user(p,argv++);
242                 do {
243                         get_user(c,p++);
244                 } while (c);
245         }
246         put_user(NULL,argv);
247         current->mm->arg_end = current->mm->env_start = (unsigned long) p;
248         while (envc-->0) {
249                 char c;
250                 put_user(p,envp++);
251                 do {
252                         get_user(c,p++);
253                 } while (c);
254         }
255         put_user(NULL,envp);
256         current->mm->env_end = (unsigned long) p;
257         return sp;
258 }
259
260 /*
261  * These are the functions used to load a.out style executables and shared
262  * libraries.  There is no binary dependent code anywhere else.
263  */
264
265 static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
266 {
267         struct exec ex;
268         unsigned long error;
269         unsigned long fd_offset;
270         unsigned long rlim;
271         int retval;
272
273         ex = *((struct exec *) bprm->buf);              /* exec-header */
274         if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
275              N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
276             N_TRSIZE(ex) || N_DRSIZE(ex) ||
277             i_size_read(bprm->file->f_dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
278                 return -ENOEXEC;
279         }
280
281         fd_offset = N_TXTOFF(ex);
282
283         /* Check initial limits. This avoids letting people circumvent
284          * size limits imposed on them by creating programs with large
285          * arrays in the data or bss.
286          */
287         rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
288         if (rlim >= RLIM_INFINITY)
289                 rlim = ~0;
290         if (ex.a_data + ex.a_bss > rlim)
291                 return -ENOMEM;
292
293         /* Flush all traces of the currently running executable */
294         retval = flush_old_exec(bprm);
295         if (retval)
296                 return retval;
297
298         /* OK, This is the point of no return */
299 #if defined(__alpha__)
300         SET_AOUT_PERSONALITY(bprm, ex);
301 #elif defined(__sparc__)
302         set_personality(PER_SUNOS);
303 #if !defined(__sparc_v9__)
304         memcpy(&current->thread.core_exec, &ex, sizeof(struct exec));
305 #endif
306 #else
307         set_personality(PER_LINUX);
308 #endif
309
310         current->mm->end_code = ex.a_text +
311                 (current->mm->start_code = N_TXTADDR(ex));
312         current->mm->end_data = ex.a_data +
313                 (current->mm->start_data = N_DATADDR(ex));
314         current->mm->brk = ex.a_bss +
315                 (current->mm->start_brk = N_BSSADDR(ex));
316         current->mm->free_area_cache = current->mm->mmap_base;
317         current->mm->cached_hole_size = 0;
318
319         current->mm->mmap = NULL;
320         compute_creds(bprm);
321         current->flags &= ~PF_FORKNOEXEC;
322 #ifdef __sparc__
323         if (N_MAGIC(ex) == NMAGIC) {
324                 loff_t pos = fd_offset;
325                 /* Fuck me plenty... */
326                 /* <AOL></AOL> */
327                 down_write(&current->mm->mmap_sem);     
328                 error = do_brk(N_TXTADDR(ex), ex.a_text);
329                 up_write(&current->mm->mmap_sem);
330                 bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
331                           ex.a_text, &pos);
332                 down_write(&current->mm->mmap_sem);
333                 error = do_brk(N_DATADDR(ex), ex.a_data);
334                 up_write(&current->mm->mmap_sem);
335                 bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
336                           ex.a_data, &pos);
337                 goto beyond_if;
338         }
339 #endif
340
341         if (N_MAGIC(ex) == OMAGIC) {
342                 unsigned long text_addr, map_size;
343                 loff_t pos;
344
345                 text_addr = N_TXTADDR(ex);
346
347 #if defined(__alpha__) || defined(__sparc__)
348                 pos = fd_offset;
349                 map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
350 #else
351                 pos = 32;
352                 map_size = ex.a_text+ex.a_data;
353 #endif
354                 down_write(&current->mm->mmap_sem);
355                 error = do_brk(text_addr & PAGE_MASK, map_size);
356                 up_write(&current->mm->mmap_sem);
357                 if (error != (text_addr & PAGE_MASK)) {
358                         send_sig(SIGKILL, current, 0);
359                         return error;
360                 }
361
362                 error = bprm->file->f_op->read(bprm->file,
363                           (char __user *)text_addr,
364                           ex.a_text+ex.a_data, &pos);
365                 if ((signed long)error < 0) {
366                         send_sig(SIGKILL, current, 0);
367                         return error;
368                 }
369                          
370                 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
371         } else {
372                 static unsigned long error_time, error_time2;
373                 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
374                     (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
375                 {
376                         printk(KERN_NOTICE "executable not page aligned\n");
377                         error_time2 = jiffies;
378                 }
379
380                 if ((fd_offset & ~PAGE_MASK) != 0 &&
381                     (jiffies-error_time) > 5*HZ)
382                 {
383                         printk(KERN_WARNING 
384                                "fd_offset is not page aligned. Please convert program: %s\n",
385                                bprm->file->f_dentry->d_name.name);
386                         error_time = jiffies;
387                 }
388
389                 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
390                         loff_t pos = fd_offset;
391                         down_write(&current->mm->mmap_sem);
392                         do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
393                         up_write(&current->mm->mmap_sem);
394                         bprm->file->f_op->read(bprm->file,
395                                         (char __user *)N_TXTADDR(ex),
396                                         ex.a_text+ex.a_data, &pos);
397                         flush_icache_range((unsigned long) N_TXTADDR(ex),
398                                            (unsigned long) N_TXTADDR(ex) +
399                                            ex.a_text+ex.a_data);
400                         goto beyond_if;
401                 }
402
403                 down_write(&current->mm->mmap_sem);
404                 error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
405                         PROT_READ | PROT_EXEC,
406                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
407                         fd_offset);
408                 up_write(&current->mm->mmap_sem);
409
410                 if (error != N_TXTADDR(ex)) {
411                         send_sig(SIGKILL, current, 0);
412                         return error;
413                 }
414
415                 down_write(&current->mm->mmap_sem);
416                 error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
417                                 PROT_READ | PROT_WRITE | PROT_EXEC,
418                                 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
419                                 fd_offset + ex.a_text);
420                 up_write(&current->mm->mmap_sem);
421                 if (error != N_DATADDR(ex)) {
422                         send_sig(SIGKILL, current, 0);
423                         return error;
424                 }
425         }
426 beyond_if:
427         set_binfmt(&aout_format);
428
429         retval = set_brk(current->mm->start_brk, current->mm->brk);
430         if (retval < 0) {
431                 send_sig(SIGKILL, current, 0);
432                 return retval;
433         }
434
435         retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
436         if (retval < 0) { 
437                 /* Someone check-me: is this error path enough? */ 
438                 send_sig(SIGKILL, current, 0); 
439                 return retval;
440         }
441
442         current->mm->start_stack =
443                 (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
444 #ifdef __alpha__
445         regs->gp = ex.a_gpvalue;
446 #endif
447         start_thread(regs, ex.a_entry, current->mm->start_stack);
448         if (unlikely(current->ptrace & PT_PTRACED)) {
449                 if (current->ptrace & PT_TRACE_EXEC)
450                         ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
451                 else
452                         send_sig(SIGTRAP, current, 0);
453         }
454         return 0;
455 }
456
457 static int load_aout_library(struct file *file)
458 {
459         struct inode * inode;
460         unsigned long bss, start_addr, len;
461         unsigned long error;
462         int retval;
463         struct exec ex;
464
465         inode = file->f_dentry->d_inode;
466
467         retval = -ENOEXEC;
468         error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
469         if (error != sizeof(ex))
470                 goto out;
471
472         /* We come in here for the regular a.out style of shared libraries */
473         if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
474             N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
475             i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
476                 goto out;
477         }
478
479         if (N_FLAGS(ex))
480                 goto out;
481
482         /* For  QMAGIC, the starting address is 0x20 into the page.  We mask
483            this off to get the starting address for the page */
484
485         start_addr =  ex.a_entry & 0xfffff000;
486
487         if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
488                 static unsigned long error_time;
489                 loff_t pos = N_TXTOFF(ex);
490
491                 if ((jiffies-error_time) > 5*HZ)
492                 {
493                         printk(KERN_WARNING 
494                                "N_TXTOFF is not page aligned. Please convert library: %s\n",
495                                file->f_dentry->d_name.name);
496                         error_time = jiffies;
497                 }
498                 down_write(&current->mm->mmap_sem);
499                 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
500                 up_write(&current->mm->mmap_sem);
501                 
502                 file->f_op->read(file, (char __user *)start_addr,
503                         ex.a_text + ex.a_data, &pos);
504                 flush_icache_range((unsigned long) start_addr,
505                                    (unsigned long) start_addr + ex.a_text + ex.a_data);
506
507                 retval = 0;
508                 goto out;
509         }
510         /* Now use mmap to map the library into memory. */
511         down_write(&current->mm->mmap_sem);
512         error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
513                         PROT_READ | PROT_WRITE | PROT_EXEC,
514                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
515                         N_TXTOFF(ex));
516         up_write(&current->mm->mmap_sem);
517         retval = error;
518         if (error != start_addr)
519                 goto out;
520
521         len = PAGE_ALIGN(ex.a_text + ex.a_data);
522         bss = ex.a_text + ex.a_data + ex.a_bss;
523         if (bss > len) {
524                 down_write(&current->mm->mmap_sem);
525                 error = do_brk(start_addr + len, bss - len);
526                 up_write(&current->mm->mmap_sem);
527                 retval = error;
528                 if (error != start_addr + len)
529                         goto out;
530         }
531         retval = 0;
532 out:
533         return retval;
534 }
535
536 static int __init init_aout_binfmt(void)
537 {
538         return register_binfmt(&aout_format);
539 }
540
541 static void __exit exit_aout_binfmt(void)
542 {
543         unregister_binfmt(&aout_format);
544 }
545
546 core_initcall(init_aout_binfmt);
547 module_exit(exit_aout_binfmt);
548 MODULE_LICENSE("GPL");