Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
[linux-2.6] / fs / binfmt_aout.c
1 /*
2  *  linux/fs/binfmt_aout.c
3  *
4  *  Copyright (C) 1991, 1992, 1996  Linus Torvalds
5  */
6
7 #include <linux/module.h>
8
9 #include <linux/time.h>
10 #include <linux/kernel.h>
11 #include <linux/mm.h>
12 #include <linux/mman.h>
13 #include <linux/a.out.h>
14 #include <linux/errno.h>
15 #include <linux/signal.h>
16 #include <linux/string.h>
17 #include <linux/fs.h>
18 #include <linux/file.h>
19 #include <linux/stat.h>
20 #include <linux/fcntl.h>
21 #include <linux/ptrace.h>
22 #include <linux/user.h>
23 #include <linux/slab.h>
24 #include <linux/binfmts.h>
25 #include <linux/personality.h>
26 #include <linux/init.h>
27
28 #include <asm/system.h>
29 #include <asm/uaccess.h>
30 #include <asm/cacheflush.h>
31
/*
 * Forward declarations of the three handlers defined later in this file;
 * they are needed here because the aout_format initializer refers to them.
 */
static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
static int load_aout_library(struct file*);
static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file);
35
/*
 * Binary format descriptor for a.out.  It is handed to register_binfmt()
 * and unregister_binfmt() at init/exit time and to set_binfmt() once an
 * executable has been loaded.
 */
static struct linux_binfmt aout_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_aout_binary,     /* executable loader */
        .load_shlib     = load_aout_library,    /* shared library loader */
        .core_dump      = aout_core_dump,       /* core file writer */
        /* NOTE(review): presumably the smallest core size worth writing - confirm */
        .min_coredump   = PAGE_SIZE
};
43
/*
 * True when x, viewed as an unsigned long, lies at or above TASK_SIZE.
 * set_brk() applies it to the value returned by do_brk() to detect failure.
 */
#define BAD_ADDR(x)     ((unsigned long)(x) >= TASK_SIZE)
45
46 static int set_brk(unsigned long start, unsigned long end)
47 {
48         start = PAGE_ALIGN(start);
49         end = PAGE_ALIGN(end);
50         if (end > start) {
51                 unsigned long addr;
52                 down_write(&current->mm->mmap_sem);
53                 addr = do_brk(start, end - start);
54                 up_write(&current->mm->mmap_sem);
55                 if (BAD_ADDR(addr))
56                         return addr;
57         }
58         return 0;
59 }
60
61 /*
62  * These are the only things you should do on a core-file: use only these
63  * macros to write out all the necessary info.
64  */
65
66 static int dump_write(struct file *file, const void *addr, int nr)
67 {
68         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
69 }
70
/*
 * DUMP_WRITE - write nr bytes at addr to the core file, or abandon the dump.
 *
 * Expands in a scope that provides a `file` variable and an `end_coredump`
 * label; control jumps to that label as soon as dump_write() reports failure.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement: it can be used as the body of an if/else and cannot capture a
 * following `else`.  Callers must terminate it with a semicolon.
 */
#define DUMP_WRITE(addr, nr)    \
        do { \
                if (!dump_write(file, (void *)(addr), (nr))) \
                        goto end_coredump; \
        } while (0)
74
/*
 * DUMP_SEEK - position the core file at the absolute byte offset `offset`.
 *
 * Expands in a scope that provides a `file` variable and an `end_coredump`
 * label.  If the file has an llseek operation it is called with whence 0 and
 * the dump is abandoned (goto end_coredump) unless it returns `offset`;
 * without an llseek operation f_pos is simply assigned.
 *
 * `offset` is evaluated more than once, so it must be free of side effects.
 * Wrapped in do { } while (0) so the macro is a single, self-contained
 * statement; callers must terminate it with a semicolon.
 */
#define DUMP_SEEK(offset) \
        do { \
                if (file->f_op->llseek) { \
                        if (file->f_op->llseek(file, (offset), 0) != (offset)) \
                                goto end_coredump; \
                } else { \
                        file->f_pos = (offset); \
                } \
        } while (0)
80
/*
 * aout_core_dump() - write an a.out style core image of the current task.
 * @signr: signal number; stored in the dump header (dump.signal)
 * @regs:  register state, passed on to dump_thread() (and used to locate
 *         the stack on sparc)
 * @file:  file the image is written to
 *
 * The image is written in this order: the struct user header, the data
 * area, the stack area and finally a copy of the task struct.  The data
 * and/or stack area are left out when they would exceed RLIMIT_CORE or
 * when access_ok() rejects the range.
 *
 * Note that setuid/setgid files won't make a core-dump if the uid/gid
 * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable"
 * field, which also makes sure the core-dumps won't be recursive if the
 * dumping of the process results in another error..
 *
 * Returns has_dumped, which is set to 1 before anything is written, so the
 * result is 1 even when a write or seek fails part-way through.
 */

static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file)
{
        mm_segment_t fs;
        int has_dumped = 0;
        unsigned long dump_start, dump_size;
        struct user dump;
/*
 * START_DATA(u): start address of the data area, derived per architecture
 * from the struct user fields.  No definition is provided for architectures
 * not listed here.
 */
#if defined(__alpha__)
#       define START_DATA(u)    (u.start_data)
#elif defined(__arm__)
#       define START_DATA(u)    ((u.u_tsize << PAGE_SHIFT) + u.start_code)
#elif defined(__sparc__)
#       define START_DATA(u)    (u.u_tsize)
#elif defined(__i386__) || defined(__mc68000__) || defined(__arch_um__)
#       define START_DATA(u)    (u.u_tsize << PAGE_SHIFT)
#endif
/*
 * START_STACK(u): start address of the stack area; on sparc it is the
 * page-aligned frame pointer taken from regs rather than a struct user field.
 */
#ifdef __sparc__
#       define START_STACK(u)   ((regs->u_regs[UREG_FP]) & ~(PAGE_SIZE - 1))
#else
#       define START_STACK(u)   (u.start_stack)
#endif

        /* Remember the caller's address limit; it is restored at end_coredump. */
        fs = get_fs();
        set_fs(KERNEL_DS);
        has_dumped = 1;
        current->flags |= PF_DUMPCORE;
        strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
#ifndef __sparc__
        /* u_ar0 holds the offset of the register block within struct user. */
        dump.u_ar0 = (void *)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump)));
#endif
        dump.signal = signr;
        dump_thread(regs, &dump);

/* If the size of the dump file exceeds the rlimit, then see what would happen
   if we wrote the stack, but not the data area.  */
/* Sizes are compared as byte counts on sparc and as page counts (scaled by
   PAGE_SIZE, plus one extra page) on the other architectures. */
#ifdef __sparc__
        if ((dump.u_dsize+dump.u_ssize) >
            current->signal->rlim[RLIMIT_CORE].rlim_cur)
                dump.u_dsize = 0;
#else
        if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE >
            current->signal->rlim[RLIMIT_CORE].rlim_cur)
                dump.u_dsize = 0;
#endif

/* Make sure we have enough room to write the stack and data areas. */
/* With the data area possibly dropped above, drop the stack too if it alone
   still exceeds the limit. */
#ifdef __sparc__
        if ((dump.u_ssize) >
            current->signal->rlim[RLIMIT_CORE].rlim_cur)
                dump.u_ssize = 0;
#else
        if ((dump.u_ssize+1) * PAGE_SIZE >
            current->signal->rlim[RLIMIT_CORE].rlim_cur)
                dump.u_ssize = 0;
#endif

/* make sure we actually have a data and stack area to dump */
        /* access_ok() must be evaluated against the user address limit. */
        set_fs(USER_DS);
#ifdef __sparc__
        if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize))
                dump.u_dsize = 0;
        if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize))
                dump.u_ssize = 0;
#else
        if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
                dump.u_dsize = 0;
        if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
                dump.u_ssize = 0;
#endif

        /* The header lives in kernel memory, so write it under KERNEL_DS. */
        set_fs(KERNEL_DS);
/* struct user */
        DUMP_WRITE(&dump,sizeof(dump));
/* Now dump all of the user data.  Include malloced stuff as well */
#ifndef __sparc__
        /* The user data starts one page into the file on non-sparc. */
        DUMP_SEEK(PAGE_SIZE);
#endif
/* now we start writing out the user space info */
        set_fs(USER_DS);
/* Dump the data area */
        if (dump.u_dsize != 0) {
                dump_start = START_DATA(dump);
#ifdef __sparc__
                dump_size = dump.u_dsize;
#else
                dump_size = dump.u_dsize << PAGE_SHIFT;
#endif
                DUMP_WRITE(dump_start,dump_size);
        }
/* Now prepare to dump the stack area */
        if (dump.u_ssize != 0) {
                dump_start = START_STACK(dump);
#ifdef __sparc__
                dump_size = dump.u_ssize;
#else
                dump_size = dump.u_ssize << PAGE_SHIFT;
#endif
                DUMP_WRITE(dump_start,dump_size);
        }
/* Finally dump the task struct.  Not used by gdb, but could be useful */
        set_fs(KERNEL_DS);
        DUMP_WRITE(current,sizeof(*current));
/* DUMP_WRITE and DUMP_SEEK jump here on failure. */
end_coredump:
        set_fs(fs);
        return has_dumped;
}
196
197 /*
198  * create_aout_tables() parses the env- and arg-strings in new user
199  * memory and creates the pointer tables from them, and puts their
200  * addresses on the "stack", returning the new stack pointer value.
201  */
202 static unsigned long __user *create_aout_tables(char __user *p, struct linux_binprm * bprm)
203 {
204         char __user * __user *argv;
205         char __user * __user *envp;
206         unsigned long __user *sp;
207         int argc = bprm->argc;
208         int envc = bprm->envc;
209
210         sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
211 #ifdef __sparc__
212         /* This imposes the proper stack alignment for a new process. */
213         sp = (void __user *) (((unsigned long) sp) & ~7);
214         if ((envc+argc+3)&1) --sp;
215 #endif
216 #ifdef __alpha__
217 /* whee.. test-programs are so much fun. */
218         put_user(0, --sp);
219         put_user(0, --sp);
220         if (bprm->loader) {
221                 put_user(0, --sp);
222                 put_user(0x3eb, --sp);
223                 put_user(bprm->loader, --sp);
224                 put_user(0x3ea, --sp);
225         }
226         put_user(bprm->exec, --sp);
227         put_user(0x3e9, --sp);
228 #endif
229         sp -= envc+1;
230         envp = (char __user * __user *) sp;
231         sp -= argc+1;
232         argv = (char __user * __user *) sp;
233 #if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__)
234         put_user((unsigned long) envp,--sp);
235         put_user((unsigned long) argv,--sp);
236 #endif
237         put_user(argc,--sp);
238         current->mm->arg_start = (unsigned long) p;
239         while (argc-->0) {
240                 char c;
241                 put_user(p,argv++);
242                 do {
243                         get_user(c,p++);
244                 } while (c);
245         }
246         put_user(NULL,argv);
247         current->mm->arg_end = current->mm->env_start = (unsigned long) p;
248         while (envc-->0) {
249                 char c;
250                 put_user(p,envp++);
251                 do {
252                         get_user(c,p++);
253                 } while (c);
254         }
255         put_user(NULL,envp);
256         current->mm->env_end = (unsigned long) p;
257         return sp;
258 }
259
260 /*
261  * These are the functions used to load a.out style executables and shared
262  * libraries.  There is no binary dependent code anywhere else.
263  */
264
265 static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
266 {
267         struct exec ex;
268         unsigned long error;
269         unsigned long fd_offset;
270         unsigned long rlim;
271         int retval;
272
273         ex = *((struct exec *) bprm->buf);              /* exec-header */
274         if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
275              N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
276             N_TRSIZE(ex) || N_DRSIZE(ex) ||
277             i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
278                 return -ENOEXEC;
279         }
280
281         /*
282          * Requires a mmap handler. This prevents people from using a.out
283          * as part of an exploit attack against /proc-related vulnerabilities.
284          */
285         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
286                 return -ENOEXEC;
287
288         fd_offset = N_TXTOFF(ex);
289
290         /* Check initial limits. This avoids letting people circumvent
291          * size limits imposed on them by creating programs with large
292          * arrays in the data or bss.
293          */
294         rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
295         if (rlim >= RLIM_INFINITY)
296                 rlim = ~0;
297         if (ex.a_data + ex.a_bss > rlim)
298                 return -ENOMEM;
299
300         /* Flush all traces of the currently running executable */
301         retval = flush_old_exec(bprm);
302         if (retval)
303                 return retval;
304
305         /* OK, This is the point of no return */
306 #if defined(__alpha__)
307         SET_AOUT_PERSONALITY(bprm, ex);
308 #elif defined(__sparc__)
309         set_personality(PER_SUNOS);
310 #if !defined(__sparc_v9__)
311         memcpy(&current->thread.core_exec, &ex, sizeof(struct exec));
312 #endif
313 #else
314         set_personality(PER_LINUX);
315 #endif
316
317         current->mm->end_code = ex.a_text +
318                 (current->mm->start_code = N_TXTADDR(ex));
319         current->mm->end_data = ex.a_data +
320                 (current->mm->start_data = N_DATADDR(ex));
321         current->mm->brk = ex.a_bss +
322                 (current->mm->start_brk = N_BSSADDR(ex));
323         current->mm->free_area_cache = current->mm->mmap_base;
324         current->mm->cached_hole_size = 0;
325
326         current->mm->mmap = NULL;
327         compute_creds(bprm);
328         current->flags &= ~PF_FORKNOEXEC;
329 #ifdef __sparc__
330         if (N_MAGIC(ex) == NMAGIC) {
331                 loff_t pos = fd_offset;
332                 /* Fuck me plenty... */
333                 /* <AOL></AOL> */
334                 down_write(&current->mm->mmap_sem);     
335                 error = do_brk(N_TXTADDR(ex), ex.a_text);
336                 up_write(&current->mm->mmap_sem);
337                 bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
338                           ex.a_text, &pos);
339                 down_write(&current->mm->mmap_sem);
340                 error = do_brk(N_DATADDR(ex), ex.a_data);
341                 up_write(&current->mm->mmap_sem);
342                 bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
343                           ex.a_data, &pos);
344                 goto beyond_if;
345         }
346 #endif
347
348         if (N_MAGIC(ex) == OMAGIC) {
349                 unsigned long text_addr, map_size;
350                 loff_t pos;
351
352                 text_addr = N_TXTADDR(ex);
353
354 #if defined(__alpha__) || defined(__sparc__)
355                 pos = fd_offset;
356                 map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
357 #else
358                 pos = 32;
359                 map_size = ex.a_text+ex.a_data;
360 #endif
361                 down_write(&current->mm->mmap_sem);
362                 error = do_brk(text_addr & PAGE_MASK, map_size);
363                 up_write(&current->mm->mmap_sem);
364                 if (error != (text_addr & PAGE_MASK)) {
365                         send_sig(SIGKILL, current, 0);
366                         return error;
367                 }
368
369                 error = bprm->file->f_op->read(bprm->file,
370                           (char __user *)text_addr,
371                           ex.a_text+ex.a_data, &pos);
372                 if ((signed long)error < 0) {
373                         send_sig(SIGKILL, current, 0);
374                         return error;
375                 }
376                          
377                 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
378         } else {
379                 static unsigned long error_time, error_time2;
380                 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
381                     (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
382                 {
383                         printk(KERN_NOTICE "executable not page aligned\n");
384                         error_time2 = jiffies;
385                 }
386
387                 if ((fd_offset & ~PAGE_MASK) != 0 &&
388                     (jiffies-error_time) > 5*HZ)
389                 {
390                         printk(KERN_WARNING 
391                                "fd_offset is not page aligned. Please convert program: %s\n",
392                                bprm->file->f_path.dentry->d_name.name);
393                         error_time = jiffies;
394                 }
395
396                 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
397                         loff_t pos = fd_offset;
398                         down_write(&current->mm->mmap_sem);
399                         do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
400                         up_write(&current->mm->mmap_sem);
401                         bprm->file->f_op->read(bprm->file,
402                                         (char __user *)N_TXTADDR(ex),
403                                         ex.a_text+ex.a_data, &pos);
404                         flush_icache_range((unsigned long) N_TXTADDR(ex),
405                                            (unsigned long) N_TXTADDR(ex) +
406                                            ex.a_text+ex.a_data);
407                         goto beyond_if;
408                 }
409
410                 down_write(&current->mm->mmap_sem);
411                 error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
412                         PROT_READ | PROT_EXEC,
413                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
414                         fd_offset);
415                 up_write(&current->mm->mmap_sem);
416
417                 if (error != N_TXTADDR(ex)) {
418                         send_sig(SIGKILL, current, 0);
419                         return error;
420                 }
421
422                 down_write(&current->mm->mmap_sem);
423                 error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
424                                 PROT_READ | PROT_WRITE | PROT_EXEC,
425                                 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
426                                 fd_offset + ex.a_text);
427                 up_write(&current->mm->mmap_sem);
428                 if (error != N_DATADDR(ex)) {
429                         send_sig(SIGKILL, current, 0);
430                         return error;
431                 }
432         }
433 beyond_if:
434         set_binfmt(&aout_format);
435
436         retval = set_brk(current->mm->start_brk, current->mm->brk);
437         if (retval < 0) {
438                 send_sig(SIGKILL, current, 0);
439                 return retval;
440         }
441
442         retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
443         if (retval < 0) { 
444                 /* Someone check-me: is this error path enough? */ 
445                 send_sig(SIGKILL, current, 0); 
446                 return retval;
447         }
448
449         current->mm->start_stack =
450                 (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
451 #ifdef __alpha__
452         regs->gp = ex.a_gpvalue;
453 #endif
454         start_thread(regs, ex.a_entry, current->mm->start_stack);
455         if (unlikely(current->ptrace & PT_PTRACED)) {
456                 if (current->ptrace & PT_TRACE_EXEC)
457                         ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
458                 else
459                         send_sig(SIGTRAP, current, 0);
460         }
461         return 0;
462 }
463
464 static int load_aout_library(struct file *file)
465 {
466         struct inode * inode;
467         unsigned long bss, start_addr, len;
468         unsigned long error;
469         int retval;
470         struct exec ex;
471
472         inode = file->f_path.dentry->d_inode;
473
474         retval = -ENOEXEC;
475         error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
476         if (error != sizeof(ex))
477                 goto out;
478
479         /* We come in here for the regular a.out style of shared libraries */
480         if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
481             N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
482             i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
483                 goto out;
484         }
485
486         /*
487          * Requires a mmap handler. This prevents people from using a.out
488          * as part of an exploit attack against /proc-related vulnerabilities.
489          */
490         if (!file->f_op || !file->f_op->mmap)
491                 goto out;
492
493         if (N_FLAGS(ex))
494                 goto out;
495
496         /* For  QMAGIC, the starting address is 0x20 into the page.  We mask
497            this off to get the starting address for the page */
498
499         start_addr =  ex.a_entry & 0xfffff000;
500
501         if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
502                 static unsigned long error_time;
503                 loff_t pos = N_TXTOFF(ex);
504
505                 if ((jiffies-error_time) > 5*HZ)
506                 {
507                         printk(KERN_WARNING 
508                                "N_TXTOFF is not page aligned. Please convert library: %s\n",
509                                file->f_path.dentry->d_name.name);
510                         error_time = jiffies;
511                 }
512                 down_write(&current->mm->mmap_sem);
513                 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
514                 up_write(&current->mm->mmap_sem);
515                 
516                 file->f_op->read(file, (char __user *)start_addr,
517                         ex.a_text + ex.a_data, &pos);
518                 flush_icache_range((unsigned long) start_addr,
519                                    (unsigned long) start_addr + ex.a_text + ex.a_data);
520
521                 retval = 0;
522                 goto out;
523         }
524         /* Now use mmap to map the library into memory. */
525         down_write(&current->mm->mmap_sem);
526         error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
527                         PROT_READ | PROT_WRITE | PROT_EXEC,
528                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
529                         N_TXTOFF(ex));
530         up_write(&current->mm->mmap_sem);
531         retval = error;
532         if (error != start_addr)
533                 goto out;
534
535         len = PAGE_ALIGN(ex.a_text + ex.a_data);
536         bss = ex.a_text + ex.a_data + ex.a_bss;
537         if (bss > len) {
538                 down_write(&current->mm->mmap_sem);
539                 error = do_brk(start_addr + len, bss - len);
540                 up_write(&current->mm->mmap_sem);
541                 retval = error;
542                 if (error != start_addr + len)
543                         goto out;
544         }
545         retval = 0;
546 out:
547         return retval;
548 }
549
550 static int __init init_aout_binfmt(void)
551 {
552         return register_binfmt(&aout_format);
553 }
554
/*
 * exit_aout_binfmt() - unregister the a.out binary format handler.
 *
 * Undoes init_aout_binfmt(); any value returned by unregister_binfmt() is
 * not examined.
 */
static void __exit exit_aout_binfmt(void)
{
        unregister_binfmt(&aout_format);
}
559
/*
 * Hook the registration function up as a core initcall and the
 * unregistration function up as the module exit handler, and declare the
 * module licence.
 */
core_initcall(init_aout_binfmt);
module_exit(exit_aout_binfmt);
MODULE_LICENSE("GPL");