x86/paravirt, 64-bit: don't restore user rsp within sysret
[linux-2.6] / arch / m68k / kernel / sys_m68k.c
1 /*
2  * linux/arch/m68k/kernel/sys_m68k.c
3  *
4  * This file contains various random system calls that
5  * have a non-standard calling sequence on the Linux/m68k
6  * platform.
7  */
8
9 #include <linux/capability.h>
10 #include <linux/errno.h>
11 #include <linux/sched.h>
12 #include <linux/mm.h>
13 #include <linux/fs.h>
14 #include <linux/smp.h>
15 #include <linux/smp_lock.h>
16 #include <linux/sem.h>
17 #include <linux/msg.h>
18 #include <linux/shm.h>
19 #include <linux/stat.h>
20 #include <linux/syscalls.h>
21 #include <linux/mman.h>
22 #include <linux/file.h>
23 #include <linux/utsname.h>
24 #include <linux/ipc.h>
25
26 #include <asm/setup.h>
27 #include <asm/uaccess.h>
28 #include <asm/cachectl.h>
29 #include <asm/traps.h>
30 #include <asm/page.h>
31 #include <asm/unistd.h>
32
33 /* common code for old and new mmaps */
34 static inline long do_mmap2(
35         unsigned long addr, unsigned long len,
36         unsigned long prot, unsigned long flags,
37         unsigned long fd, unsigned long pgoff)
38 {
39         int error = -EBADF;
40         struct file * file = NULL;
41
42         flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
43         if (!(flags & MAP_ANONYMOUS)) {
44                 file = fget(fd);
45                 if (!file)
46                         goto out;
47         }
48
49         down_write(&current->mm->mmap_sem);
50         error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
51         up_write(&current->mm->mmap_sem);
52
53         if (file)
54                 fput(file);
55 out:
56         return error;
57 }
58
59 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
60         unsigned long prot, unsigned long flags,
61         unsigned long fd, unsigned long pgoff)
62 {
63         return do_mmap2(addr, len, prot, flags, fd, pgoff);
64 }
65
66 /*
67  * Perform the select(nd, in, out, ex, tv) and mmap() system
68  * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to
69  * handle more than 4 system call parameters, so these system calls
70  * used a memory block for parameter passing..
71  */
72
/*
 * Argument block that old_mmap() copies in from user space.  Every field
 * is forwarded to do_mmap2(); only 'offset' is checked and converted first.
 */
73 struct mmap_arg_struct {
74         unsigned long addr;     /* passed as do_mmap2()'s addr */
75         unsigned long len;      /* passed as do_mmap2()'s len */
76         unsigned long prot;     /* passed as do_mmap2()'s prot */
77         unsigned long flags;    /* MAP_EXECUTABLE/MAP_DENYWRITE are cleared by old_mmap() */
78         unsigned long fd;       /* passed as do_mmap2()'s fd */
79         unsigned long offset;   /* byte offset; must have no bits outside PAGE_MASK,
                                      shifted right by PAGE_SHIFT before use */
80 };
81
82 asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
83 {
84         struct mmap_arg_struct a;
85         int error = -EFAULT;
86
87         if (copy_from_user(&a, arg, sizeof(a)))
88                 goto out;
89
90         error = -EINVAL;
91         if (a.offset & ~PAGE_MASK)
92                 goto out;
93
94         a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
95
96         error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
97 out:
98         return error;
99 }
100
/*
 * Everything between this "#if 0" and the matching "#endif" is compiled
 * out.  It sketches an mmap variant that takes a 64-bit byte offset in a
 * user-space argument block.
 */
101 #if 0
102 struct mmap_arg_struct64 {
103         __u32 addr;
104         __u32 len;
105         __u32 prot;
106         __u32 flags;
107         __u64 offset; /* 64 bits */
108         __u32 fd;
109 };
110
    /*
     * Returns -EFAULT if the argument block cannot be copied in, -EINVAL for
     * an offset that is not page aligned or whose page number does not fit
     * in 'pgoff', -EBADF if a file is required and fget() fails, otherwise
     * the result of do_mmap_pgoff().
     */
111 asmlinkage long sys_mmap64(struct mmap_arg_struct64 *arg)
112 {
113         int error = -EFAULT;
114         struct file * file = NULL;
115         struct mmap_arg_struct64 a;
116         unsigned long pgoff;
117
118         if (copy_from_user(&a, arg, sizeof(a)))
119                 return -EFAULT;
120
            /* Offset must have no bits set outside PAGE_MASK. */
121         if ((long)a.offset & ~PAGE_MASK)
122                 return -EINVAL;
123
            /* Reject offsets whose page number is truncated by the unsigned long. */
124         pgoff = a.offset >> PAGE_SHIFT;
125         if ((a.offset >> PAGE_SHIFT) != pgoff)
126                 return -EINVAL;
127
            /* A file reference is only needed when MAP_ANONYMOUS is not set. */
128         if (!(a.flags & MAP_ANONYMOUS)) {
129                 error = -EBADF;
130                 file = fget(a.fd);
131                 if (!file)
132                         goto out;
133         }
134         a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
135
            /* do_mmap_pgoff() is called with mmap_sem held for writing. */
136         down_write(&current->mm->mmap_sem);
137         error = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, pgoff);
138         up_write(&current->mm->mmap_sem);
139         if (file)
140                 fput(file);
141 out:
142         return error;
143 }
144 #endif
145
/*
 * Argument block that old_select() copies in from user space; the fields
 * are passed to sys_select() in declaration order.
 */
146 struct sel_arg_struct {
147         unsigned long n;                        /* first argument of sys_select() */
148         fd_set __user *inp, *outp, *exp;        /* second to fourth arguments */
149         struct timeval __user *tvp;             /* fifth argument */
150 };
151
152 asmlinkage int old_select(struct sel_arg_struct __user *arg)
153 {
154         struct sel_arg_struct a;
155
156         if (copy_from_user(&a, arg, sizeof(a)))
157                 return -EFAULT;
158         /* sys_select() does the appropriate kernel locking */
159         return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
160 }
161
162 /*
163  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
164  *
165  * This is really horribly ugly.
166  */
167 asmlinkage int sys_ipc (uint call, int first, int second,
168                         int third, void __user *ptr, long fifth)
169 {
170         int version, ret;
171
172         version = call >> 16; /* hack for backward compatibility */
173         call &= 0xffff;
174
175         if (call <= SEMCTL)
176                 switch (call) {
177                 case SEMOP:
178                         return sys_semop (first, ptr, second);
179                 case SEMGET:
180                         return sys_semget (first, second, third);
181                 case SEMCTL: {
182                         union semun fourth;
183                         if (!ptr)
184                                 return -EINVAL;
185                         if (get_user(fourth.__pad, (void __user *__user *) ptr))
186                                 return -EFAULT;
187                         return sys_semctl (first, second, third, fourth);
188                         }
189                 default:
190                         return -ENOSYS;
191                 }
192         if (call <= MSGCTL)
193                 switch (call) {
194                 case MSGSND:
195                         return sys_msgsnd (first, ptr, second, third);
196                 case MSGRCV:
197                         switch (version) {
198                         case 0: {
199                                 struct ipc_kludge tmp;
200                                 if (!ptr)
201                                         return -EINVAL;
202                                 if (copy_from_user (&tmp, ptr, sizeof (tmp)))
203                                         return -EFAULT;
204                                 return sys_msgrcv (first, tmp.msgp, second,
205                                                    tmp.msgtyp, third);
206                                 }
207                         default:
208                                 return sys_msgrcv (first, ptr,
209                                                    second, fifth, third);
210                         }
211                 case MSGGET:
212                         return sys_msgget ((key_t) first, second);
213                 case MSGCTL:
214                         return sys_msgctl (first, second, ptr);
215                 default:
216                         return -ENOSYS;
217                 }
218         if (call <= SHMCTL)
219                 switch (call) {
220                 case SHMAT:
221                         switch (version) {
222                         default: {
223                                 ulong raddr;
224                                 ret = do_shmat (first, ptr, second, &raddr);
225                                 if (ret)
226                                         return ret;
227                                 return put_user (raddr, (ulong __user *) third);
228                         }
229                         }
230                 case SHMDT:
231                         return sys_shmdt (ptr);
232                 case SHMGET:
233                         return sys_shmget (first, second, third);
234                 case SHMCTL:
235                         return sys_shmctl (first, second, ptr);
236                 default:
237                         return -ENOSYS;
238                 }
239
240         return -EINVAL;
241 }
242
243 /* Convert virtual (user) address VADDR to physical address PADDR */
/*
 * Statement-expression macro.  It executes "ptestr" on VADDR and then
 * reads the %mmusr register.  If MMU_R_040 is set in that value the macro
 * evaluates to the value masked with PAGE_MASK, i.e. a page-aligned
 * address with the offset within the page dropped; otherwise it evaluates
 * to 0.  Callers in this file treat 0 as "page not mapped".
 * NOTE(review): MMU_R_040 is presumably the "resident" status bit --
 * confirm against the 68040 manual.
 */
244 #define virt_to_phys_040(vaddr)                                         \
245 ({                                                                      \
246   unsigned long _mmusr, _paddr;                                         \
247                                                                         \
248   __asm__ __volatile__ (".chip 68040\n\t"                               \
249                         "ptestr (%1)\n\t"                               \
250                         "movec %%mmusr,%0\n\t"                          \
251                         ".chip 68k"                                     \
252                         : "=r" (_mmusr)                                 \
253                         : "a" (vaddr));                                 \
254   _paddr = (_mmusr & MMU_R_040) ? (_mmusr & PAGE_MASK) : 0;             \
255   _paddr;                                                               \
256 })
257
/*
 * cache_flush_040 - cache flush back end used by sys_cacheflush() when
 * CPU_IS_040.
 *
 * addr:  start address of the range (translated with virt_to_phys_040())
 * scope: FLUSH_SCOPE_ALL, FLUSH_SCOPE_LINE or FLUSH_SCOPE_PAGE; any other
 *        value is handled like FLUSH_SCOPE_PAGE
 * cache: FLUSH_CACHE_DATA, FLUSH_CACHE_INSN or FLUSH_CACHE_BOTH; any other
 *        value is handled like FLUSH_CACHE_BOTH
 * len:   length of the range in bytes (not used for FLUSH_SCOPE_ALL)
 *
 * Issues cpusha (whole cache), cpushl (one per 16-byte step) or cpushp
 * (one per page) on the selected cache(s).  Pages for which
 * virt_to_phys_040() yields 0 are skipped.  Always returns 0.
 */
258 static inline int
259 cache_flush_040 (unsigned long addr, int scope, int cache, unsigned long len)
260 {
261   unsigned long paddr, i;
262
263   switch (scope)
264     {
265     case FLUSH_SCOPE_ALL:
266       switch (cache)
267         {
268         case FLUSH_CACHE_DATA:
269           /* This nop is needed for some broken versions of the 68040.  */
270           __asm__ __volatile__ ("nop\n\t"
271                                 ".chip 68040\n\t"
272                                 "cpusha %dc\n\t"
273                                 ".chip 68k");
274           break;
275         case FLUSH_CACHE_INSN:
276           __asm__ __volatile__ ("nop\n\t"
277                                 ".chip 68040\n\t"
278                                 "cpusha %ic\n\t"
279                                 ".chip 68k");
280           break;
281         default:
282         case FLUSH_CACHE_BOTH:
283           __asm__ __volatile__ ("nop\n\t"
284                                 ".chip 68040\n\t"
285                                 "cpusha %bc\n\t"
286                                 ".chip 68k");
287           break;
288         }
289       break;
290
291     case FLUSH_SCOPE_LINE:
292       /* Find the physical address of the first mapped page in the
293          address range.  */
294       if ((paddr = virt_to_phys_040(addr))) {
          /* First page is mapped: add the offset within the page (rounded
             down to a multiple of 16) and turn len into the number of
             16-byte steps needed to cover the range.  */
295         paddr += addr & ~(PAGE_MASK | 15);
296         len = (len + (addr & 15) + 15) >> 4;
297       } else {
          /* First page is not mapped: tmp is the distance from addr to the
             next page boundary.  Stop if the range ends before it,
             otherwise advance one page at a time until a mapped page is
             found or the range is exhausted.  */
298         unsigned long tmp = PAGE_SIZE - (addr & ~PAGE_MASK);
299
300         if (len <= tmp)
301           return 0;
302         addr += tmp;
303         len -= tmp;
304         tmp = PAGE_SIZE;
305         for (;;)
306           {
307             if ((paddr = virt_to_phys_040(addr)))
308               break;
309             if (len <= tmp)
310               return 0;
311             addr += tmp;
312             len -= tmp;
313           }
314         len = (len + 15) >> 4;
315       }
        /* i counts the 16-byte steps left in the current physical page.  */
316       i = (PAGE_SIZE - (paddr & ~PAGE_MASK)) >> 4;
317       while (len--)
318         {
319           switch (cache)
320             {
321             case FLUSH_CACHE_DATA:
322               __asm__ __volatile__ ("nop\n\t"
323                                     ".chip 68040\n\t"
324                                     "cpushl %%dc,(%0)\n\t"
325                                     ".chip 68k"
326                                     : : "a" (paddr));
327               break;
328             case FLUSH_CACHE_INSN:
329               __asm__ __volatile__ ("nop\n\t"
330                                     ".chip 68040\n\t"
331                                     "cpushl %%ic,(%0)\n\t"
332                                     ".chip 68k"
333                                     : : "a" (paddr));
334               break;
335             default:
336             case FLUSH_CACHE_BOTH:
337               __asm__ __volatile__ ("nop\n\t"
338                                     ".chip 68040\n\t"
339                                     "cpushl %%bc,(%0)\n\t"
340                                     ".chip 68k"
341                                     : : "a" (paddr));
342               break;
343             }
            /* Current page used up and more steps remain: move on to the
               next virtual page.  */
344           if (!--i && len)
345             {
346               /*
347                * No need to page align here since it is done by
348                * virt_to_phys_040().
349                */
350               addr += PAGE_SIZE;
351               i = PAGE_SIZE / 16;
352               /* Recompute physical address when crossing a page
353                  boundary. */
354               for (;;)
355                 {
356                   if ((paddr = virt_to_phys_040(addr)))
357                     break;
                    /* Unmapped page: drop one page worth of steps, or stop
                       if no more than that remain.  */
358                   if (len <= i)
359                     return 0;
360                   len -= i;
361                   addr += PAGE_SIZE;
362                 }
363             }
364           else
365             paddr += 16;
366         }
367       break;
368
369     default:
370     case FLUSH_SCOPE_PAGE:
        /* Turn len into the number of pages touched by [addr, addr + len).  */
371       len += (addr & ~PAGE_MASK) + (PAGE_SIZE - 1);
372       for (len >>= PAGE_SHIFT; len--; addr += PAGE_SIZE)
373         {
            /* Pages that are not mapped are skipped.  */
374           if (!(paddr = virt_to_phys_040(addr)))
375             continue;
376           switch (cache)
377             {
378             case FLUSH_CACHE_DATA:
379               __asm__ __volatile__ ("nop\n\t"
380                                     ".chip 68040\n\t"
381                                     "cpushp %%dc,(%0)\n\t"
382                                     ".chip 68k"
383                                     : : "a" (paddr));
384               break;
385             case FLUSH_CACHE_INSN:
386               __asm__ __volatile__ ("nop\n\t"
387                                     ".chip 68040\n\t"
388                                     "cpushp %%ic,(%0)\n\t"
389                                     ".chip 68k"
390                                     : : "a" (paddr));
391               break;
392             default:
393             case FLUSH_CACHE_BOTH:
394               __asm__ __volatile__ ("nop\n\t"
395                                     ".chip 68040\n\t"
396                                     "cpushp %%bc,(%0)\n\t"
397                                     ".chip 68k"
398                                     : : "a" (paddr));
399               break;
400             }
401         }
402       break;
403     }
404   return 0;
405 }
406
/*
 * Statement-expression macro: 68060 counterpart of virt_to_phys_040().
 * It executes "plpar" on an address register that is loaded with VADDR
 * (the input is tied to the output operand) and evaluates to whatever that
 * register holds afterwards, without any masking.  Callers in this file
 * treat a result of 0 as "page not mapped".
 * NOTE(review): presumably plpar replaces the logical address in the
 * register with the physical one -- confirm against the 68060 manual.
 */
407 #define virt_to_phys_060(vaddr)                         \
408 ({                                                      \
409   unsigned long paddr;                                  \
410   __asm__ __volatile__ (".chip 68060\n\t"               \
411                         "plpar (%0)\n\t"                \
412                         ".chip 68k"                     \
413                         : "=a" (paddr)                  \
414                         : "0" (vaddr));                 \
415   (paddr); /* XXX */                                    \
416 })
417
/*
 * cache_flush_060 - cache flush back end used by sys_cacheflush() when
 * CPU_IS_060.
 *
 * addr:  start address of the range (translated with virt_to_phys_060())
 * scope: FLUSH_SCOPE_ALL, FLUSH_SCOPE_LINE or FLUSH_SCOPE_PAGE; any other
 *        value is handled like FLUSH_SCOPE_PAGE
 * cache: FLUSH_CACHE_DATA, FLUSH_CACHE_INSN or FLUSH_CACHE_BOTH; any other
 *        value is handled like FLUSH_CACHE_BOTH
 * len:   length of the range in bytes (not used for FLUSH_SCOPE_ALL)
 *
 * Issues cpusha (whole cache), cpushl (one per 16-byte step) or cpushp
 * (one per page) on the selected cache(s).  Pages for which
 * virt_to_phys_060() yields 0 are skipped.  Always returns 0.
 */
418 static inline int
419 cache_flush_060 (unsigned long addr, int scope, int cache, unsigned long len)
420 {
421   unsigned long paddr, i;
422
423   /*
424    * 68060 manual says:
425    *  cpush %dc : flush DC, remains valid (with our %cacr setup)
426    *  cpush %ic : invalidate IC
427    *  cpush %bc : flush DC + invalidate IC
428    */
429   switch (scope)
430     {
431     case FLUSH_SCOPE_ALL:
432       switch (cache)
433         {
434         case FLUSH_CACHE_DATA:
435           __asm__ __volatile__ (".chip 68060\n\t"
436                                 "cpusha %dc\n\t"
437                                 ".chip 68k");
438           break;
439         case FLUSH_CACHE_INSN:
440           __asm__ __volatile__ (".chip 68060\n\t"
441                                 "cpusha %ic\n\t"
442                                 ".chip 68k");
443           break;
444         default:
445         case FLUSH_CACHE_BOTH:
446           __asm__ __volatile__ (".chip 68060\n\t"
447                                 "cpusha %bc\n\t"
448                                 ".chip 68k");
449           break;
450         }
451       break;
452
453     case FLUSH_SCOPE_LINE:
454       /* Find the physical address of the first mapped page in the
455          address range.  */
        /* Round addr down to a multiple of 16 and grow len by the amount
           that was rounded off, so the same bytes stay covered.  */
456       len += addr & 15;
457       addr &= -16;
458       if (!(paddr = virt_to_phys_060(addr))) {
          /* First page is not mapped: tmp is the distance from addr to the
             next page boundary.  Stop if the range ends before it,
             otherwise advance one page at a time until a mapped page is
             found or the range is exhausted.  */
459         unsigned long tmp = PAGE_SIZE - (addr & ~PAGE_MASK);
460
461         if (len <= tmp)
462           return 0;
463         addr += tmp;
464         len -= tmp;
465         tmp = PAGE_SIZE;
466         for (;;)
467           {
468             if ((paddr = virt_to_phys_060(addr)))
469               break;
470             if (len <= tmp)
471               return 0;
472             addr += tmp;
473             len -= tmp;
474           }
475       }
        /* From here on len counts 16-byte steps and i counts the steps
           left in the current physical page.  */
476       len = (len + 15) >> 4;
477       i = (PAGE_SIZE - (paddr & ~PAGE_MASK)) >> 4;
478       while (len--)
479         {
480           switch (cache)
481             {
482             case FLUSH_CACHE_DATA:
483               __asm__ __volatile__ (".chip 68060\n\t"
484                                     "cpushl %%dc,(%0)\n\t"
485                                     ".chip 68k"
486                                     : : "a" (paddr));
487               break;
488             case FLUSH_CACHE_INSN:
489               __asm__ __volatile__ (".chip 68060\n\t"
490                                     "cpushl %%ic,(%0)\n\t"
491                                     ".chip 68k"
492                                     : : "a" (paddr));
493               break;
494             default:
495             case FLUSH_CACHE_BOTH:
496               __asm__ __volatile__ (".chip 68060\n\t"
497                                     "cpushl %%bc,(%0)\n\t"
498                                     ".chip 68k"
499                                     : : "a" (paddr));
500               break;
501             }
            /* Current page used up and more steps remain: move on to the
               next virtual page.  */
502           if (!--i && len)
503             {
504
505               /*
506                * We just want to jump to the first cache line
507                * in the next page.
508                */
509               addr += PAGE_SIZE;
510               addr &= PAGE_MASK;
511
512               i = PAGE_SIZE / 16;
513               /* Recompute physical address when crossing a page
514                  boundary. */
515               for (;;)
516                 {
517                   if ((paddr = virt_to_phys_060(addr)))
518                     break;
                    /* Unmapped page: drop one page worth of steps, or stop
                       if no more than that remain.  */
519                   if (len <= i)
520                     return 0;
521                   len -= i;
522                   addr += PAGE_SIZE;
523                 }
524             }
525           else
526             paddr += 16;
527         }
528       break;
529
530     default:
531     case FLUSH_SCOPE_PAGE:
        /* Turn len into the number of pages touched by [addr, addr + len).  */
532       len += (addr & ~PAGE_MASK) + (PAGE_SIZE - 1);
533       addr &= PAGE_MASK;        /* Workaround for bug in some
534                                    revisions of the 68060 */
535       for (len >>= PAGE_SHIFT; len--; addr += PAGE_SIZE)
536         {
            /* Pages that are not mapped are skipped.  */
537           if (!(paddr = virt_to_phys_060(addr)))
538             continue;
539           switch (cache)
540             {
541             case FLUSH_CACHE_DATA:
542               __asm__ __volatile__ (".chip 68060\n\t"
543                                     "cpushp %%dc,(%0)\n\t"
544                                     ".chip 68k"
545                                     : : "a" (paddr));
546               break;
547             case FLUSH_CACHE_INSN:
548               __asm__ __volatile__ (".chip 68060\n\t"
549                                     "cpushp %%ic,(%0)\n\t"
550                                     ".chip 68k"
551                                     : : "a" (paddr));
552               break;
553             default:
554             case FLUSH_CACHE_BOTH:
555               __asm__ __volatile__ (".chip 68060\n\t"
556                                     "cpushp %%bc,(%0)\n\t"
557                                     ".chip 68k"
558                                     : : "a" (paddr));
559               break;
560             }
561         }
562       break;
563     }
564   return 0;
565 }
566
567 /* sys_cacheflush -- flush (part of) the processor cache.  */
568 asmlinkage int
569 sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
570 {
571         struct vm_area_struct *vma;
572         int ret = -EINVAL;
573
574         lock_kernel();
575         if (scope < FLUSH_SCOPE_LINE || scope > FLUSH_SCOPE_ALL ||
576             cache & ~FLUSH_CACHE_BOTH)
577                 goto out;
578
579         if (scope == FLUSH_SCOPE_ALL) {
580                 /* Only the superuser may explicitly flush the whole cache. */
581                 ret = -EPERM;
582                 if (!capable(CAP_SYS_ADMIN))
583                         goto out;
584         } else {
585                 /*
586                  * Verify that the specified address region actually belongs
587                  * to this process.
588                  */
589                 vma = find_vma (current->mm, addr);
590                 ret = -EINVAL;
591                 /* Check for overflow.  */
592                 if (addr + len < addr)
593                         goto out;
594                 if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
595                         goto out;
596         }
597
598         if (CPU_IS_020_OR_030) {
599                 if (scope == FLUSH_SCOPE_LINE && len < 256) {
600                         unsigned long cacr;
601                         __asm__ ("movec %%cacr, %0" : "=r" (cacr));
602                         if (cache & FLUSH_CACHE_INSN)
603                                 cacr |= 4;
604                         if (cache & FLUSH_CACHE_DATA)
605                                 cacr |= 0x400;
606                         len >>= 2;
607                         while (len--) {
608                                 __asm__ __volatile__ ("movec %1, %%caar\n\t"
609                                                       "movec %0, %%cacr"
610                                                       : /* no outputs */
611                                                       : "r" (cacr), "r" (addr));
612                                 addr += 4;
613                         }
614                 } else {
615                         /* Flush the whole cache, even if page granularity requested. */
616                         unsigned long cacr;
617                         __asm__ ("movec %%cacr, %0" : "=r" (cacr));
618                         if (cache & FLUSH_CACHE_INSN)
619                                 cacr |= 8;
620                         if (cache & FLUSH_CACHE_DATA)
621                                 cacr |= 0x800;
622                         __asm__ __volatile__ ("movec %0, %%cacr" : : "r" (cacr));
623                 }
624                 ret = 0;
625                 goto out;
626         } else {
627             /*
628              * 040 or 060: don't blindly trust 'scope', someone could
629              * try to flush a few megs of memory.
630              */
631
632             if (len>=3*PAGE_SIZE && scope<FLUSH_SCOPE_PAGE)
633                 scope=FLUSH_SCOPE_PAGE;
634             if (len>=10*PAGE_SIZE && scope<FLUSH_SCOPE_ALL)
635                 scope=FLUSH_SCOPE_ALL;
636             if (CPU_IS_040) {
637                 ret = cache_flush_040 (addr, scope, cache, len);
638             } else if (CPU_IS_060) {
639                 ret = cache_flush_060 (addr, scope, cache, len);
640             }
641         }
642 out:
643         unlock_kernel();
644         return ret;
645 }
646
647 asmlinkage int sys_getpagesize(void)
648 {
649         return PAGE_SIZE;
650 }
651
652 /*
653  * Do a system call from kernel instead of calling sys_execve so we
654  * end up with proper pt_regs.
655  */
/*
 * filename, argv and envp are placed in %d1, %d2 and %d3, the system call
 * number __NR_execve in %d0, and "trap #0" is executed.  The value found
 * in %d0 afterwards is returned.
 */
656 int kernel_execve(const char *filename, char *const argv[], char *const envp[])
657 {
            /* Each variable is pinned to the register named in its asm(). */
658         register long __res asm ("%d0") = __NR_execve;
659         register long __a asm ("%d1") = (long)(filename);
660         register long __b asm ("%d2") = (long)(argv);
661         register long __c asm ("%d3") = (long)(envp);
            /* "+d": %d0 is both input (call number) and output (result). */
662         asm volatile ("trap  #0" : "+d" (__res)
663                         : "d" (__a), "d" (__b), "d" (__c));
664         return __res;
665 }