Merge branch 'cell-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/cell...
[linux-2.6] / arch / ia64 / kernel / fsys.S
1 /*
2  * This file contains the light-weight system call handlers (fsyscall-handlers).
3  *
4  * Copyright (C) 2003 Hewlett-Packard Co
5  *      David Mosberger-Tang <davidm@hpl.hp.com>
6  *
7  * 25-Sep-03 davidm     Implement fsys_rt_sigprocmask().
8  * 18-Feb-03 louisk     Implement fsys_gettimeofday().
9  * 28-Feb-03 davidm     Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
10  *                      probably broke it along the way... ;-)
11  * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
12  *                      it capable of using memory based clocks without falling back to C code.
13  * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
14  *
15  */
16
17 #include <asm/asmmacro.h>
18 #include <asm/errno.h>
19 #include <asm/asm-offsets.h>
20 #include <asm/percpu.h>
21 #include <asm/thread_info.h>
22 #include <asm/sal.h>
23 #include <asm/signal.h>
24 #include <asm/system.h>
25 #include <asm/unistd.h>
26
27 #include "entry.h"
28
29 /*
30  * See Documentation/ia64/fsys.txt for details on fsyscalls.
31  *
32  * On entry to an fsyscall handler:
33  *   r10        = 0 (i.e., defaults to "successful syscall return")
34  *   r11        = saved ar.pfs (a user-level value)
35  *   r15        = system call number
36  *   r16        = "current" task pointer (in normal kernel-mode, this is in r13)
37  *   r32-r39    = system call arguments
38  *   b6         = return address (a user-level value)
39  *   ar.pfs     = previous frame-state (a user-level value)
40  *   PSR.be     = cleared to zero (i.e., little-endian byte order is in effect)
41  *   all other registers may contain values passed in from user-mode
42  *
43  * On return from an fsyscall handler:
44  *   r11        = saved ar.pfs (as passed into the fsyscall handler)
45  *   r15        = system call number (as passed into the fsyscall handler)
46  *   r32-r39    = system call arguments (as passed into the fsyscall handler)
47  *   b6         = return address (as passed into the fsyscall handler)
48  *   ar.pfs     = previous frame-state (as passed into the fsyscall handler)
49  */
50
/*
 * fsys_ni_syscall: light-weight handler for a system call that is not implemented.
 * Returns with r8 = ENOSYS and r10 = -1.  Handlers are entered with r10 = 0, which
 * means "successful syscall return", so the -1 marks this return as a failure.
 */
51 ENTRY(fsys_ni_syscall)
52         .prologue
53         .altrp b6
54         .body
55         mov r8=ENOSYS                           // r8 = error code
56         mov r10=-1                              // r10 = -1: override the "success" default
57         FSYS_RETURN
58 END(fsys_ni_syscall)
59
/*
 * fsys_getpid: return current->tgid in r8.
 * r10 is left at its entry value.  If any TIF_ALLWORK_MASK bit is set in the
 * thread-info flags, the handler branches to fsys_fallback_syscall instead.
 */
60 ENTRY(fsys_getpid)
61         .prologue
62         .altrp b6
63         .body
64         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
65         ;;
66         ld4 r9=[r9]                             // r9 = thread-info flags
67         add r8=IA64_TASK_TGID_OFFSET,r16        // r8 = &current->tgid
68         ;;
69         and r9=TIF_ALLWORK_MASK,r9              // keep only the "work pending" bits
70         ld4 r8=[r8]                             // r8 = current->tgid
71         ;;
72         cmp.ne p8,p0=0,r9                       // p8 <- some work is pending
73 (p8)    br.spnt.many fsys_fallback_syscall      // work pending -> do the heavy-weight syscall
74         FSYS_RETURN
75 END(fsys_getpid)
76
/*
 * fsys_getppid: return current->group_leader->real_parent->tgid in r8.
 *
 * On SMP, real_parent is re-read after the tgid load and the sequence is retried
 * if the pointer changed in between.  On both SMP and UP the handler branches to
 * fsys_fallback_syscall when any TIF_ALLWORK_MASK bit is set in the thread-info
 * flags.  The scratch registers r17-r19 are zeroed before returning.
 */
77 ENTRY(fsys_getppid)
78         .prologue
79         .altrp b6
80         .body
81         add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
82         ;;
83         ld8 r17=[r17]                           // r17 = current->group_leader
84         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
85         ;;
86
87         ld4 r9=[r9]                             // r9 = thread-info flags
88         add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
89         ;;
90         and r9=TIF_ALLWORK_MASK,r9              // keep only the "work pending" bits
91
92 1:      ld8 r18=[r17]                           // r18 = current->group_leader->real_parent
93         ;;
94         cmp.ne p8,p0=0,r9                       // p8 <- some work is pending
95         add r8=IA64_TASK_TGID_OFFSET,r18        // r8 = &current->group_leader->real_parent->tgid
96         ;;
97
98         /*
99          * The .acq is needed to ensure that the read of tgid has returned its data before
100          * we re-check "real_parent".
101          */
102         ld4.acq r8=[r8]                         // r8 = current->group_leader->real_parent->tgid
103 #ifdef CONFIG_SMP
104         /*
105          * Re-read current->group_leader->real_parent.
106          */
107         ld8 r19=[r17]                           // r19 = current->group_leader->real_parent
108 (p8)    br.spnt.many fsys_fallback_syscall      // work pending -> do the heavy-weight syscall
109         ;;
110         cmp.ne p6,p0=r18,r19                    // did real_parent change?
111         mov r19=0                       // i must not leak kernel bits...
112 (p6)    br.cond.spnt.few 1b                     // yes -> redo the read of tgid and the check
113         ;;
114         mov r17=0                       // i must not leak kernel bits...
115         mov r18=0                       // i must not leak kernel bits...
116 #else
        /*
         * The pending-work check applies to UP kernels as well; without this
         * branch p8 would be computed above but never acted upon.
         */
(p8)    br.spnt.many fsys_fallback_syscall      // work pending -> do the heavy-weight syscall
        ;;
117         mov r17=0                       // i must not leak kernel bits...
118         mov r18=0                       // i must not leak kernel bits...
119         mov r19=0                       // i must not leak kernel bits...
120 #endif
121         FSYS_RETURN
122 END(fsys_getppid)
123
/*
 * fsys_set_tid_address: store the argument (r32) in current->clear_child_tid and
 * return current->pid in r8.
 *
 * If r32 is a NaT, -1 is stored instead of the argument.  If any TIF_ALLWORK_MASK
 * bit is set in the thread-info flags, the handler branches to
 * fsys_fallback_syscall; note that the store instructions precede that branch in
 * program order.  r17 and r18 are zeroed before returning.
 */
124 ENTRY(fsys_set_tid_address)
125         .prologue
126         .altrp b6
127         .body
128         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
129         ;;
130         ld4 r9=[r9]                             // r9 = thread-info flags
131         tnat.z p6,p7=r32                // check argument register for being NaT
132         ;;
133         and r9=TIF_ALLWORK_MASK,r9              // keep only the "work pending" bits
134         add r8=IA64_TASK_PID_OFFSET,r16         // r8 = &current->pid
135         add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 // r18 = &current->clear_child_tid
136         ;;
137         ld4 r8=[r8]                             // r8 = current->pid (return value)
138         cmp.ne p8,p0=0,r9                       // p8 <- some work is pending
139         mov r17=-1                              // value stored when the argument is a NaT
140         ;;
141 (p6)    st8 [r18]=r32                           // argument is not a NaT: store it
142 (p7)    st8 [r18]=r17                           // argument is a NaT: store -1
143 (p8)    br.spnt.many fsys_fallback_syscall      // work pending -> do the heavy-weight syscall
144         ;;
145         mov r17=0                       // i must not leak kernel bits...
146         mov r18=0                       // i must not leak kernel bits...
147         FSYS_RETURN
148 END(fsys_set_tid_address)
149
150 /*
151  * Ensure that the time interpolator structure is compatible with the asm code
 *
 * The code below loads the first 8 bytes of the structure with a single ld8 and
 * picks the fields apart with extr: source = bits 0-15, shift = bits 16-23,
 * jitter = bits 24-31, nsec_per_cyc = bits 32-63.  The offsets checked here are
 * the byte offsets that correspond to those bit positions.
152  */
153 #if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
154         || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
155 #error fsys_gettimeofday incompatible with changes to struct time_interpolator
156 #endif
/*
 * CLOCK_REALTIME and CLOCK_MONOTONIC are the clock ids accepted by fsys_clock_gettime.
 * CLOCK_DIVIDE_BY_1000 (bit 14) and CLOCK_ADD_MONOTONIC (bit 15) are the processing
 * flags passed to .gettime in r30; "mov pr = r30,0xc000" copies them into p14 and p15.
 */
157 #define CLOCK_REALTIME 0
158 #define CLOCK_MONOTONIC 1
159 #define CLOCK_DIVIDE_BY_1000 0x4000
160 #define CLOCK_ADD_MONOTONIC 0x8000
161
/*
 * fsys_gettimeofday: light-weight gettimeofday.
 *
 *   r32 = pointer to the result (copied to r31)
 *   r33 = second argument; only checked for being a NaT (-> EINVAL), otherwise unused here
 *
 * Enters the shared body at .gettime with r30 = CLOCK_DIVIDE_BY_1000, so that the
 * nanosecond part is divided by 1000 before it is stored.  fsys_clock_gettime
 * branches to .gettime as well, with its own r31/r30.
 *
 * Returns r8 = 0 on success.  On failure r10 = -1 and r8 = EINVAL (NaT argument)
 * or EFAULT (faulting access to the result buffer).  Branches to
 * fsys_fallback_syscall when work is pending or when the time source is not one
 * of ar.itc / mmio64 / mmio32.
 */
162 ENTRY(fsys_gettimeofday)
163         .prologue
164         .altrp b6
165         .body
166         mov r31 = r32
167         tnat.nz p6,p0 = r33             // guard against NaT argument
168 (p6)    br.cond.spnt.few .fail_einval
169         mov r30 = CLOCK_DIVIDE_BY_1000
170         ;;
171 .gettime:
172         // Register map
173         // Incoming r31 = pointer to address where to place result
174         //          r30 = flags determining how time is processed
175         // r2,r3 = temp r4-r7 preserved
176         // r8 = result nanoseconds
177         // r9 = result seconds
178         // r10 = temporary storage for clock difference
179         // r11 = preserved: saved ar.pfs
180         // r12 = preserved: memory stack
181         // r13 = preserved: thread pointer
182         // r14 = address of mask / mask
183         // r15 = preserved: system call number
184         // r16 = preserved: current task pointer
185         // r17 = wall to monotonic use
186         // r18 = time_interpolator->offset
187         // r19 = address of wall_to_monotonic
188         // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
189         // r21 = shift factor
190         // r22 = address of time interpolator->last_counter
191         // r23 = address of time_interpolator->last_cycle
192         // r24 = address of time_interpolator->offset
193         // r25 = last_cycle value
194         // r26 = last_counter value
195         // r27 = pointer to xtime
196         // r28 = sequence number at the beginning of critical section
197         // r29 = address of seqlock
198         // r30 = time processing flags / memory address
199         // r31 = pointer to result
200         // Predicates
201         // p6,p7 short term use
202         // p8 = timesource ar.itc
203         // p9 = timesource mmio64
204         // p10 = timesource mmio32
205         // p11 = timesource not to be handled by asm code
206         // p12 = memory time source ( = p9 | p10)
207         // p13 = do cmpxchg with time_interpolator_last_cycle
208         // p14 = Divide by 1000
209         // p15 = Add monotonic
210         //
211         // Note that instructions are optimized for McKinley. McKinley can process two
212         // bundles simultaneously and therefore we continuously try to feed the CPU
213         // two bundles and then a stop.
214         tnat.nz p6,p0 = r31     // branch deferred since it does not fit into bundle structure
215         mov pr = r30,0xc000     // Set predicates according to function
216         add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
217         movl r20 = time_interpolator
218         ;;
219         ld8 r20 = [r20]         // get pointer to time_interpolator structure
220         movl r29 = xtime_lock
221         ld4 r2 = [r2]           // process work pending flags
222         movl r27 = xtime
223         ;;      // only one bundle here
224         ld8 r21 = [r20]         // first quad with control information
225         and r2 = TIF_ALLWORK_MASK,r2
226 (p6)    br.cond.spnt.few .fail_einval   // deferred branch
227         ;;
228         add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
229         extr r3 = r21,32,32     // time_interpolator->nsec_per_cyc
230         extr r8 = r21,0,16      // time_interpolator->source
231         cmp.ne p6, p0 = 0, r2   // Fallback if work is scheduled
232 (p6)    br.cond.spnt.many fsys_fallback_syscall
233         ;;
234         cmp.eq p8,p12 = 0,r8    // Check for cpu timer
235         cmp.eq p9,p0 = 1,r8     // MMIO64 ?
236         extr r2 = r21,24,8      // time_interpolator->jitter
237         cmp.eq p10,p0 = 2,r8    // MMIO32 ?
238         cmp.ltu p11,p0 = 2,r8   // function or other clock
239 (p11)   br.cond.spnt.many fsys_fallback_syscall
240         ;;
241         setf.sig f7 = r3        // Setup for scaling of counter
242 (p15)   movl r19 = wall_to_monotonic
243 (p12)   ld8 r30 = [r10]         // r30 = time_interpolator->address (flags already copied to predicates)
244         cmp.ne p13,p0 = r2,r0   // need jitter compensation?
245         extr r21 = r21,16,8     // shift factor
246         ;;
247 .time_redo:
248         .pred.rel.mutex p8,p9,p10
249         ld4.acq r28 = [r29]     // xtime_lock.sequence. Must come first for locking purposes
250 (p8)    mov r2 = ar.itc         // CPU_TIMER. 36 clocks latency!!!
251         add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
252 (p9)    ld8 r2 = [r30]          // readq(ti->address). Could also have latency issues..
253 (p10)   ld4 r2 = [r30]          // readl(ti->address): 4-byte read for the mmio32 source
254 (p13)   add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
255         ;;                      // could be removed by moving the last add upward
256         ld8 r26 = [r22]         // time_interpolator->last_counter
257 (p13)   ld8 r25 = [r23]         // time interpolator->last_cycle
258         add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
259 (p15)   ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET   // wall_to_monotonic.tv_sec; r19 advances to tv_nsec
260         ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET     // xtime.tv_sec; r27 advances to tv_nsec
261         add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
262         ;;
263         ld8 r18 = [r24]         // time_interpolator->offset
264         ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET    // xtime.tv_nsec
265 (p13)   sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
266         ;;
267         ld8 r14 = [r14]         // time_interpolator->mask
268 (p13)   cmp.gt.unc p6,p7 = r3,r0        // check if it is less than last. p6,p7 cleared
269         sub r10 = r2,r26        // current_counter - last_counter
270         ;;
271 (p6)    sub r10 = r25,r26       // time we got was less than last_cycle
272 (p7)    mov ar.ccv = r25        // more than last_cycle. Prep for cmpxchg
273         ;;
274         and r10 = r10,r14       // Apply mask
275         ;;
276         setf.sig f8 = r10
277         nop.i 123
278         ;;
279 (p7)    cmpxchg8.rel r3 = [r23],r2,ar.ccv      // last_cycle <- current counter if it still equals r25
280 EX(.fail_efault, probe.w.fault r31, 3)  // This takes 5 cycles and we have spare time
281         xmpy.l f8 = f8,f7       // nsec_per_cyc*(counter-last_counter)
282 (p15)   add r9 = r9,r17         // Add wall to monotonic.secs to result secs
283         ;;
284 (p15)   ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET  // wall_to_monotonic.tv_nsec; r19 back to tv_sec
285 (p7)    cmp.ne p7,p0 = r25,r3   // if cmpxchg not successful redo
286         // simulate tbit.nz.or p7,p0 = r28,0
287         and r28 = ~1,r28        // Make sequence even to force retry if odd
288         getf.sig r2 = f8
289         mf
290         add r8 = r8,r18         // Add time interpolator offset
291         ;;
292         ld4 r10 = [r29]         // xtime_lock.sequence
293 (p15)   add r8 = r8, r17        // Add monotonic.nsecs to nsecs
294         shr.u r2 = r2,r21
295         ;;              // overloaded 3 bundles!
296         // End critical section.
297         add r8 = r8,r2          // Add xtime.nsecs
298         cmp4.ne.or p7,p0 = r28,r10
299 (p7)    br.cond.dpnt.few .time_redo     // sequence number changed ?
300         // Now r8=tv->tv_nsec and r9=tv->tv_sec
301         mov r10 = r0
302         movl r2 = 1000000000
303         add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
304 (p14)   movl r3 = 2361183241434822607   // Prep for / 1000 hack (the constant is 2^68/125 rounded up)
305         ;;
306 .time_normalize:
307         mov r21 = r8
308         cmp.ge p6,p0 = r8,r2
309 (p14)   shr.u r20 = r8, 3               // We can repeat this if necessary just wasting some time
310         ;;
311 (p14)   setf.sig f8 = r20
312 (p6)    sub r8 = r8,r2
313 (p6)    add r9 = 1,r9                   // two nops before the branch.
314 (p14)   setf.sig f7 = r3                // Chances for repeats are 1 in 10000 for gettod
315 (p6)    br.cond.dpnt.few .time_normalize
316         ;;
317         // Divided by 8 through a shift. Now divide by 125
318         // The compiler was able to do that with a multiply
319         // and a shift and we do the same
320 EX(.fail_efault, probe.w.fault r23, 3)          // This also costs 5 cycles
321 (p14)   xmpy.hu f8 = f8, f7                     // xmpy has 5 cycles latency so use it...
322         ;;
323         mov r8 = r0                             // return value 0; the sub-second result is in r21
324 (p14)   getf.sig r2 = f8
325         ;;
326 (p14)   shr.u r21 = r2, 4                       // high half of the product >> 4 = (nsec/8)/125
327         ;;
328 EX(.fail_efault, st8 [r31] = r9)                // store seconds
329 EX(.fail_efault, st8 [r23] = r21)               // store nanoseconds, or the /1000 value if p14
330         FSYS_RETURN
331 .fail_einval:
332         mov r8 = EINVAL
333         mov r10 = -1
334         FSYS_RETURN
335 .fail_efault:
336         mov r8 = EFAULT
337         mov r10 = -1
338         FSYS_RETURN
339 END(fsys_gettimeofday)
340
/*
 * fsys_clock_gettime: light-weight clock_gettime for CLOCK_REALTIME and CLOCK_MONOTONIC.
 *
 *   r32 = clock id; anything above CLOCK_MONOTONIC (unsigned 4-byte compare) goes
 *         to fsys_fallback_syscall
 *   r33 = pointer to the result (copied to r31)
 *
 * The clock id is shifted left by 15 to form the flag word for .gettime, so
 * CLOCK_MONOTONIC (1) becomes CLOCK_ADD_MONOTONIC (0x8000) and CLOCK_REALTIME (0)
 * yields no flags.  The remaining work is done by the .gettime body inside
 * fsys_gettimeofday.
 */
341 ENTRY(fsys_clock_gettime)
342         .prologue
343         .altrp b6
344         .body
345         cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
346         // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
347 (p6)    br.spnt.few fsys_fallback_syscall
348         mov r31 = r33                           // r31 = pointer to result, as .gettime expects
349         shl r30 = r32,15                        // r30 = processing flags derived from the clock id
350         br.many .gettime
351 END(fsys_clock_gettime)
352
353 /*
354  * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 *
 *   r32 = how, r33 = set, r34 = oset, r35 = sigsetsize
 *
 * Outline of the code below:
 *   - EINVAL if how > SIG_SETMASK (unsigned), if sigsetsize != _NSIG_WORDS*8, or if
 *     how or sigsetsize is a NaT.
 *   - EFAULT if set or oset is a NaT, or if an access to *set / *oset faults.
 *   - Fall back to the heavy-weight syscall if work is pending, if the siglock is
 *     already held (SMP), or if the new mask would leave a signal pending.
 *   - If set == NULL, only the current mask is copied to *oset.
 *   - Otherwise the new mask is computed (SIGKILL and SIGSTOP are filtered out),
 *     _TIF_SIGPENDING is cleared in the thread-info flags, current->blocked is
 *     updated, and the previous mask is copied to *oset if oset != NULL.
 * Returns r8 = 0 on success.
355  */
356 #if _NSIG_WORDS != 1
357 # error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
358 #endif
359 ENTRY(fsys_rt_sigprocmask)
360         .prologue
361         .altrp b6
362         .body
363
364         add r2=IA64_TASK_BLOCKED_OFFSET,r16     // r2 = &current->blocked
365         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
366         cmp4.ltu p6,p0=SIG_SETMASK,r32          // p6 <- how > SIG_SETMASK (invalid)
367
368         cmp.ne p15,p0=r0,r34                    // oset != NULL?
369         tnat.nz p8,p0=r34                       // p8 <- oset is a NaT
370         add r31=IA64_TASK_SIGHAND_OFFSET,r16    // r31 = &current->sighand
371         ;;
372         ld8 r3=[r2]                             // read/prefetch current->blocked
373         ld4 r9=[r9]                             // r9 = thread-info flags
374         tnat.nz.or p6,p0=r35                    // p6 |= sigsetsize is a NaT
375
376         cmp.ne.or p6,p0=_NSIG_WORDS*8,r35       // p6 |= sigsetsize has the wrong value
377         tnat.nz.or p6,p0=r32                    // p6 |= how is a NaT
378 (p6)    br.spnt.few .fail_einval                // fail with EINVAL
379         ;;
380 #ifdef CONFIG_SMP
381         ld8 r31=[r31]                           // r31 <- current->sighand
382 #endif
383         and r9=TIF_ALLWORK_MASK,r9              // keep only the "work pending" bits
384         tnat.nz.or p8,p0=r33                    // p8 |= set is a NaT
385         ;;
386         cmp.ne p7,p0=0,r9                       // p7 <- some work is pending
387         cmp.eq p6,p0=r0,r33                     // set == NULL?
388         add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
389 (p8)    br.spnt.few .fail_efault                // fail with EFAULT
390 (p7)    br.spnt.many fsys_fallback_syscall      // got pending kernel work...
391 (p6)    br.dpnt.many .store_mask                // -> short-circuit to just reading the signal mask
392
393         /* Argh, we actually have to do some work and _update_ the signal mask: */
394
395 EX(.fail_efault, probe.r.fault r33, 3)          // verify user has read-access to *set
396 EX(.fail_efault, ld8 r14=[r33])                 // r14 <- *set
397         mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
398         ;;
399
400         rsm psr.i                               // mask interrupt delivery
401         mov ar.ccv=0                            // compare value for the lock cmpxchg below
402         andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
403
404 #ifdef CONFIG_SMP
405         mov r17=1                               // value to write into the lock word
406         ;;
407         cmpxchg4.acq r18=[r31],r17,ar.ccv       // try to acquire the lock
408         mov r8=EINVAL                   // default to EINVAL
409         ;;
410         ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
411         cmp4.ne p6,p0=r18,r0            // previous lock value != 0 -> the lock was not acquired
412 (p6)    br.cond.spnt.many .lock_contention
413         ;;
414 #else
415         ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
416         mov r8=EINVAL                   // default to EINVAL
417 #endif
418         add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
419         add r19=IA64_TASK_SIGNAL_OFFSET,r16
420         cmp4.eq p6,p0=SIG_BLOCK,r32
421         ;;
422         ld8 r19=[r19]                   // r19 <- current->signal
423         cmp4.eq p7,p0=SIG_UNBLOCK,r32
424         cmp4.eq p8,p0=SIG_SETMASK,r32
425         ;;
426         ld8 r18=[r18]                   // r18 <- current->pending.signal
427         .pred.rel.mutex p6,p7,p8
428 (p6)    or r14=r3,r14                   // SIG_BLOCK
429 (p7)    andcm r14=r3,r14                // SIG_UNBLOCK
430
431 (p8)    mov r14=r14                     // SIG_SETMASK
432 (p6)    mov r8=0                        // clear error code
433         // recalc_sigpending()
434         add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
435
436         add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
437         ;;
438         ld4 r17=[r17]           // r17 <- current->signal->group_stop_count
439 (p7)    mov r8=0                // clear error code
440
441         ld8 r19=[r19]           // r19 <- current->signal->shared_pending
442         ;;
443         cmp4.gt p6,p7=r17,r0    // p6/p7 <- (current->signal->group_stop_count > 0)?
444 (p8)    mov r8=0                // clear error code
445
446         or r18=r18,r19          // r18 <- current->pending | current->signal->shared_pending
447         ;;
448         // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
449         andcm r18=r18,r14
450         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags (again)
451         ;;
452
453 (p7)    cmp.ne.or.andcm p6,p7=r18,r0            // p6/p7 <- signal pending
454         mov r19=0                                       // i must not leak kernel bits...
455 (p6)    br.cond.dpnt.many .sig_pending
456         ;;
457
458 1:      ld4 r17=[r9]                            // r17 <- current->thread_info->flags
459         ;;
460         mov ar.ccv=r17
461         and r18=~_TIF_SIGPENDING,r17            // r18 <- r17 & ~(1 << TIF_SIGPENDING)
462         ;;
463
464         st8 [r2]=r14                            // update current->blocked with new mask
465         cmpxchg4.acq r8=[r9],r18,ar.ccv         // current->thread_info->flags <- r18
466         ;;
467         cmp.ne p6,p0=r17,r8                     // update failed?
468 (p6)    br.cond.spnt.few 1b                     // yes -> retry
469
470 #ifdef CONFIG_SMP
471         st4.rel [r31]=r0                        // release the lock
472 #endif
473         ssm psr.i
474         ;;
475
476         srlz.d                                  // ensure psr.i is set again
477         mov r18=0                                       // i must not leak kernel bits...
478
479 .store_mask:
        // Reached after an update, or directly when set == NULL.  r3 holds the value of
        // current->blocked read before any update; p15 is set when oset != NULL.
480 EX(.fail_efault, (p15) probe.w.fault r34, 3)    // verify user has write-access to *oset
481 EX(.fail_efault, (p15) st8 [r34]=r3)
482         mov r2=0                                        // i must not leak kernel bits...
483         mov r3=0                                        // i must not leak kernel bits...
484         mov r8=0                                // return 0
485         mov r9=0                                        // i must not leak kernel bits...
486         mov r14=0                                       // i must not leak kernel bits...
487         mov r17=0                                       // i must not leak kernel bits...
488         mov r31=0                                       // i must not leak kernel bits...
489         FSYS_RETURN
490
491 .sig_pending:
        // Reached before current->blocked has been written: release the lock,
        // re-enable interrupts and let the heavy-weight syscall do the update.
492 #ifdef CONFIG_SMP
493         st4.rel [r31]=r0                        // release the lock
494 #endif
495         ssm psr.i
496         ;;
497         srlz.d
498         br.sptk.many fsys_fallback_syscall      // with signal pending, do the heavy-weight syscall
499
500 #ifdef CONFIG_SMP
501 .lock_contention:
502         /* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
503         ssm psr.i
504         ;;
505         srlz.d
506         br.sptk.many fsys_fallback_syscall
507 #endif
508 END(fsys_rt_sigprocmask)
509
510 /*
511  * fsys_getcpu doesn't use the third parameter in this implementation. It reads
512  * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
 *
 *   r32 = pointer that receives the cpu number (4 bytes), may be NULL
 *   r33 = pointer that receives the node number (4 bytes), may be NULL
 *
 * A NULL pointer means "do not report this value": it is neither probed nor
 * written.  The node is stored as a full 4-byte value (zero-extended from the
 * 2-byte cpu_to_node_map entry, or 0 without CONFIG_NUMA) so that the whole
 * user variable is defined.
 *
 * Returns r8 = 0 on success.  Fails with EINVAL if r32 or r33 is a NaT and with
 * EFAULT if a non-NULL pointer is not writable.  Branches to
 * fsys_fallback_syscall when any TIF_ALLWORK_MASK bit is set.
513  */
514 ENTRY(fsys_getcpu)
515         .prologue
516         .altrp b6
517         .body
518         ;;
519         add r2=TI_FLAGS+IA64_TASK_SIZE,r16
520         tnat.nz p6,p0 = r32                     // guard against NaT argument
521         add r3=TI_CPU+IA64_TASK_SIZE,r16
522         ;;
523         ld4 r3=[r3]                             // M r3 = thread_info->cpu
524         ld4 r2=[r2]                             // M r2 = thread_info->flags
525 (p6)    br.cond.spnt.few .fail_einval           // B
526         ;;
527         tnat.nz p7,p0 = r33                     // I guard against NaT argument
528 (p7)    br.cond.spnt.few .fail_einval           // B
        ;;                                      // p6/p7 are redefined below
        cmp.ne p6,p0 = r32,r0                   // p6 <- cpu pointer != NULL
        cmp.ne p7,p0 = r33,r0                   // p7 <- node pointer != NULL
        ;;
529 #ifdef CONFIG_NUMA
530         movl r17=cpu_to_node_map
531         ;;
532 EX(.fail_efault, (p6) probe.w.fault r32, 3)     // M This takes 5 cycles
533 EX(.fail_efault, (p7) probe.w.fault r33, 3)     // M This takes 5 cycles
534         shladd r18=r3,1,r17                     // r18 = &cpu_to_node_map[cpu] (2-byte entries)
535         ;;
536         ld2 r20=[r18]                           // r20 = cpu_to_node_map[cpu]
537         and r2 = TIF_ALLWORK_MASK,r2
538         ;;
539         cmp.ne p8,p0=0,r2
540 (p8)    br.spnt.many fsys_fallback_syscall
541         ;;
542         ;;
543 EX(.fail_efault, (p6) st4 [r32] = r3)           // *cpu = cpu number, unless cpu == NULL
544 EX(.fail_efault, (p7) st4 [r33] = r20)          // *node = node number, unless node == NULL
545         mov r8=0
546         ;;
547 #else
548 EX(.fail_efault, (p6) probe.w.fault r32, 3)     // M This takes 5 cycles
549 EX(.fail_efault, (p7) probe.w.fault r33, 3)     // M This takes 5 cycles
550         and r2 = TIF_ALLWORK_MASK,r2
551         ;;
552         cmp.ne p8,p0=0,r2
553 (p8)    br.spnt.many fsys_fallback_syscall
554         ;;
555 EX(.fail_efault, (p6) st4 [r32] = r3)           // *cpu = cpu number, unless cpu == NULL
556 EX(.fail_efault, (p7) st4 [r33] = r0)           // *node = 0 without NUMA, unless node == NULL
557         mov r8=0
558         ;;
559 #endif
560         FSYS_RETURN
561 END(fsys_getcpu)
562
/*
 * fsys_fallback_syscall: entered from a light-weight handler that decided it
 * cannot complete the call itself.  Looks up the normal (heavy-weight) entry
 * point for syscall number r15 in sys_call_table, disables interrupts, loads
 * the registers fsys_bubble_down expects (r18, r21, r26, r27, r29) and then
 * falls through into fsys_bubble_down.
 */
563 ENTRY(fsys_fallback_syscall)
564         .prologue
565         .altrp b6
566         .body
567         /*
568          * We only get here from light-weight syscall handlers.  Thus, we already
569          * know that r15 contains a valid syscall number.  No need to re-check.
570          */
571         adds r17=-1024,r15                      // r17 = table index (syscall number - 1024)
572         movl r14=sys_call_table
573         ;;
574         rsm psr.i                               // interrupts off before entering fsys_bubble_down
575         shladd r18=r17,3,r14                    // r18 = &sys_call_table[r17] (8-byte entries)
576         ;;
577         ld8 r18=[r18]                           // load normal (heavy-weight) syscall entry-point
578         mov r29=psr                             // read psr (12 cyc load latency)
579         mov r27=ar.rsc
580         mov r21=ar.fpsr
581         mov r26=ar.pfs
582 END(fsys_fallback_syscall)
583         /* FALL THROUGH */
/*
 * fsys_bubble_down: switch from fsyscall context to a normal kernel syscall
 * context and call the heavy-weight handler whose address is in r18.  Reached by
 * falling through from fsys_fallback_syscall; the syscall table in this file also
 * references it.  See the comment inside the body for the entry conditions.
 */
584 GLOBAL_ENTRY(fsys_bubble_down)
585         .prologue
586         .altrp b6
587         .body
588         /*
589          * We get here for syscalls that don't have a lightweight
590          * handler.  For those, we need to bubble down into the kernel
591          * and that requires setting up a minimal pt_regs structure,
592          * and initializing the CPU state more or less as if an
593          * interruption had occurred.  To make syscall-restarts work,
594          * we setup pt_regs such that cr_iip points to the second
595          * instruction in syscall_via_break.  Decrementing the IP
596          * hence will restart the syscall via break and not
597          * decrementing IP will return us to the caller, as usual.
598          * Note that we preserve the value of psr.pp rather than
599          * initializing it from dcr.pp.  This makes it possible to
600          * distinguish fsyscall execution from other privileged
601          * execution.
602          *
603          * On entry:
604          *      - normal fsyscall handler register usage, except
605          *        that we also have:
606          *      - r18: address of syscall entry point
607          *      - r21: ar.fpsr
608          *      - r26: ar.pfs
609          *      - r27: ar.rsc
610          *      - r29: psr
611          *
612          * We used to clear some PSR bits here but that requires slow
613          * serialization.  Fortunately, that isn't really necessary.
614          * The rationale is as follows: we used to clear bits
615          * ~PSR_PRESERVED_BITS in PSR.L.  Since
616          * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
617          * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
618          * However,
619          *
620          * PSR.BE : already is turned off in __kernel_syscall_via_epc()
621          * PSR.AC : don't care (kernel normally turns PSR.AC on)
622          * PSR.I  : already turned off by the time fsys_bubble_down gets
623          *          invoked
624          * PSR.DFL: always 0 (kernel never turns it on)
625          * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
626          *          initiative
627          * PSR.DI : always 0 (kernel never turns it on)
628          * PSR.SI : always 0 (kernel never turns it on)
629          * PSR.DB : don't care --- kernel never enables kernel-level
630          *          breakpoints
631          * PSR.TB : must be 0 already; if it wasn't zero on entry to
632          *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
633          *          will trigger a taken branch; the taken-trap-handler then
634          *          converts the syscall into a break-based system-call.
635          */
636         /*
637          * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
638          * The rest we have to synthesize.
639          */
640 #       define PSR_ONE_BITS             ((3 << IA64_PSR_CPL0_BIT)       \
641                                          | (0x1 << IA64_PSR_RI_BIT)     \
642                                          | IA64_PSR_BN | IA64_PSR_I)
643
644         invala                                  // M0|1
645         movl r14=ia64_ret_from_syscall          // X
646
647         nop.m 0
648         movl r28=__kernel_syscall_via_break     // X    create cr.iip
649         ;;
650
651         mov r2=r16                              // A    get task addr to addl-addressable register
652         adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
653         mov r31=pr                              // I0   save pr (2 cyc)
654         ;;
655         st1 [r16]=r0                            // M2|3 clear current->thread.on_ustack flag
656         addl r22=IA64_RBS_OFFSET,r2             // A    compute base of RBS
657         add r3=TI_FLAGS+IA64_TASK_SIZE,r2       // A
658         ;;
659         ld4 r3=[r3]                             // M0|1 r3 = current_thread_info()->flags
660         lfetch.fault.excl.nt1 [r22]             // M0|1 prefetch register backing-store
661         nop.i 0
662         ;;
663         mov ar.rsc=0                            // M2   set enforced lazy mode, pl 0, LE, loadrs=0
664         nop.m 0
665         nop.i 0
666         ;;
667         mov r23=ar.bspstore                     // M2 (12 cyc) save ar.bspstore
668         mov.m r24=ar.rnat                       // M2 (5 cyc) read ar.rnat (dual-issues!)
669         nop.i 0
670         ;;
671         mov ar.bspstore=r22                     // M2 (6 cyc) switch to kernel RBS
672         movl r8=PSR_ONE_BITS                    // X
673         ;;
674         mov r25=ar.unat                         // M2 (5 cyc) save ar.unat
675         mov r19=b6                              // I0   save b6 (2 cyc)
676         mov r20=r1                              // A    save caller's gp in r20
677         ;;
678         or r29=r8,r29                           // A    construct cr.ipsr value to save
679         mov b6=r18                              // I0   copy syscall entry-point to b6 (7 cyc)
680         addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
681
682         mov r18=ar.bsp                          // M2   save (kernel) ar.bsp (12 cyc)
683         cmp.ne pKStk,pUStk=r0,r0                // A    set pKStk <- 0, pUStk <- 1
684         br.call.sptk.many b7=ia64_syscall_setup // B
685         ;;
686         mov ar.rsc=0x3                          // M2   set eager mode, pl 0, LE, loadrs=0
687         mov rp=r14                              // I0   set the real return addr
688         and r3=_TIF_SYSCALL_TRACEAUDIT,r3       // A
689         ;;
690         ssm psr.i                               // M2   we're on kernel stacks now, reenable irqs
691         cmp.eq p8,p0=r3,r0                      // A    p8 <- no _TIF_SYSCALL_TRACEAUDIT bit is set
692 (p10)   br.cond.spnt.many ia64_ret_from_syscall // B    return if bad call-frame or r15 is a NaT
693
694         nop.m 0
695 (p8)    br.call.sptk.many b6=b6                 // B    (ignore return address)
696         br.cond.spnt ia64_trace_syscall         // B    some _TIF_SYSCALL_TRACEAUDIT bit is set
697 END(fsys_bubble_down)
698
699         .rodata
700         .align 8
701         .globl fsyscall_table
702
            // fsyscall_table: table of light-weight system call (fsyscall)
            // handlers, one 8-byte (data8) slot per system call.  Going by the
            // syscall-number comments on the entries, slot 0 corresponds to
            // syscall 1024, i.e. slot index = syscall number - 1024.  Each slot
            // holds either the address of an fsys_* handler or 0.
            // NOTE(review): a 0 slot presumably means "no fsyscall handler for
            // this syscall"; the code that consumes the table is not visible
            // here -- confirm how 0 is treated before relying on it.
            //
            // The data8 directly below is emitted *before* the fsyscall_table
            // label, so it sits at fsyscall_table - 8 (index -1) and is not one
            // of the per-syscall slots.
            // NOTE(review): presumably the table's consumer fetches
            // fsys_bubble_down from that fixed offset -- verify against the
            // code that references fsyscall_table before moving or removing it.
            //
            // The position of every entry is significant: inserting, deleting
            // or reordering a data8 line shifts all later handlers to the wrong
            // syscall number.
703         data8 fsys_bubble_down
704 fsyscall_table:
705         data8 fsys_ni_syscall           // slot 0               // 1024
706         data8 0                         // exit                 // 1025
707         data8 0                         // read
708         data8 0                         // write
709         data8 0                         // open
710         data8 0                         // close
711         data8 0                         // creat                // 1030
712         data8 0                         // link
713         data8 0                         // unlink
714         data8 0                         // execve
715         data8 0                         // chdir
716         data8 0                         // fchdir               // 1035
717         data8 0                         // utimes
718         data8 0                         // mknod
719         data8 0                         // chmod
720         data8 0                         // chown
721         data8 0                         // lseek                // 1040
722         data8 fsys_getpid               // getpid               // 1041
723         data8 fsys_getppid              // getppid              // 1042
724         data8 0                         // mount
725         data8 0                         // umount
726         data8 0                         // setuid               // 1045
727         data8 0                         // getuid
728         data8 0                         // geteuid
729         data8 0                         // ptrace
730         data8 0                         // access
731         data8 0                         // sync                 // 1050
732         data8 0                         // fsync
733         data8 0                         // fdatasync
734         data8 0                         // kill
735         data8 0                         // rename
736         data8 0                         // mkdir                // 1055
737         data8 0                         // rmdir
738         data8 0                         // dup
739         data8 0                         // pipe
740         data8 0                         // times
741         data8 0                         // brk                  // 1060
742         data8 0                         // setgid
743         data8 0                         // getgid
744         data8 0                         // getegid
745         data8 0                         // acct
746         data8 0                         // ioctl                // 1065
747         data8 0                         // fcntl
748         data8 0                         // umask
749         data8 0                         // chroot
750         data8 0                         // ustat
751         data8 0                         // dup2                 // 1070
752         data8 0                         // setreuid
753         data8 0                         // setregid
754         data8 0                         // getresuid
755         data8 0                         // setresuid
756         data8 0                         // getresgid            // 1075
757         data8 0                         // setresgid
758         data8 0                         // getgroups
759         data8 0                         // setgroups
760         data8 0                         // getpgid
761         data8 0                         // setpgid              // 1080
762         data8 0                         // setsid
763         data8 0                         // getsid
764         data8 0                         // sethostname
765         data8 0                         // setrlimit
766         data8 0                         // getrlimit            // 1085
767         data8 0                         // getrusage
768         data8 fsys_gettimeofday         // gettimeofday         // 1087
769         data8 0                         // settimeofday
770         data8 0                         // select
771         data8 0                         // poll                 // 1090
772         data8 0                         // symlink
773         data8 0                         // readlink
774         data8 0                         // uselib
775         data8 0                         // swapon
776         data8 0                         // swapoff              // 1095
777         data8 0                         // reboot
778         data8 0                         // truncate
779         data8 0                         // ftruncate
780         data8 0                         // fchmod
781         data8 0                         // fchown               // 1100
782         data8 0                         // getpriority
783         data8 0                         // setpriority
784         data8 0                         // statfs
785         data8 0                         // fstatfs
786         data8 0                         // gettid               // 1105
787         data8 0                         // semget
788         data8 0                         // semop
789         data8 0                         // semctl
790         data8 0                         // msgget
791         data8 0                         // msgsnd               // 1110
792         data8 0                         // msgrcv
793         data8 0                         // msgctl
794         data8 0                         // shmget
795         data8 0                         // shmat
796         data8 0                         // shmdt                // 1115
797         data8 0                         // shmctl
798         data8 0                         // syslog
799         data8 0                         // setitimer
800         data8 0                         // getitimer
801         data8 0                                                 // 1120
802         data8 0
803         data8 0
804         data8 0                         // vhangup
805         data8 0                         // lchown
806         data8 0                         // remap_file_pages     // 1125
807         data8 0                         // wait4
808         data8 0                         // sysinfo
809         data8 0                         // clone
810         data8 0                         // setdomainname
811         data8 0                         // newuname             // 1130
812         data8 0                         // adjtimex
813         data8 0
814         data8 0                         // init_module
815         data8 0                         // delete_module
816         data8 0                                                 // 1135
817         data8 0
818         data8 0                         // quotactl
819         data8 0                         // bdflush
820         data8 0                         // sysfs
821         data8 0                         // personality          // 1140
822         data8 0                         // afs_syscall
823         data8 0                         // setfsuid
824         data8 0                         // setfsgid
825         data8 0                         // getdents
826         data8 0                         // flock                // 1145
827         data8 0                         // readv
828         data8 0                         // writev
829         data8 0                         // pread64
830         data8 0                         // pwrite64
831         data8 0                         // sysctl               // 1150
832         data8 0                         // mmap
833         data8 0                         // munmap
834         data8 0                         // mlock
835         data8 0                         // mlockall
836         data8 0                         // mprotect             // 1155
837         data8 0                         // mremap
838         data8 0                         // msync
839         data8 0                         // munlock
840         data8 0                         // munlockall
841         data8 0                         // sched_getparam       // 1160
842         data8 0                         // sched_setparam
843         data8 0                         // sched_getscheduler
844         data8 0                         // sched_setscheduler
845         data8 0                         // sched_yield
846         data8 0                         // sched_get_priority_max       // 1165
847         data8 0                         // sched_get_priority_min
848         data8 0                         // sched_rr_get_interval
849         data8 0                         // nanosleep
850         data8 0                         // nfsservctl
851         data8 0                         // prctl                // 1170
852         data8 0                         // getpagesize
853         data8 0                         // mmap2
854         data8 0                         // pciconfig_read
855         data8 0                         // pciconfig_write
856         data8 0                         // perfmonctl           // 1175
857         data8 0                         // sigaltstack
858         data8 0                         // rt_sigaction
859         data8 0                         // rt_sigpending
860         data8 fsys_rt_sigprocmask       // rt_sigprocmask       // 1179
861         data8 0                         // rt_sigqueueinfo      // 1180
862         data8 0                         // rt_sigreturn
863         data8 0                         // rt_sigsuspend
864         data8 0                         // rt_sigtimedwait
865         data8 0                         // getcwd
866         data8 0                         // capget               // 1185
867         data8 0                         // capset
868         data8 0                         // sendfile
869         data8 0
870         data8 0
871         data8 0                         // socket               // 1190
872         data8 0                         // bind
873         data8 0                         // connect
874         data8 0                         // listen
875         data8 0                         // accept
876         data8 0                         // getsockname          // 1195
877         data8 0                         // getpeername
878         data8 0                         // socketpair
879         data8 0                         // send
880         data8 0                         // sendto
881         data8 0                         // recv                 // 1200
882         data8 0                         // recvfrom
883         data8 0                         // shutdown
884         data8 0                         // setsockopt
885         data8 0                         // getsockopt
886         data8 0                         // sendmsg              // 1205
887         data8 0                         // recvmsg
888         data8 0                         // pivot_root
889         data8 0                         // mincore
890         data8 0                         // madvise
891         data8 0                         // newstat              // 1210
892         data8 0                         // newlstat
893         data8 0                         // newfstat
894         data8 0                         // clone2
895         data8 0                         // getdents64
896         data8 0                         // getunwind            // 1215
897         data8 0                         // readahead
898         data8 0                         // setxattr
899         data8 0                         // lsetxattr
900         data8 0                         // fsetxattr
901         data8 0                         // getxattr             // 1220
902         data8 0                         // lgetxattr
903         data8 0                         // fgetxattr
904         data8 0                         // listxattr
905         data8 0                         // llistxattr
906         data8 0                         // flistxattr           // 1225
907         data8 0                         // removexattr
908         data8 0                         // lremovexattr
909         data8 0                         // fremovexattr
910         data8 0                         // tkill
911         data8 0                         // futex                // 1230
912         data8 0                         // sched_setaffinity
913         data8 0                         // sched_getaffinity
914         data8 fsys_set_tid_address      // set_tid_address      // 1233
915         data8 0                         // fadvise64_64
916         data8 0                         // tgkill               // 1235
917         data8 0                         // exit_group
918         data8 0                         // lookup_dcookie
919         data8 0                         // io_setup
920         data8 0                         // io_destroy
921         data8 0                         // io_getevents         // 1240
922         data8 0                         // io_submit
923         data8 0                         // io_cancel
924         data8 0                         // epoll_create
925         data8 0                         // epoll_ctl
926         data8 0                         // epoll_wait           // 1245
927         data8 0                         // restart_syscall
928         data8 0                         // semtimedop
929         data8 0                         // timer_create
930         data8 0                         // timer_settime
931         data8 0                         // timer_gettime        // 1250
932         data8 0                         // timer_getoverrun
933         data8 0                         // timer_delete
934         data8 0                         // clock_settime
935         data8 fsys_clock_gettime        // clock_gettime        // 1254
936         data8 0                         // clock_getres         // 1255
937         data8 0                         // clock_nanosleep
938         data8 0                         // fstatfs64
939         data8 0                         // statfs64
940         data8 0                         // mbind
941         data8 0                         // get_mempolicy        // 1260
942         data8 0                         // set_mempolicy
943         data8 0                         // mq_open
944         data8 0                         // mq_unlink
945         data8 0                         // mq_timedsend
946         data8 0                         // mq_timedreceive      // 1265
947         data8 0                         // mq_notify
948         data8 0                         // mq_getsetattr
949         data8 0                         // kexec_load
950         data8 0                         // vserver
951         data8 0                         // waitid               // 1270
952         data8 0                         // add_key
953         data8 0                         // request_key
954         data8 0                         // keyctl
955         data8 0                         // ioprio_set
956         data8 0                         // ioprio_get           // 1275
957         data8 0                         // move_pages
958         data8 0                         // inotify_init
959         data8 0                         // inotify_add_watch
960         data8 0                         // inotify_rm_watch
961         data8 0                         // migrate_pages        // 1280
962         data8 0                         // openat
963         data8 0                         // mkdirat
964         data8 0                         // mknodat
965         data8 0                         // fchownat
966         data8 0                         // futimesat            // 1285
967         data8 0                         // newfstatat
968         data8 0                         // unlinkat
969         data8 0                         // renameat
970         data8 0                         // linkat
971         data8 0                         // symlinkat            // 1290
972         data8 0                         // readlinkat
973         data8 0                         // fchmodat
974         data8 0                         // faccessat
975         data8 0
976         data8 0                                                 // 1295
977         data8 0                         // unshare
978         data8 0                         // splice
979         data8 0                         // set_robust_list
980         data8 0                         // get_robust_list
981         data8 0                         // sync_file_range      // 1300
982         data8 0                         // tee
983         data8 0                         // vmsplice
984         data8 0
985         data8 fsys_getcpu               // getcpu               // 1304
986
987         // fill in zeros for the remaining entries
            // ".zero" below is a label (note the colon), not an assembler
            // directive; it marks the end of the explicitly listed slots.  The
            // .space then emits zero-filled bytes from that point up to
            // fsyscall_table + 8*NR_syscalls, so the table spans NR_syscalls
            // 8-byte slots in total without listing the trailing ones by hand.
            // NR_syscalls is defined outside this part of the file (presumably
            // via <asm/unistd.h> -- confirm).  New explicit entries must be
            // added above the .zero label so the padding size stays correct.
988         .zero:
989         .space fsyscall_table + 8*NR_syscalls - .zero, 0