Linux-2.6.12-rc2
[linux-2.6] / arch / ia64 / kernel / fsys.S
1 /*
2  * This file contains the light-weight system call handlers (fsyscall-handlers).
3  *
4  * Copyright (C) 2003 Hewlett-Packard Co
5  *      David Mosberger-Tang <davidm@hpl.hp.com>
6  *
7  * 25-Sep-03 davidm     Implement fsys_rt_sigprocmask().
8  * 18-Feb-03 louisk     Implement fsys_gettimeofday().
9  * 28-Feb-03 davidm     Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
10  *                      probably broke it along the way... ;-)
11  * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
12  *                      it capable of using memory based clocks without falling back to C code.
13  */
14
15 #include <asm/asmmacro.h>
16 #include <asm/errno.h>
17 #include <asm/offsets.h>
18 #include <asm/percpu.h>
19 #include <asm/thread_info.h>
20 #include <asm/sal.h>
21 #include <asm/signal.h>
22 #include <asm/system.h>
23 #include <asm/unistd.h>
24
25 #include "entry.h"
26
27 /*
28  * See Documentation/ia64/fsys.txt for details on fsyscalls.
29  *
30  * On entry to an fsyscall handler:
31  *   r10        = 0 (i.e., defaults to "successful syscall return")
32  *   r11        = saved ar.pfs (a user-level value)
33  *   r15        = system call number
34  *   r16        = "current" task pointer (in normal kernel-mode, this is in r13)
35  *   r32-r39    = system call arguments
36  *   b6         = return address (a user-level value)
37  *   ar.pfs     = previous frame-state (a user-level value)
38  *   PSR.be     = cleared to zero (i.e., little-endian byte order is in effect)
39  *   all other registers may contain values passed in from user-mode
40  *
41  * On return from an fsyscall handler:
42  *   r11        = saved ar.pfs (as passed into the fsyscall handler)
43  *   r15        = system call number (as passed into the fsyscall handler)
44  *   r32-r39    = system call arguments (as passed into the fsyscall handler)
45  *   b6         = return address (as passed into the fsyscall handler)
46  *   ar.pfs     = previous frame-state (as passed into the fsyscall handler)
47  */
48
/*
 * fsys_ni_syscall: unconditionally fail the call with ENOSYS.  The first entry of
 * fsyscall_table points here.
 */
49 ENTRY(fsys_ni_syscall)
50         .prologue
51         .altrp b6                       // return address is in b6 (see register conventions above)
52         .body
53         mov r8=ENOSYS                   // r8 = error code
54         mov r10=-1                      // r10 = -1 flags failure (entry default of 0 means success)
55         FSYS_RETURN
56 END(fsys_ni_syscall)
57
/*
 * fsys_getpid: return current->tgid in r8.  Falls back to the heavy-weight syscall
 * path when any TIF_ALLWORK_MASK bit is set in the thread-info flags.
 */
58 ENTRY(fsys_getpid)
59         .prologue
60         .altrp b6
61         .body
62         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current->thread_info->flags
63         ;;
64         ld4 r9=[r9]                             // r9 = thread-info flags
65         add r8=IA64_TASK_TGID_OFFSET,r16        // r8 = &current->tgid
66         ;;
67         and r9=TIF_ALLWORK_MASK,r9              // keep only the "work pending" bits
68         ld4 r8=[r8]                             // r8 = current->tgid
69         ;;
70         cmp.ne p8,p0=0,r9                       // p8 <- some work is pending
71 (p8)    br.spnt.many fsys_fallback_syscall      // pending work -> do the heavy-weight syscall
72         FSYS_RETURN
73 END(fsys_getpid)
74
/*
 * fsys_getppid: return current->group_leader->real_parent->tgid in r8.
 *
 * On SMP, real_parent is read again after the tgid load and the sequence is retried
 * if the pointer changed in between.  Whenever any TIF_ALLWORK_MASK bit is set in the
 * thread-info flags the handler falls back to the heavy-weight syscall path; this
 * check is performed on both SMP and UP builds.  Scratch registers r17-r19 are
 * zeroed before a light-weight return so that no kernel values reach user level.
 */
75 ENTRY(fsys_getppid)
76         .prologue
77         .altrp b6
78         .body
79         add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
80         ;;
81         ld8 r17=[r17]                           // r17 = current->group_leader
82         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current->thread_info->flags
83         ;;
84
85         ld4 r9=[r9]                             // r9 = thread-info flags
86         add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
87         ;;
88         and r9=TIF_ALLWORK_MASK,r9              // keep only the "work pending" bits
89
90 1:      ld8 r18=[r17]                           // r18 = current->group_leader->real_parent
91         ;;
92         cmp.ne p8,p0=0,r9                       // p8 <- some work is pending
93         add r8=IA64_TASK_TGID_OFFSET,r18        // r8 = &current->group_leader->real_parent->tgid
94         ;;
95
96         /*
97          * The .acq is needed to ensure that the read of tgid has returned its data before
98          * we re-check "real_parent".
99          */
100         ld4.acq r8=[r8]                         // r8 = current->group_leader->real_parent->tgid
101 #ifdef CONFIG_SMP
102         /*
103          * Re-read current->group_leader->real_parent.
104          */
105         ld8 r19=[r17]                           // r19 = current->group_leader->real_parent
106 (p8)    br.spnt.many fsys_fallback_syscall
107         ;;
108         cmp.ne p6,p0=r18,r19                    // did real_parent change?
109         mov r19=0                       // i must not leak kernel bits...
110 (p6)    br.cond.spnt.few 1b                     // yes -> redo the read of tgid and the check
111         ;;
112         mov r17=0                       // i must not leak kernel bits...
113         mov r18=0                       // i must not leak kernel bits...
114 #else
        /*
         * No re-read is needed on UP, but pending work must still force the
         * heavy-weight path, exactly as in the SMP arm above.
         */
(p8)    br.spnt.many fsys_fallback_syscall
        ;;
115         mov r17=0                       // i must not leak kernel bits...
116         mov r18=0                       // i must not leak kernel bits...
117         mov r19=0                       // i must not leak kernel bits...
118 #endif
119         FSYS_RETURN
120 END(fsys_getppid)
121
/*
 * fsys_set_tid_address: store the first argument (r32) at
 * current + IA64_TASK_CLEAR_CHILD_TID_OFFSET and return the 4-byte value at
 * current + IA64_TASK_PID_OFFSET in r8.  If r32 is NaT, -1 is stored instead.
 * Falls back to the heavy-weight syscall when any TIF_ALLWORK_MASK bit is set; note
 * that the store is issued ahead of that branch.
 */
122 ENTRY(fsys_set_tid_address)
123         .prologue
124         .altrp b6
125         .body
126         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current->thread_info->flags
127         ;;
128         ld4 r9=[r9]                             // r9 = thread-info flags
129         tnat.z p6,p7=r32                // check argument register for being NaT
130         ;;
131         and r9=TIF_ALLWORK_MASK,r9              // keep only the "work pending" bits
132         add r8=IA64_TASK_PID_OFFSET,r16
133         add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
134         ;;
135         ld4 r8=[r8]                     // r8 = return value (loaded from the PID offset)
136         cmp.ne p8,p0=0,r9               // p8 <- some work is pending
137         mov r17=-1                      // value stored when the argument is NaT
138         ;;
139 (p6)    st8 [r18]=r32                   // argument is a valid value: store it
140 (p7)    st8 [r18]=r17                   // argument is NaT: store -1
141 (p8)    br.spnt.many fsys_fallback_syscall
142         ;;
143         mov r17=0                       // i must not leak kernel bits...
144         mov r18=0                       // i must not leak kernel bits...
145         FSYS_RETURN
146 END(fsys_set_tid_address)
147
148 /*
149  * Ensure that the time interpolator structure is compatible with the asm code
150  */
151 #if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
152         || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
153 #error fsys_gettimeofday incompatible with changes to struct time_interpolator
154 #endif
/*
 * The offsets checked above matter because the time code below loads the first
 * 8 bytes of the structure with a single ld8 and then extracts source (bits 0-15),
 * shift (bits 16-23), jitter (bits 24-31) and nsec_per_cyc (bits 32-63) from it.
 */
/* Clock ids accepted by fsys_clock_gettime; anything larger takes the fallback path. */
155 #define CLOCK_REALTIME 0
156 #define CLOCK_MONOTONIC 1
/*
 * Processing flags handed to .gettime in r30.  They are copied into the predicate
 * registers with "mov pr = r30,0xc000", so bit 14 (0x4000) becomes p14 and
 * bit 15 (0x8000) becomes p15.
 */
157 #define CLOCK_DIVIDE_BY_1000 0x4000
158 #define CLOCK_ADD_MONOTONIC 0x8000
159
/*
 * fsys_gettimeofday: light-weight gettimeofday().
 *
 *   r32 = pointer to the result; seconds are stored at [r32] and the sub-second
 *         part at [r32 + IA64_TIMESPEC_TV_NSEC_OFFSET]
 *   r33 = second argument; only checked for NaT here
 *
 * Returns r8 = 0 on success.  Fails with EINVAL if r32 or r33 is NaT and with EFAULT
 * if the result area is not writable.  Falls back to the heavy-weight syscall when
 * work is pending (TIF_ALLWORK_MASK) or when the time source is not one the asm code
 * handles (source > 2).
 *
 * The body from .gettime onwards is shared with fsys_clock_gettime, which enters
 * with its own r30/r31.  The labels .fail_einval and .fail_efault are also used by
 * fsys_rt_sigprocmask.
 */
160 ENTRY(fsys_gettimeofday)
161         .prologue
162         .altrp b6
163         .body
164         mov r31 = r32                   // r31 = pointer to result (first argument)
165         tnat.nz p6,p0 = r33             // guard against NaT argument
166 (p6)    br.cond.spnt.few .fail_einval
167         mov r30 = CLOCK_DIVIDE_BY_1000  // request the divide by 1000 of the nanosecond value
168         ;;
169 .gettime:
170         // Register map
171         // Incoming r31 = pointer to address where to place result
172         //          r30 = flags determining how time is processed
173         // r2,r3 = temp r4-r7 preserved
174         // r8 = result nanoseconds
175         // r9 = result seconds
176         // r10 = temporary storage for clock difference
177         // r11 = preserved: saved ar.pfs
178         // r12 = preserved: memory stack
179         // r13 = preserved: thread pointer
180         // r14 = address of mask / mask
181         // r15 = preserved: system call number
182         // r16 = preserved: current task pointer
183         // r17 = wall to monotonic use
184         // r18 = time_interpolator->offset
185         // r19 = address of wall_to_monotonic
186         // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
187         // r21 = shift factor
188         // r22 = address of time interpolator->last_counter
189         // r23 = address of time_interpolator->last_cycle
190         // r24 = address of time_interpolator->offset
191         // r25 = last_cycle value
192         // r26 = last_counter value
193         // r27 = pointer to xtime
194         // r28 = sequence number at the beginning of critical section
195         // r29 = address of seqlock
196         // r30 = time processing flags / memory address
197         // r31 = pointer to result
198         // Predicates
199         // p6,p7 short term use
200         // p8 = timesource ar.itc
201         // p9 = timesource mmio64
202         // p10 = timesource mmio32
203         // p11 = timesource not to be handled by asm code
204         // p12 = memory time source ( = p9 | p10)
205         // p13 = do cmpxchg with time_interpolator_last_cycle
206         // p14 = Divide by 1000
207         // p15 = Add monotonic
208         //
209         // Note that instructions are optimized for McKinley. McKinley can process two
210         // bundles simultaneously and therefore we continuously try to feed the CPU
211         // two bundles and then a stop.
212         tnat.nz p6,p0 = r31     // branch deferred since it does not fit into bundle structure
213         mov pr = r30,0xc000     // Set predicates according to function
214         add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
215         movl r20 = time_interpolator
216         ;;
217         ld8 r20 = [r20]         // get pointer to time_interpolator structure
218         movl r29 = xtime_lock
219         ld4 r2 = [r2]           // process work pending flags
220         movl r27 = xtime
221         ;;      // only one bundle here
222         ld8 r21 = [r20]         // first quad with control information
223         and r2 = TIF_ALLWORK_MASK,r2
224 (p6)    br.cond.spnt.few .fail_einval   // deferred branch
225         ;;
226         add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20     // r10 = &time_interpolator->address
227         extr r3 = r21,32,32     // time_interpolator->nsec_per_cyc
228         extr r8 = r21,0,16      // time_interpolator->source
229         cmp.ne p6, p0 = 0, r2   // Fallback if work is scheduled
230 (p6)    br.cond.spnt.many fsys_fallback_syscall
231         ;;
232         cmp.eq p8,p12 = 0,r8    // Check for cpu timer
233         cmp.eq p9,p0 = 1,r8     // MMIO64 ?
234         extr r2 = r21,24,8      // time_interpolator->jitter
235         cmp.eq p10,p0 = 2,r8    // MMIO32 ?
236         cmp.ltu p11,p0 = 2,r8   // function or other clock
237 (p11)   br.cond.spnt.many fsys_fallback_syscall
238         ;;
239         setf.sig f7 = r3        // Setup for scaling of counter
240 (p15)   movl r19 = wall_to_monotonic
241 (p12)   ld8 r30 = [r10]         // r30 = time_interpolator->address (memory based counter)
242         cmp.ne p13,p0 = r2,r0   // need jitter compensation?
243         extr r21 = r21,16,8     // shift factor
244         ;;
245 .time_redo:
246         .pred.rel.mutex p8,p9,p10
247         ld4.acq r28 = [r29]     // xtime_lock.sequence. Must come first for locking purposes
248 (p8)    mov r2 = ar.itc         // CPU_TIMER. 36 clocks latency!!!
249         add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
250 (p9)    ld8 r2 = [r30]          // readq(ti->address). Could also have latency issues..
251 (p10)   ld4 r2 = [r30]          // 32-bit read of ti->address
252 (p13)   add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
253         ;;                      // could be removed by moving the last add upward
254         ld8 r26 = [r22]         // time_interpolator->last_counter
255 (p13)   ld8 r25 = [r23]         // time interpolator->last_cycle
256         add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
257 (p15)   ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET    // wall_to_monotonic seconds; r19 now at the nsec field
258         ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET     // xtime seconds; r27 now at the nsec field
259         add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
260         ;;
261         ld8 r18 = [r24]         // time_interpolator->offset
262         ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET    // xtime.tv_nsec
263 (p13)   sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
264         ;;
265         ld8 r14 = [r14]         // time_interpolator->mask
266 (p13)   cmp.gt.unc p6,p7 = r3,r0        // check if it is less than last. p6,p7 cleared
267         sub r10 = r2,r26        // current_counter - last_counter
268         ;;
269 (p6)    sub r10 = r25,r26       // time we got was less than last_cycle
270 (p7)    mov ar.ccv = r25        // more than last_cycle. Prep for cmpxchg
271         ;;
272         and r10 = r10,r14       // Apply mask
273         ;;
274         setf.sig f8 = r10
275         nop.i 123
276         ;;
277 (p7)    cmpxchg8.rel r3 = [r23],r2,ar.ccv
278 EX(.fail_efault, probe.w.fault r31, 3)  // This takes 5 cycles and we have spare time
279         xmpy.l f8 = f8,f7       // nsec_per_cyc*(counter-last_counter)
280 (p15)   add r9 = r9,r17         // Add wall to monotonic.secs to result secs
281         ;;
282 (p15)   ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
283 (p7)    cmp.ne p7,p0 = r25,r3   // if cmpxchg not successful redo
284         // simulate tbit.nz.or p7,p0 = r28,0
285         and r28 = ~1,r28        // Make sequence even to force retry if odd
286         getf.sig r2 = f8
287         mf
288         add r8 = r8,r18         // Add time interpolator offset
289         ;;
290         ld4 r10 = [r29]         // xtime_lock.sequence
291 (p15)   add r8 = r8, r17        // Add monotonic.nsecs to nsecs
292         shr.u r2 = r2,r21
293         ;;              // overloaded 3 bundles!
294         // End critical section.
295         add r8 = r8,r2          // Add xtime.nsecs
296         cmp4.ne.or p7,p0 = r28,r10
297 (p7)    br.cond.dpnt.few .time_redo     // sequence number changed ?
298         // Now r8=tv->tv_nsec and r9=tv->tv_sec
299         mov r10 = r0
300         movl r2 = 1000000000    // nanoseconds per second, used by the normalization loop
301         add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31     // r23 = address of the second result field
302 (p14)   movl r3 = 2361183241434822607   // Prep for / 1000 hack
303         ;;
304 .time_normalize:
305         mov r21 = r8
306         cmp.ge p6,p0 = r8,r2
307 (p14)   shr.u r20 = r8, 3               // We can repeat this if necessary just wasting some time
308         ;;
309 (p14)   setf.sig f8 = r20
310 (p6)    sub r8 = r8,r2
311 (p6)    add r9 = 1,r9                   // two nops before the branch.
312 (p14)   setf.sig f7 = r3                // Chances for repeats are 1 in 10000 for gettod
313 (p6)    br.cond.dpnt.few .time_normalize
314         ;;
315         // Divided by 8 through shift. Now divide by 125
316         // The compiler was able to do that with a multiply
317         // and a shift and we do the same
318 EX(.fail_efault, probe.w.fault r23, 3)          // This also costs 5 cycles
319 (p14)   xmpy.hu f8 = f8, f7                     // xmpy has 5 cycles latency so use it...
320         ;;
321         mov r8 = r0                     // return value 0
322 (p14)   getf.sig r2 = f8
323         ;;
324 (p14)   shr.u r21 = r2, 4               // r21 = nanoseconds / 1000 (without p14 r21 keeps the nanoseconds)
325         ;;
326 EX(.fail_efault, st8 [r31] = r9)        // store seconds
327 EX(.fail_efault, st8 [r23] = r21)       // store sub-second part
328         FSYS_RETURN
329 .fail_einval:
330         mov r8 = EINVAL
331         mov r10 = -1
332         FSYS_RETURN
333 .fail_efault:
334         mov r8 = EFAULT
335         mov r10 = -1
336         FSYS_RETURN
337 END(fsys_gettimeofday)
338
/*
 * fsys_clock_gettime: light-weight clock_gettime() for CLOCK_REALTIME and
 * CLOCK_MONOTONIC.
 *
 *   r32 = clock id
 *   r33 = pointer to the result
 *
 * Any clock id above CLOCK_MONOTONIC (unsigned 32-bit compare) takes the heavy-weight
 * fallback.  Otherwise the work is done by the .gettime code inside
 * fsys_gettimeofday, entered with r31 = result pointer and r30 = processing flags.
 */
339 ENTRY(fsys_clock_gettime)
340         .prologue
341         .altrp b6
342         .body
343         cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
344         // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
345 (p6)    br.spnt.few fsys_fallback_syscall
346         mov r31 = r33                   // r31 = pointer to result, as .gettime expects
347         shl r30 = r32,15                // CLOCK_MONOTONIC (1) -> 0x8000 = CLOCK_ADD_MONOTONIC, CLOCK_REALTIME -> 0
348         br.many .gettime
349 END(fsys_clock_gettime)
350
351 /*
352  * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
353  */
354 #if _NSIG_WORDS != 1
355 # error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
356 #endif
/*
 * fsys_rt_sigprocmask: light-weight rt_sigprocmask().
 *
 *   r32 = how, r33 = set, r34 = oset, r35 = sigsetsize
 *
 * Outline:
 *   - EINVAL if how > SIG_SETMASK, sigsetsize != _NSIG_WORDS*8, or how/sigsetsize
 *     is NaT; EFAULT if set or oset is NaT or a user access faults.
 *   - Heavy-weight fallback if work is pending (TIF_ALLWORK_MASK), if the siglock
 *     is contended (SMP), or if the new mask would leave a signal pending or
 *     group_stop_count is > 0.
 *   - If set == NULL only the current mask is written to *oset.
 *   - Otherwise, with interrupts masked (and the siglock held on SMP), the new mask
 *     is computed in r14, stored to current->blocked and TIF_SIGPENDING is cleared
 *     with a cmpxchg retry loop.  Finally the old mask (r3) is written to *oset if
 *     oset != NULL.
 *
 * r14 must keep the new mask for the whole of the retry loop, because every
 * iteration stores it to current->blocked; the cmpxchg result therefore goes to r8,
 * which is overwritten on every exit path.
 */
357 ENTRY(fsys_rt_sigprocmask)
358         .prologue
359         .altrp b6
360         .body
361
362         add r2=IA64_TASK_BLOCKED_OFFSET,r16     // r2 = &current->blocked
363         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current->thread_info->flags
364         cmp4.ltu p6,p0=SIG_SETMASK,r32          // p6 <- how > SIG_SETMASK (unsigned)
365
366         cmp.ne p15,p0=r0,r34                    // oset != NULL?
367         tnat.nz p8,p0=r34
368         add r31=IA64_TASK_SIGHAND_OFFSET,r16
369         ;;
370         ld8 r3=[r2]                             // read/prefetch current->blocked
371         ld4 r9=[r9]
372         tnat.nz.or p6,p0=r35
373
374         cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
375         tnat.nz.or p6,p0=r32
376 (p6)    br.spnt.few .fail_einval                // fail with EINVAL
377         ;;
378 #ifdef CONFIG_SMP
379         ld8 r31=[r31]                           // r31 <- current->sighand
380 #endif
381         and r9=TIF_ALLWORK_MASK,r9
382         tnat.nz.or p8,p0=r33
383         ;;
384         cmp.ne p7,p0=0,r9
385         cmp.eq p6,p0=r0,r33                     // set == NULL?
386         add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
387 (p8)    br.spnt.few .fail_efault                // fail with EFAULT
388 (p7)    br.spnt.many fsys_fallback_syscall      // got pending kernel work...
389 (p6)    br.dpnt.many .store_mask                // -> short-circuit to just reading the signal mask
390
391         /* Argh, we actually have to do some work and _update_ the signal mask: */
392
393 EX(.fail_efault, probe.r.fault r33, 3)          // verify user has read-access to *set
394 EX(.fail_efault, ld8 r14=[r33])                 // r14 <- *set
395         mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
396         ;;
397
398         rsm psr.i                               // mask interrupt delivery
399         mov ar.ccv=0
400         andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
401
402 #ifdef CONFIG_SMP
403         mov r17=1
404         ;;
405         cmpxchg4.acq r18=[r31],r17,ar.ccv       // try to acquire the lock
406         mov r8=EINVAL                   // default to EINVAL
407         ;;
408         ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
409         cmp4.ne p6,p0=r18,r0
410 (p6)    br.cond.spnt.many .lock_contention
411         ;;
412 #else
413         ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
414         mov r8=EINVAL                   // default to EINVAL
415 #endif
416         add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
417         add r19=IA64_TASK_SIGNAL_OFFSET,r16
418         cmp4.eq p6,p0=SIG_BLOCK,r32
419         ;;
420         ld8 r19=[r19]                   // r19 <- current->signal
421         cmp4.eq p7,p0=SIG_UNBLOCK,r32
422         cmp4.eq p8,p0=SIG_SETMASK,r32
423         ;;
424         ld8 r18=[r18]                   // r18 <- current->pending.signal
425         .pred.rel.mutex p6,p7,p8
426 (p6)    or r14=r3,r14                   // SIG_BLOCK
427 (p7)    andcm r14=r3,r14                // SIG_UNBLOCK
428
429 (p8)    mov r14=r14                     // SIG_SETMASK
430 (p6)    mov r8=0                        // clear error code
431         // recalc_sigpending()
432         add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
433
434         add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
435         ;;
436         ld4 r17=[r17]           // r17 <- current->signal->group_stop_count
437 (p7)    mov r8=0                // clear error code
438
439         ld8 r19=[r19]           // r19 <- current->signal->shared_pending
440         ;;
441         cmp4.gt p6,p7=r17,r0    // p6/p7 <- (current->signal->group_stop_count > 0)?
442 (p8)    mov r8=0                // clear error code
443
444         or r18=r18,r19          // r18 <- current->pending | current->signal->shared_pending
445         ;;
446         // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
447         andcm r18=r18,r14
448         add r9=TI_FLAGS+IA64_TASK_SIZE,r16
449         ;;
450
451 (p7)    cmp.ne.or.andcm p6,p7=r18,r0            // p6/p7 <- signal pending
452         mov r19=0                                       // i must not leak kernel bits...
453 (p6)    br.cond.dpnt.many .sig_pending
454         ;;
455
456 1:      ld4 r17=[r9]                            // r17 <- current->thread_info->flags
457         ;;
458         mov ar.ccv=r17
459         and r18=~_TIF_SIGPENDING,r17            // r18 <- r17 & ~(1 << TIF_SIGPENDING)
460         ;;
461
462         st8 [r2]=r14                            // update current->blocked with new mask
        // The old flags value is returned in r8, NOT r14: r14 still holds the new
        // mask and is stored again by the st8 above if we have to retry.
463         cmpxchg4.acq r8=[r9],r18,ar.ccv         // current->thread_info->flags <- r18
464         ;;
465         cmp.ne p6,p0=r17,r8                     // update failed?
466 (p6)    br.cond.spnt.few 1b                     // yes -> retry
467
468 #ifdef CONFIG_SMP
469         st4.rel [r31]=r0                        // release the lock
470 #endif
471         ssm psr.i
472         ;;
473
474         srlz.d                                  // ensure psr.i is set again
475         mov r18=0                                       // i must not leak kernel bits...
476
477 .store_mask:
478 EX(.fail_efault, (p15) probe.w.fault r34, 3)    // verify user has write-access to *oset
479 EX(.fail_efault, (p15) st8 [r34]=r3)
480         mov r2=0                                        // i must not leak kernel bits...
481         mov r3=0                                        // i must not leak kernel bits...
482         mov r8=0                                // return 0
483         mov r9=0                                        // i must not leak kernel bits...
484         mov r14=0                                       // i must not leak kernel bits...
485         mov r17=0                                       // i must not leak kernel bits...
486         mov r31=0                                       // i must not leak kernel bits...
487         FSYS_RETURN
488
489 .sig_pending:
490 #ifdef CONFIG_SMP
491         st4.rel [r31]=r0                        // release the lock
492 #endif
493         ssm psr.i
494         ;;
495         srlz.d
496         br.sptk.many fsys_fallback_syscall      // with signal pending, do the heavy-weight syscall
497
498 #ifdef CONFIG_SMP
499 .lock_contention:
500         /* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
501         ssm psr.i
502         ;;
503         srlz.d
504         br.sptk.many fsys_fallback_syscall
505 #endif
506 END(fsys_rt_sigprocmask)
507
/*
 * fsys_fallback_syscall: entered from the light-weight handlers when they cannot
 * complete the call themselves.  Looks up the heavy-weight handler for the syscall
 * number in r15, loads the registers that fsys_bubble_down expects on entry
 * (r18, r21, r26, r27, r29) and then falls through into fsys_bubble_down.
 */
508 ENTRY(fsys_fallback_syscall)
509         .prologue
510         .altrp b6
511         .body
512         /*
513          * We only get here from light-weight syscall handlers.  Thus, we already
514          * know that r15 contains a valid syscall number.  No need to re-check.
515          */
516         adds r17=-1024,r15                      // r17 = syscall number - 1024 (table index)
517         movl r14=sys_call_table
518         ;;
519         rsm psr.i                               // mask interrupt delivery
520         shladd r18=r17,3,r14                    // r18 = sys_call_table + 8*index
521         ;;
522         ld8 r18=[r18]                           // load normal (heavy-weight) syscall entry-point
523         mov r29=psr                             // read psr (12 cyc load latency)
524         mov r27=ar.rsc                          // r27 = ar.rsc, as fsys_bubble_down expects
525         mov r21=ar.fpsr                         // r21 = ar.fpsr, as fsys_bubble_down expects
526         mov r26=ar.pfs                          // r26 = ar.pfs, as fsys_bubble_down expects
527 END(fsys_fallback_syscall)
528         /* FALL THROUGH */
/*
 * fsys_bubble_down: switch from the light-weight fsyscall environment to a normal
 * heavy-weight system call.  Reached by falling through from fsys_fallback_syscall;
 * the word placed just ahead of fsyscall_table also points here.  The entry
 * conditions are described in the comment inside the body.
 */
529 GLOBAL_ENTRY(fsys_bubble_down)
530         .prologue
531         .altrp b6
532         .body
533         /*
534          * We get here for syscalls that don't have a lightweight handler.  For those, we
535          * need to bubble down into the kernel and that requires setting up a minimal
536          * pt_regs structure, and initializing the CPU state more or less as if an
537          * interruption had occurred.  To make syscall-restarts work, we setup pt_regs
538          * such that cr_iip points to the second instruction in syscall_via_break.
539          * Decrementing the IP hence will restart the syscall via break and not
540          * decrementing IP will return us to the caller, as usual.  Note that we preserve
541          * the value of psr.pp rather than initializing it from dcr.pp.  This makes it
542          * possible to distinguish fsyscall execution from other privileged execution.
543          *
544          * On entry:
545          *      - normal fsyscall handler register usage, except that we also have:
546          *      - r18: address of syscall entry point
547          *      - r21: ar.fpsr
548          *      - r26: ar.pfs
549          *      - r27: ar.rsc
550          *      - r29: psr
551          */
552 #       define PSR_PRESERVED_BITS       (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
553                                          | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
554                                          | IA64_PSR_IC)
555         /*
556          * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.  The rest we have
557          * to synthesize.
558          */
559 #       define PSR_ONE_BITS             ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
560                                          | IA64_PSR_BN | IA64_PSR_I)
561
562         invala
563         movl r8=PSR_ONE_BITS
564
565         mov r25=ar.unat                 // save ar.unat (5 cyc)
566         movl r9=PSR_PRESERVED_BITS
567
568         mov ar.rsc=0                    // set enforced lazy mode, pl 0, little-endian, loadrs=0
569         movl r28=__kernel_syscall_via_break
570         ;;
571         mov r23=ar.bspstore             // save ar.bspstore (12 cyc)
572         mov r31=pr                      // save pr (2 cyc)
573         mov r20=r1                      // save caller's gp in r20
574         ;;
575         mov r2=r16                      // copy current task addr to addl-addressable register
576         and r9=r9,r29                   // r9 = psr with only PSR_PRESERVED_BITS kept
577         mov r19=b6                      // save b6 (2 cyc)
578         ;;
579         mov psr.l=r9                    // slam the door (17 cyc to srlz.i)
580         or r29=r8,r29                   // construct cr.ipsr value to save
581         addl r22=IA64_RBS_OFFSET,r2     // compute base of RBS
582         ;;
583         // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks
584         // we may be reading ar.itc after writing to psr.l.  Avoid that message with
585         // this directive:
586         dv_serialize_data
587         mov.m r24=ar.rnat               // read ar.rnat (5 cyc lat)
588         lfetch.fault.excl.nt1 [r22]
589         adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2   // r16 = &current->thread.on_ustack
590
591         // ensure previous insn group is issued before we stall for srlz.i:
592         ;;
593         srlz.i                          // ensure new psr.l has been established
594         /////////////////////////////////////////////////////////////////////////////
595         ////////// from this point on, execution is not interruptible anymore
596         /////////////////////////////////////////////////////////////////////////////
597         addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2    // compute base of memory stack
598         cmp.ne pKStk,pUStk=r0,r0        // set pKStk <- 0, pUStk <- 1
599         ;;
600         st1 [r16]=r0                    // clear current->thread.on_ustack flag
601         mov ar.bspstore=r22             // switch to kernel RBS
602         mov b6=r18                      // copy syscall entry-point to b6 (7 cyc)
603         add r3=TI_FLAGS+IA64_TASK_SIZE,r2
604         ;;
605         ld4 r3=[r3]                             // r3 = current_thread_info()->flags
606         mov r18=ar.bsp                  // save (kernel) ar.bsp (12 cyc)
607         mov ar.rsc=0x3                  // set eager mode, pl 0, little-endian, loadrs=0
608         br.call.sptk.many b7=ia64_syscall_setup
609         ;;
610         ssm psr.i
611         movl r2=ia64_ret_from_syscall
612         ;;
613         mov rp=r2                               // set the real return addr
614         tbit.z p8,p0=r3,TIF_SYSCALL_TRACE       // p8 <- TIF_SYSCALL_TRACE is clear
615         ;;
616 (p10)   br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8
617 (p8)    br.call.sptk.many b6=b6         // ignore this return addr
618         br.cond.sptk ia64_trace_syscall         // TIF_SYSCALL_TRACE set -> traced syscall path
619 END(fsys_bubble_down)
620
621         .rodata
622         .align 8
623         .globl fsyscall_table
624
625         data8 fsys_bubble_down
626 fsyscall_table:
627         data8 fsys_ni_syscall
628         data8 0                         // exit                 // 1025
629         data8 0                         // read
630         data8 0                         // write
631         data8 0                         // open
632         data8 0                         // close
633         data8 0                         // creat                // 1030
634         data8 0                         // link
635         data8 0                         // unlink
636         data8 0                         // execve
637         data8 0                         // chdir
638         data8 0                         // fchdir               // 1035
639         data8 0                         // utimes
640         data8 0                         // mknod
641         data8 0                         // chmod
642         data8 0                         // chown
643         data8 0                         // lseek                // 1040
644         data8 fsys_getpid               // getpid
645         data8 fsys_getppid              // getppid
646         data8 0                         // mount
647         data8 0                         // umount
648         data8 0                         // setuid               // 1045
649         data8 0                         // getuid
650         data8 0                         // geteuid
651         data8 0                         // ptrace
652         data8 0                         // access
653         data8 0                         // sync                 // 1050
654         data8 0                         // fsync
655         data8 0                         // fdatasync
656         data8 0                         // kill
657         data8 0                         // rename
658         data8 0                         // mkdir                // 1055
659         data8 0                         // rmdir
660         data8 0                         // dup
661         data8 0                         // pipe
662         data8 0                         // times
663         data8 0                         // brk                  // 1060
664         data8 0                         // setgid
665         data8 0                         // getgid
666         data8 0                         // getegid
667         data8 0                         // acct
668         data8 0                         // ioctl                // 1065
669         data8 0                         // fcntl
670         data8 0                         // umask
671         data8 0                         // chroot
672         data8 0                         // ustat
673         data8 0                         // dup2                 // 1070
674         data8 0                         // setreuid
675         data8 0                         // setregid
676         data8 0                         // getresuid
677         data8 0                         // setresuid
678         data8 0                         // getresgid            // 1075
679         data8 0                         // setresgid
680         data8 0                         // getgroups
681         data8 0                         // setgroups
682         data8 0                         // getpgid
683         data8 0                         // setpgid              // 1080
684         data8 0                         // setsid
685         data8 0                         // getsid
686         data8 0                         // sethostname
687         data8 0                         // setrlimit
688         data8 0                         // getrlimit            // 1085
689         data8 0                         // getrusage
690         data8 fsys_gettimeofday         // gettimeofday
691         data8 0                         // settimeofday
692         data8 0                         // select
693         data8 0                         // poll                 // 1090
694         data8 0                         // symlink
695         data8 0                         // readlink
696         data8 0                         // uselib
697         data8 0                         // swapon
698         data8 0                         // swapoff              // 1095
699         data8 0                         // reboot
700         data8 0                         // truncate
701         data8 0                         // ftruncate
702         data8 0                         // fchmod
703         data8 0                         // fchown               // 1100
704         data8 0                         // getpriority
705         data8 0                         // setpriority
706         data8 0                         // statfs
707         data8 0                         // fstatfs
708         data8 0                         // gettid               // 1105
709         data8 0                         // semget
710         data8 0                         // semop
711         data8 0                         // semctl
712         data8 0                         // msgget
713         data8 0                         // msgsnd               // 1110
714         data8 0                         // msgrcv
715         data8 0                         // msgctl
716         data8 0                         // shmget
717         data8 0                         // shmat
718         data8 0                         // shmdt                // 1115
719         data8 0                         // shmctl
720         data8 0                         // syslog
721         data8 0                         // setitimer
722         data8 0                         // getitimer
723         data8 0                                                 // 1120
724         data8 0
725         data8 0
726         data8 0                         // vhangup
727         data8 0                         // lchown
728         data8 0                         // remap_file_pages     // 1125
729         data8 0                         // wait4
730         data8 0                         // sysinfo
731         data8 0                         // clone
732         data8 0                         // setdomainname
733         data8 0                         // newuname             // 1130
734         data8 0                         // adjtimex
735         data8 0
736         data8 0                         // init_module
737         data8 0                         // delete_module
738         data8 0                                                 // 1135
739         data8 0
740         data8 0                         // quotactl
741         data8 0                         // bdflush
742         data8 0                         // sysfs
743         data8 0                         // personality          // 1140
744         data8 0                         // afs_syscall
745         data8 0                         // setfsuid
746         data8 0                         // setfsgid
747         data8 0                         // getdents
748         data8 0                         // flock                // 1145
749         data8 0                         // readv
750         data8 0                         // writev
751         data8 0                         // pread64
752         data8 0                         // pwrite64
753         data8 0                         // sysctl               // 1150
754         data8 0                         // mmap
755         data8 0                         // munmap
756         data8 0                         // mlock
757         data8 0                         // mlockall
758         data8 0                         // mprotect             // 1155
759         data8 0                         // mremap
760         data8 0                         // msync
761         data8 0                         // munlock
762         data8 0                         // munlockall
763         data8 0                         // sched_getparam       // 1160
764         data8 0                         // sched_setparam
765         data8 0                         // sched_getscheduler
766         data8 0                         // sched_setscheduler
767         data8 0                         // sched_yield
768         data8 0                         // sched_get_priority_max       // 1165
769         data8 0                         // sched_get_priority_min
770         data8 0                         // sched_rr_get_interval
771         data8 0                         // nanosleep
772         data8 0                         // nfsservctl
773         data8 0                         // prctl                // 1170
774         data8 0                         // getpagesize
775         data8 0                         // mmap2
776         data8 0                         // pciconfig_read
777         data8 0                         // pciconfig_write
778         data8 0                         // perfmonctl           // 1175
779         data8 0                         // sigaltstack
780         data8 0                         // rt_sigaction
781         data8 0                         // rt_sigpending
782         data8 fsys_rt_sigprocmask       // rt_sigprocmask
783         data8 0                         // rt_sigqueueinfo      // 1180
784         data8 0                         // rt_sigreturn
785         data8 0                         // rt_sigsuspend
786         data8 0                         // rt_sigtimedwait
787         data8 0                         // getcwd
788         data8 0                         // capget               // 1185
789         data8 0                         // capset
790         data8 0                         // sendfile
791         data8 0
792         data8 0
793         data8 0                         // socket               // 1190
794         data8 0                         // bind
795         data8 0                         // connect
796         data8 0                         // listen
797         data8 0                         // accept
798         data8 0                         // getsockname          // 1195
799         data8 0                         // getpeername
800         data8 0                         // socketpair
801         data8 0                         // send
802         data8 0                         // sendto
803         data8 0                         // recv                 // 1200
804         data8 0                         // recvfrom
805         data8 0                         // shutdown
806         data8 0                         // setsockopt
807         data8 0                         // getsockopt
808         data8 0                         // sendmsg              // 1205
809         data8 0                         // recvmsg
810         data8 0                         // pivot_root
811         data8 0                         // mincore
812         data8 0                         // madvise
813         data8 0                         // newstat              // 1210
814         data8 0                         // newlstat
815         data8 0                         // newfstat
816         data8 0                         // clone2
817         data8 0                         // getdents64
818         data8 0                         // getunwind            // 1215
819         data8 0                         // readahead
820         data8 0                         // setxattr
821         data8 0                         // lsetxattr
822         data8 0                         // fsetxattr
823         data8 0                         // getxattr             // 1220
824         data8 0                         // lgetxattr
825         data8 0                         // fgetxattr
826         data8 0                         // listxattr
827         data8 0                         // llistxattr
828         data8 0                         // flistxattr           // 1225
829         data8 0                         // removexattr
830         data8 0                         // lremovexattr
831         data8 0                         // fremovexattr
832         data8 0                         // tkill
833         data8 0                         // futex                // 1230
834         data8 0                         // sched_setaffinity
835         data8 0                         // sched_getaffinity
836         data8 fsys_set_tid_address      // set_tid_address
837         data8 0                         // fadvise64_64
838         data8 0                         // tgkill               // 1235
839         data8 0                         // exit_group
840         data8 0                         // lookup_dcookie
841         data8 0                         // io_setup
842         data8 0                         // io_destroy
843         data8 0                         // io_getevents         // 1240
844         data8 0                         // io_submit
845         data8 0                         // io_cancel
846         data8 0                         // epoll_create
847         data8 0                         // epoll_ctl
848         data8 0                         // epoll_wait           // 1245
849         data8 0                         // restart_syscall
850         data8 0                         // semtimedop
851         data8 0                         // timer_create
852         data8 0                         // timer_settime
853         data8 0                         // timer_gettime        // 1250
854         data8 0                         // timer_getoverrun
855         data8 0                         // timer_delete
856         data8 0                         // clock_settime
857         data8 fsys_clock_gettime        // clock_gettime
858         data8 0                         // clock_getres         // 1255
859         data8 0                         // clock_nanosleep
860         data8 0                         // fstatfs64
861         data8 0                         // statfs64
862         data8 0
863         data8 0                                                 // 1260
864         data8 0
865         data8 0                         // mq_open
866         data8 0                         // mq_unlink
867         data8 0                         // mq_timedsend
868         data8 0                         // mq_timedreceive      // 1265
869         data8 0                         // mq_notify
870         data8 0                         // mq_getsetattr
871         data8 0                         // kexec_load
872         data8 0
873         data8 0                                                 // 1270
874         data8 0
875         data8 0
876         data8 0
877         data8 0
878         data8 0                                                 // 1275
879         data8 0
880         data8 0
881         data8 0
882         data8 0
883
884         .org fsyscall_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls
            // How the guard works: every table slot is one 8-byte data8, so the table
            // must end exactly at fsyscall_table + 8*NR_syscalls.  The assembler's .org
            // cannot move the location counter backwards, so if entries were added to
            // the table without bumping NR_syscalls, this line fails to assemble.
            // NOTE(review): a table that is too SHORT is not caught here; .org simply
            // pads forward to the target address.