Merge git://git.kernel.org/pub/scm/linux/kernel/git/jk/spufs
[linux-2.6] / arch / ia64 / kvm / trampoline.S
1 /* Save all processor states
2  *
3  * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
4  * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
5  */
6
7 #include <asm/asmmacro.h>
8 #include "asm-offsets.h"
9
10
11 #define CTX(name)    VMM_CTX_##name##_OFFSET  /* e.g. CTX(B0) expands to VMM_CTX_B0_OFFSET */
12
13         /* SAVE_BRANCH_REGS: store branch registers b0 ~ b5 into the context.
14          *      r32:            context_t base address (r2, r3, r16, r17 are overwritten)
15          */
16 #define SAVE_BRANCH_REGS                        \
17         add     r2 = CTX(B0),r32;  /* r2 is the store pointer for b0, b2, b4 */ \
18         add     r3 = CTX(B1),r32;  /* r3 is the store pointer for b1, b3, b5 */ \
19         mov     r16 = b0;                       \
20         mov     r17 = b1;                       \
21         ;;                                      \
22         st8     [r2]=r16,16;       /* post-increment 16: b2 goes to CTX(B0)+16 */ \
23         st8     [r3]=r17,16;       /* post-increment 16: b3 goes to CTX(B1)+16 */ \
24         ;;                                      \
25         mov     r16 = b2;                       \
26         mov     r17 = b3;                       \
27         ;;                                      \
28         st8     [r2]=r16,16;                    \
29         st8     [r3]=r17,16;                    \
30         ;;                                      \
31         mov     r16 = b4;                       \
32         mov     r17 = b5;                       \
33         ;;                                      \
34         st8     [r2]=r16;          /* last pair: no post-increment */ \
35         st8     [r3]=r17;                       \
36         ;;
37
38         /* RESTORE_BRANCH_REGS: reload branch registers b0 ~ b5 from the context.
39          *      r33:            context_t base address (r2, r3, r16, r17 are overwritten)
40          */
41 #define RESTORE_BRANCH_REGS                     \
42         add     r2 = CTX(B0),r33;  /* r2 is the load pointer for b0, b2, b4 */ \
43         add     r3 = CTX(B1),r33;  /* r3 is the load pointer for b1, b3, b5 */ \
44         ;;                                      \
45         ld8     r16=[r2],16;       /* post-increment 16: next load is CTX(B0)+16 */ \
46         ld8     r17=[r3],16;       /* post-increment 16: next load is CTX(B1)+16 */ \
47         ;;                                      \
48         mov     b0 = r16;                       \
49         mov     b1 = r17;                       \
50         ;;                                      \
51         ld8     r16=[r2],16;                    \
52         ld8     r17=[r3],16;                    \
53         ;;                                      \
54         mov     b2 = r16;                       \
55         mov     b3 = r17;                       \
56         ;;                                      \
57         ld8     r16=[r2];          /* last pair: no post-increment */ \
58         ld8     r17=[r3];                       \
59         ;;                                      \
60         mov     b4=r16;                         \
61         mov     b5=r17;                         \
62         ;;
63
64
65         /* SAVE_GENERAL_REGS: spill general registers into the context.
66          *      r32: context_t base address (r2 and r3 are overwritten as store pointers)
67          *      bsw == 1
68          *      Saves r4 ~ r7 and also r12 and r13, all with st8.spill
69          */
70 #define SAVE_GENERAL_REGS                       \
71         add     r2=CTX(R4),r32;                 \
72         add     r3=CTX(R5),r32;                 \
73         ;;                                      \
74 .mem.offset 0,0;                                \
75         st8.spill       [r2]=r4,16;  /* r2: CTX(R4), then +16 for r6 */ \
76 .mem.offset 8,0;                                \
77         st8.spill       [r3]=r5,16;  /* r3: CTX(R5), then +16 for r7 */ \
78         ;;                                      \
79 .mem.offset 0,0;                                \
80         st8.spill       [r2]=r6,48;  /* +48: r12 is stored at CTX(R4)+64 */ \
81 .mem.offset 8,0;                                \
82         st8.spill       [r3]=r7,48;  /* +48: r13 is stored at CTX(R5)+64 */ \
83         ;;                                      \
84 .mem.offset 0,0;                                \
85     st8.spill    [r2]=r12;                      \
86 .mem.offset 8,0;                                \
87     st8.spill    [r3]=r13;                      \
88     ;;
89
90         /* RESTORE_GENERAL_REGS: fill r4 ~ r7, r12 and r13 from the context (ld8.fill).
91          *      r33: context_t base address (r2 and r3 are overwritten as load pointers)
92          *      bsw == 1
93          */
94 #define RESTORE_GENERAL_REGS                    \
95         add     r2=CTX(R4),r33;                 \
96         add     r3=CTX(R5),r33;                 \
97         ;;                                      \
98         ld8.fill        r4=[r2],16;  /* r2: CTX(R4), then +16 for r6 */ \
99         ld8.fill        r5=[r3],16;  /* r3: CTX(R5), then +16 for r7 */ \
100         ;;                                      \
101         ld8.fill        r6=[r2],48;  /* +48: r12 is read from CTX(R4)+64 */ \
102         ld8.fill        r7=[r3],48;  /* +48: r13 is read from CTX(R5)+64 */ \
103         ;;                                      \
104         ld8.fill    r12=[r2];                   \
105         ld8.fill    r13 =[r3];                  \
106         ;;
107
108
109
110
111         /* SAVE_KERNEL_REGS: store kernel registers ar.k0 ~ ar.k7 into the context.
112          *      r32:            context_t base address (r2, r3, r16, r17 are overwritten)
113          */
114 #define SAVE_KERNEL_REGS                        \
115         add     r2 = CTX(KR0),r32;  /* r2 is the store pointer for ar.k0, k2, k4, k6 */ \
116         add     r3 = CTX(KR1),r32;  /* r3 is the store pointer for ar.k1, k3, k5, k7 */ \
117         mov     r16 = ar.k0;                    \
118         mov     r17 = ar.k1;                    \
119         ;;                                      \
120         st8     [r2] = r16,16;      /* post-increment 16: each pointer skips the other's slot */ \
121         st8     [r3] = r17,16;                  \
122         ;;                                      \
123         mov     r16 = ar.k2;                    \
124         mov     r17 = ar.k3;                    \
125         ;;                                      \
126         st8     [r2] = r16,16;                  \
127         st8     [r3] = r17,16;                  \
128         ;;                                      \
129         mov     r16 = ar.k4;                    \
130         mov     r17 = ar.k5;                    \
131         ;;                                      \
132         st8     [r2] = r16,16;                  \
133         st8     [r3] = r17,16;                  \
134         ;;                                      \
135         mov     r16 = ar.k6;                    \
136         mov     r17 = ar.k7;                    \
137         ;;                                      \
138         st8     [r2] = r16;         /* last pair: no post-increment */ \
139         st8     [r3] = r17;                     \
140         ;;
141
142
143
144         /* RESTORE_KERNEL_REGS: reload kernel registers ar.k0 ~ ar.k7 from the context.
145          *      r33:            context_t base address (r2, r3, r16, r17 are overwritten)
146          */
147 #define RESTORE_KERNEL_REGS                     \
148         add     r2 = CTX(KR0),r33;  /* r2 is the load pointer for ar.k0, k2, k4, k6 */ \
149         add     r3 = CTX(KR1),r33;  /* r3 is the load pointer for ar.k1, k3, k5, k7 */ \
150         ;;                                      \
151         ld8     r16=[r2],16;        /* post-increment 16: each pointer skips the other's slot */ \
152         ld8     r17=[r3],16;                    \
153         ;;                                      \
154         mov     ar.k0=r16;                      \
155         mov     ar.k1=r17;                      \
156         ;;                                      \
157         ld8     r16=[r2],16;                    \
158         ld8     r17=[r3],16;                    \
159         ;;                                      \
160         mov     ar.k2=r16;                      \
161         mov     ar.k3=r17;                      \
162         ;;                                      \
163         ld8     r16=[r2],16;                    \
164         ld8     r17=[r3],16;                    \
165         ;;                                      \
166         mov     ar.k4=r16;                      \
167         mov     ar.k5=r17;                      \
168         ;;                                      \
169         ld8     r16=[r2],16;        /* r2/r3 are still advanced here but not used again in this macro */ \
170         ld8     r17=[r3],16;                    \
171         ;;                                      \
172         mov     ar.k6=r16;                      \
173         mov     ar.k7=r17;                      \
174         ;;
175
176
177
178         /* SAVE_APP_REGS: store application registers into the context, in the order
179          *      r32:            context_t base address (r2 and r16 are overwritten)
180          * bspstore, rnat, fcr, eflag, cflg, fsr, fir, fdr, unat, fpsr, pfs, lc. */
181 #define SAVE_APP_REGS                           \
182         add  r2 = CTX(BSPSTORE),r32;            \
183         mov  r16 = ar.bspstore;                 \
184         ;;                                      \
185         st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE); /* post-increment moves r2 to the next field's slot */ \
186         mov  r16 = ar.rnat;     /* next value is read in the same group as the previous store */ \
187         ;;                                      \
188         st8  [r2] = r16,CTX(FCR)-CTX(RNAT);     \
189         mov  r16 = ar.fcr;                      \
190         ;;                                      \
191         st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);    \
192         mov  r16 = ar.eflag;                    \
193         ;;                                      \
194         st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);   \
195         mov  r16 = ar.cflg;                     \
196         ;;                                      \
197         st8  [r2] = r16,CTX(FSR)-CTX(CFLG);     \
198         mov  r16 = ar.fsr;                      \
199         ;;                                      \
200         st8  [r2] = r16,CTX(FIR)-CTX(FSR);      \
201         mov  r16 = ar.fir;                      \
202         ;;                                      \
203         st8  [r2] = r16,CTX(FDR)-CTX(FIR);      \
204         mov  r16 = ar.fdr;                      \
205         ;;                                      \
206         st8  [r2] = r16,CTX(UNAT)-CTX(FDR);     \
207         mov  r16 = ar.unat;                     \
208         ;;                                      \
209         st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);    \
210         mov  r16 = ar.fpsr;                     \
211         ;;                                      \
212         st8  [r2] = r16,CTX(PFS)-CTX(FPSR);     \
213         mov  r16 = ar.pfs;                      \
214         ;;                                      \
215         st8  [r2] = r16,CTX(LC)-CTX(PFS);       \
216         mov  r16 = ar.lc;                       \
217         ;;                                      \
218         st8  [r2] = r16;        /* ar.lc is the last field: no post-increment */ \
219         ;;
220
221         /* RESTORE_APP_REGS: reload application registers from the context, in the order
222          *      r33:            context_t base address (r2 and r16 are overwritten)
223          * bspstore, rnat, fcr, eflag, cflg, fsr, fir, fdr, unat, fpsr, pfs, lc. */
224 #define RESTORE_APP_REGS                        \
225         add  r2=CTX(BSPSTORE),r33;              \
226         ;;                                      \
227         ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);  /* post-increment moves r2 to the next field's slot */ \
228         ;;                                      \
229         mov  ar.bspstore=r16;   /* written before ar.rnat, which is restored next */ \
230         ld8  r16=[r2],CTX(FCR)-CTX(RNAT);       \
231         ;;                                      \
232         mov  ar.rnat=r16;                       \
233         ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);      \
234         ;;                                      \
235         mov  ar.fcr=r16;                        \
236         ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);     \
237         ;;                                      \
238         mov  ar.eflag=r16;                      \
239         ld8  r16=[r2],CTX(FSR)-CTX(CFLG);       \
240         ;;                                      \
241         mov  ar.cflg=r16;                       \
242         ld8  r16=[r2],CTX(FIR)-CTX(FSR);        \
243         ;;                                      \
244         mov  ar.fsr=r16;                        \
245         ld8  r16=[r2],CTX(FDR)-CTX(FIR);        \
246         ;;                                      \
247         mov  ar.fir=r16;                        \
248         ld8  r16=[r2],CTX(UNAT)-CTX(FDR);       \
249         ;;                                      \
250         mov  ar.fdr=r16;                        \
251         ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);      \
252         ;;                                      \
253         mov  ar.unat=r16;                       \
254         ld8  r16=[r2],CTX(PFS)-CTX(FPSR);       \
255         ;;                                      \
256         mov  ar.fpsr=r16;                       \
257         ld8  r16=[r2],CTX(LC)-CTX(PFS);         \
258         ;;                                      \
259         mov  ar.pfs=r16;                        \
260         ld8  r16=[r2];          /* ar.lc is the last field: no post-increment */ \
261         ;;                                      \
262         mov  ar.lc=r16;                         \
263         ;;
264
265         /* SAVE_CTL_REGS: store control registers cr.dcr, cr.iva and cr.pta into the context.
266          *      r32:            context_t base address (r2 and r16 are overwritten)
267          */
268 #define SAVE_CTL_REGS                           \
269         add     r2 = CTX(DCR),r32;              \
270         mov     r16 = cr.dcr;                   \
271         ;;                                      \
272         st8     [r2] = r16,CTX(IVA)-CTX(DCR);   /* post-increment moves r2 to the IVA slot */ \
273         ;;                                      \
274         mov     r16 = cr.iva;                   \
275         ;;                                      \
276         st8     [r2] = r16,CTX(PTA)-CTX(IVA);   /* post-increment moves r2 to the PTA slot */ \
277         ;;                                      \
278         mov r16 = cr.pta;                       \
279         ;;                                      \
280         st8 [r2] = r16 ;                        \
281         ;;
282
283         /* RESTORE_CTL_REGS: reload control registers cr.dcr, cr.iva and cr.pta from the context.
284          *      r33:            context_t base address (r2 and r16 are overwritten)
285          */
286 #define RESTORE_CTL_REGS                                \
287         add     r2 = CTX(DCR),r33;                      \
288         ;;                                              \
289         ld8     r16 = [r2],CTX(IVA)-CTX(DCR);           /* post-increment moves r2 to the IVA slot */ \
290         ;;                                              \
291         mov     cr.dcr = r16;                           \
292         dv_serialize_data;      /* not defined in this file; presumably from <asm/asmmacro.h> - confirm */ \
293         ;;                                              \
294         ld8     r16 = [r2],CTX(PTA)-CTX(IVA);           /* post-increment moves r2 to the PTA slot */ \
295         ;;                                              \
296         mov     cr.iva = r16;                           \
297         dv_serialize_data;                              \
298         ;;                                              \
299         ld8 r16 = [r2];                                 \
300         ;;                                              \
301         mov cr.pta = r16;                               \
302         dv_serialize_data;                              \
303         ;;
304
305
306         /* SAVE_REGION_REGS: store region registers rr0 ~ rr5 and rr7 into the context (rr6 is not saved).
307          *      r32:            context_t base address (r2, r16, r17, r18 are overwritten)
308          */
309 #define SAVE_REGION_REGS                        \
310         add     r2=CTX(RR0),r32;                \
311         mov     r16=rr[r0];     /* rr0: index register is r0 */ \
312         dep.z   r18=1,61,3;     /* r18 = 1 << 61, the index used for rr1 */ \
313         ;;                                      \
314         st8     [r2]=r16,8;     /* store rr0, advance to the next 8-byte slot */ \
315         mov     r17=rr[r18];    /* read rr1 while the index for rr2 is built below */ \
316         dep.z   r18=2,61,3;                     \
317         ;;                                      \
318         st8     [r2]=r17,8;                     \
319         mov     r16=rr[r18];                    \
320         dep.z   r18=3,61,3;                     \
321         ;;                                      \
322         st8     [r2]=r16,8;                     \
323         mov     r17=rr[r18];                    \
324         dep.z   r18=4,61,3;                     \
325         ;;                                      \
326         st8     [r2]=r17,8;                     \
327         mov     r16=rr[r18];                    \
328         dep.z   r18=5,61,3;                     \
329         ;;                                      \
330         st8     [r2]=r16,8;                     \
331         mov     r17=rr[r18];                    \
332         dep.z   r18=7,61,3;     /* index goes from 5 straight to 7 */ \
333         ;;                                      \
334         st8     [r2]=r17,16;    /* store rr5, then advance 16 so one slot is left unwritten */ \
335         mov     r16=rr[r18];                    \
336         ;;                                      \
337         st8     [r2]=r16,8;     /* store rr7; r2 is not used again in this macro */ \
338         ;;
339
340         /* RESTORE_REGION_REGS: reload rr0 ~ rr5 and rr7 from the context, then srlz.i.
341          *      r33:context_t base address (r2, r18, r20 ~ r25 and r27 are overwritten)
342          */
343 #define RESTORE_REGION_REGS     \
344         add     r2=CTX(RR0),r33;\
345         mov r18=r0;             /* index for rr0 */ \
346         ;;                      \
347         ld8     r20=[r2],8;     \
348         ;;      /* r20 = saved rr0 */ \
349         ld8     r21=[r2],8;     \
350         ;;      /* r21 = saved rr1 */ \
351         ld8     r22=[r2],8;     \
352         ;;      /* r22 = saved rr2 */ \
353         ld8     r23=[r2],8;     \
354         ;;      /* r23 = saved rr3 */ \
355         ld8     r24=[r2],8;     \
356         ;;      /* r24 = saved rr4 */ \
357         ld8     r25=[r2],16;    /* advance 16: the slot after rr5 is not read */ \
358         ;;      /* r25 = saved rr5 */ \
359         ld8     r27=[r2];       \
360         ;;      /* r27 = saved rr7 */ \
361         mov rr[r18]=r20;        /* write rr0 (r18 == 0) */ \
362         dep.z   r18=1,61,3;     /* r18 = 1 << 61 */ \
363         ;;  /* rr1 */           \
364         mov rr[r18]=r21;        \
365         dep.z   r18=2,61,3;     \
366         ;;  /* rr2 */           \
367         mov rr[r18]=r22;        \
368         dep.z   r18=3,61,3;     \
369         ;;  /* rr3 */           \
370         mov rr[r18]=r23;        \
371         dep.z   r18=4,61,3;     \
372         ;;  /* rr4 */           \
373         mov rr[r18]=r24;        \
374         dep.z   r18=5,61,3;     \
375         ;;  /* rr5 */           \
376         mov rr[r18]=r25;        \
377         dep.z   r18=7,61,3;     \
378         ;;  /* rr7 */           \
379         mov rr[r18]=r27;        \
380         ;;                      \
381         srlz.i;                 /* instruction serialization after the region register writes */ \
382         ;;
383
384
385
386         /* SAVE_DEBUG_REGS: store ibr[0] ~ ibr[7] and dbr[0] ~ dbr[7] into the context.
387          *      r32:    context_t base address
388          *      r2, r3, r16, r17, r18: scratch registers (overwritten)
389          * Exactly eight pairs are written, one 8-byte slot each, as RESTORE_DEBUG_REGS reads them. */
390 #define SAVE_DEBUG_REGS                         \
391         add     r2=CTX(IBR0),r32;               \
392         add     r3=CTX(DBR0),r32;               \
393         mov     r16=ibr[r0];    /* index 0 uses r0 directly */ \
394         mov     r17=dbr[r0];                    \
395         ;;                                      \
396         st8     [r2]=r16,8;                     \
397         st8     [r3]=r17,8;                     \
398         add     r18=1,r0;       /* r18 = index of the next pair to read */ \
399         ;;                                      \
400         mov     r16=ibr[r18];                   \
401         mov     r17=dbr[r18];                   \
402         ;;                                      \
403         st8     [r2]=r16,8;                     \
404         st8     [r3]=r17,8;                     \
405         add     r18=2,r0;                       \
406         ;;                                      \
407         mov     r16=ibr[r18];                   \
408         mov     r17=dbr[r18];                   \
409         ;;                                      \
410         st8     [r2]=r16,8;                     \
411         st8     [r3]=r17,8;                     \
419         add     r18=3,r0;       /* each index is saved once: 2 is followed directly by 3 */ \
420         ;;                                      \
421         mov     r16=ibr[r18];                   \
422         mov     r17=dbr[r18];                   \
423         ;;                                      \
424         st8     [r2]=r16,8;                     \
425         st8     [r3]=r17,8;                     \
426         add     r18=4,r0;                       \
427         ;;                                      \
428         mov     r16=ibr[r18];                   \
429         mov     r17=dbr[r18];                   \
430         ;;                                      \
431         st8     [r2]=r16,8;                     \
432         st8     [r3]=r17,8;                     \
433         add     r18=5,r0;                       \
434         ;;                                      \
435         mov     r16=ibr[r18];                   \
436         mov     r17=dbr[r18];                   \
437         ;;                                      \
438         st8     [r2]=r16,8;                     \
439         st8     [r3]=r17,8;                     \
440         add     r18=6,r0;                       \
441         ;;                                      \
442         mov     r16=ibr[r18];                   \
443         mov     r17=dbr[r18];                   \
444         ;;                                      \
445         st8     [r2]=r16,8;                     \
446         st8     [r3]=r17,8;                     \
447         add     r18=7,r0;                       \
448         ;;                                      \
449         mov     r16=ibr[r18];                   \
450         mov     r17=dbr[r18];                   \
451         ;;                                      \
452         st8     [r2]=r16,8;     /* eighth and last pair; r2/r3 are not used again in this macro */ \
453         st8     [r3]=r17,8;                     \
454         ;;
455
456
457 /* RESTORE_DEBUG_REGS: reload ibr[0] ~ ibr[7] and dbr[0] ~ dbr[7] from the context in a counted loop.
458  *      r33:    points to the context_t structure
459  *      ar.lc is clobbered (used as the loop counter); r2, r3 and r16 ~ r19 are overwritten.
460  */
461 #define RESTORE_DEBUG_REGS                      \
462         add     r2=CTX(IBR0),r33;               \
463         add     r3=CTX(DBR0),r33;               \
464         mov r16=7;              /* loop count: ar.lc = 7 gives eight passes through the body */ \
465         mov r17=r0;             /* r17 = register index, starting at 0 */ \
466         ;;                                      \
467         mov ar.lc = r16;                        \
468         ;;                                      \
469 1:                                              \
470         ld8 r18=[r2],8;         /* next saved ibr value, pointer advances one slot */ \
471         ld8 r19=[r3],8;         /* next saved dbr value, pointer advances one slot */ \
472         ;;                                      \
473         mov ibr[r17]=r18;                       \
474         mov dbr[r17]=r19;                       \
475         ;;                                      \
476         srlz.i;                 /* serialize after each pair of debug register writes */ \
477         ;;                                      \
478         add r17=1,r17;                          \
479         br.cloop.sptk 1b;       /* back to local label 1 while ar.lc is non-zero */ \
480         ;;
481
482
483         /* SAVE_FPU_LOW: spill floating-point registers f2 ~ f31 into the context.
484          *      r32:            context_t base address (r2 and r3 are overwritten as store pointers)
485          */
486 #define SAVE_FPU_LOW                            \
487         add     r2=CTX(F2),r32;  /* r2 is the store pointer for even registers f2, f4, ... f30 */ \
488         add     r3=CTX(F3),r32;  /* r3 is the store pointer for odd registers f3, f5, ... f31 */ \
489         ;;                                      \
490         stf.spill.nta   [r2]=f2,32;  /* post-increment 32: f4 goes to CTX(F2)+32 */ \
491         stf.spill.nta   [r3]=f3,32;  /* post-increment 32: f5 goes to CTX(F3)+32 */ \
492         ;;                                      \
493         stf.spill.nta   [r2]=f4,32;             \
494         stf.spill.nta   [r3]=f5,32;             \
495         ;;                                      \
496         stf.spill.nta   [r2]=f6,32;             \
497         stf.spill.nta   [r3]=f7,32;             \
498         ;;                                      \
499         stf.spill.nta   [r2]=f8,32;             \
500         stf.spill.nta   [r3]=f9,32;             \
501         ;;                                      \
502         stf.spill.nta   [r2]=f10,32;            \
503         stf.spill.nta   [r3]=f11,32;            \
504         ;;                                      \
505         stf.spill.nta   [r2]=f12,32;            \
506         stf.spill.nta   [r3]=f13,32;            \
507         ;;                                      \
508         stf.spill.nta   [r2]=f14,32;            \
509         stf.spill.nta   [r3]=f15,32;            \
510         ;;                                      \
511         stf.spill.nta   [r2]=f16,32;            \
512         stf.spill.nta   [r3]=f17,32;            \
513         ;;                                      \
514         stf.spill.nta   [r2]=f18,32;            \
515         stf.spill.nta   [r3]=f19,32;            \
516         ;;                                      \
517         stf.spill.nta   [r2]=f20,32;            \
518         stf.spill.nta   [r3]=f21,32;            \
519         ;;                                      \
520         stf.spill.nta   [r2]=f22,32;            \
521         stf.spill.nta   [r3]=f23,32;            \
522         ;;                                      \
523         stf.spill.nta   [r2]=f24,32;            \
524         stf.spill.nta   [r3]=f25,32;            \
525         ;;                                      \
526         stf.spill.nta   [r2]=f26,32;            \
527         stf.spill.nta   [r3]=f27,32;            \
528         ;;                                      \
529         stf.spill.nta   [r2]=f28,32;            \
530         stf.spill.nta   [r3]=f29,32;            \
531         ;;                                      \
532         stf.spill.nta   [r2]=f30;    /* last pair: no post-increment */ \
533         stf.spill.nta   [r3]=f31;               \
534         ;;
535
536         /* SAVE_FPU_HIGH: spill floating-point registers f32 ~ f127 into the context.
537          *      r32:            context_t base address (r2 and r3 are overwritten as store pointers)
538          */
539 #define SAVE_FPU_HIGH                           \
540         add     r2=CTX(F32),r32;  /* r2 is the store pointer for even registers f32, f34, ... f126 */ \
541         add     r3=CTX(F33),r32;  /* r3 is the store pointer for odd registers f33, f35, ... f127 */ \
542         ;;                                      \
543         stf.spill.nta   [r2]=f32,32;  /* post-increment 32: f34 goes to CTX(F32)+32 */ \
544         stf.spill.nta   [r3]=f33,32;  /* post-increment 32: f35 goes to CTX(F33)+32 */ \
545         ;;                                      \
546         stf.spill.nta   [r2]=f34,32;            \
547         stf.spill.nta   [r3]=f35,32;            \
548         ;;                                      \
549         stf.spill.nta   [r2]=f36,32;            \
550         stf.spill.nta   [r3]=f37,32;            \
551         ;;                                      \
552         stf.spill.nta   [r2]=f38,32;            \
553         stf.spill.nta   [r3]=f39,32;            \
554         ;;                                      \
555         stf.spill.nta   [r2]=f40,32;            \
556         stf.spill.nta   [r3]=f41,32;            \
557         ;;                                      \
558         stf.spill.nta   [r2]=f42,32;            \
559         stf.spill.nta   [r3]=f43,32;            \
560         ;;                                      \
561         stf.spill.nta   [r2]=f44,32;            \
562         stf.spill.nta   [r3]=f45,32;            \
563         ;;                                      \
564         stf.spill.nta   [r2]=f46,32;            \
565         stf.spill.nta   [r3]=f47,32;            \
566         ;;                                      \
567         stf.spill.nta   [r2]=f48,32;            \
568         stf.spill.nta   [r3]=f49,32;            \
569         ;;                                      \
570         stf.spill.nta   [r2]=f50,32;            \
571         stf.spill.nta   [r3]=f51,32;            \
572         ;;                                      \
573         stf.spill.nta   [r2]=f52,32;            \
574         stf.spill.nta   [r3]=f53,32;            \
575         ;;                                      \
576         stf.spill.nta   [r2]=f54,32;            \
577         stf.spill.nta   [r3]=f55,32;            \
578         ;;                                      \
579         stf.spill.nta   [r2]=f56,32;            \
580         stf.spill.nta   [r3]=f57,32;            \
581         ;;                                      \
582         stf.spill.nta   [r2]=f58,32;            \
583         stf.spill.nta   [r3]=f59,32;            \
584         ;;                                      \
585         stf.spill.nta   [r2]=f60,32;            \
586         stf.spill.nta   [r3]=f61,32;            \
587         ;;                                      \
588         stf.spill.nta   [r2]=f62,32;            \
589         stf.spill.nta   [r3]=f63,32;            \
590         ;;                                      \
591         stf.spill.nta   [r2]=f64,32;            \
592         stf.spill.nta   [r3]=f65,32;            \
593         ;;                                      \
594         stf.spill.nta   [r2]=f66,32;            \
595         stf.spill.nta   [r3]=f67,32;            \
596         ;;                                      \
597         stf.spill.nta   [r2]=f68,32;            \
598         stf.spill.nta   [r3]=f69,32;            \
599         ;;                                      \
600         stf.spill.nta   [r2]=f70,32;            \
601         stf.spill.nta   [r3]=f71,32;            \
602         ;;                                      \
603         stf.spill.nta   [r2]=f72,32;            \
604         stf.spill.nta   [r3]=f73,32;            \
605         ;;                                      \
606         stf.spill.nta   [r2]=f74,32;            \
607         stf.spill.nta   [r3]=f75,32;            \
608         ;;                                      \
609         stf.spill.nta   [r2]=f76,32;            \
610         stf.spill.nta   [r3]=f77,32;            \
611         ;;                                      \
612         stf.spill.nta   [r2]=f78,32;            \
613         stf.spill.nta   [r3]=f79,32;            \
614         ;;                                      \
615         stf.spill.nta   [r2]=f80,32;            \
616         stf.spill.nta   [r3]=f81,32;            \
617         ;;                                      \
618         stf.spill.nta   [r2]=f82,32;            \
619         stf.spill.nta   [r3]=f83,32;            \
620         ;;                                      \
621         stf.spill.nta   [r2]=f84,32;            \
622         stf.spill.nta   [r3]=f85,32;            \
623         ;;                                      \
624         stf.spill.nta   [r2]=f86,32;            \
625         stf.spill.nta   [r3]=f87,32;            \
626         ;;                                      \
627         stf.spill.nta   [r2]=f88,32;            \
628         stf.spill.nta   [r3]=f89,32;            \
629         ;;                                      \
630         stf.spill.nta   [r2]=f90,32;            \
631         stf.spill.nta   [r3]=f91,32;            \
632         ;;                                      \
633         stf.spill.nta   [r2]=f92,32;            \
634         stf.spill.nta   [r3]=f93,32;            \
635         ;;                                      \
636         stf.spill.nta   [r2]=f94,32;            \
637         stf.spill.nta   [r3]=f95,32;            \
638         ;;                                      \
639         stf.spill.nta   [r2]=f96,32;            \
640         stf.spill.nta   [r3]=f97,32;            \
641         ;;                                      \
642         stf.spill.nta   [r2]=f98,32;            \
643         stf.spill.nta   [r3]=f99,32;            \
644         ;;                                      \
645         stf.spill.nta   [r2]=f100,32;           \
646         stf.spill.nta   [r3]=f101,32;           \
647         ;;                                      \
648         stf.spill.nta   [r2]=f102,32;           \
649         stf.spill.nta   [r3]=f103,32;           \
650         ;;                                      \
651         stf.spill.nta   [r2]=f104,32;           \
652         stf.spill.nta   [r3]=f105,32;           \
653         ;;                                      \
654         stf.spill.nta   [r2]=f106,32;           \
655         stf.spill.nta   [r3]=f107,32;           \
656         ;;                                      \
657         stf.spill.nta   [r2]=f108,32;           \
658         stf.spill.nta   [r3]=f109,32;           \
659         ;;                                      \
660         stf.spill.nta   [r2]=f110,32;           \
661         stf.spill.nta   [r3]=f111,32;           \
662         ;;                                      \
663         stf.spill.nta   [r2]=f112,32;           \
664         stf.spill.nta   [r3]=f113,32;           \
665         ;;                                      \
666         stf.spill.nta   [r2]=f114,32;           \
667         stf.spill.nta   [r3]=f115,32;           \
668         ;;                                      \
669         stf.spill.nta   [r2]=f116,32;           \
670         stf.spill.nta   [r3]=f117,32;           \
671         ;;                                      \
672         stf.spill.nta   [r2]=f118,32;           \
673         stf.spill.nta   [r3]=f119,32;           \
674         ;;                                      \
675         stf.spill.nta   [r2]=f120,32;           \
676         stf.spill.nta   [r3]=f121,32;           \
677         ;;                                      \
678         stf.spill.nta   [r2]=f122,32;           \
679         stf.spill.nta   [r3]=f123,32;           \
680         ;;                                      \
681         stf.spill.nta   [r2]=f124,32;           \
682         stf.spill.nta   [r3]=f125,32;           \
683         ;;                                      \
684         stf.spill.nta   [r2]=f126;    /* last pair: no post-increment */ \
685         stf.spill.nta   [r3]=f127;              \
686         ;;
687
688      /*
689       *      RESTORE_FPU_LOW: reload f2-f31 with ldf.fill from the save
          *      slots that start at CTX(F2) and CTX(F3) in the context.
          *
          *      r33:    points to the context_t structure to restore from
          *      r2, r3: clobbered.  r2 walks the even-numbered registers and
          *              r3 the odd-numbered ones; each pointer is advanced by
          *              32 bytes after every load, including the last one.
          *
          *      NOTE(review): the layout this relies on (CTX(F3) directly
          *      following CTX(F2), one slot per register) comes from
          *      asm-offsets.h and is not visible here - confirm.
690       */
691 #define RESTORE_FPU_LOW                         \
692     add     r2 = CTX(F2), r33;  /* r2 -> slot of f2 (even regs) */ \
693     add     r3 = CTX(F3), r33;  /* r3 -> slot of f3 (odd regs) */ \
694     ;;                                          \
695     ldf.fill.nta f2 = [r2], 32;                 \
696     ldf.fill.nta f3 = [r3], 32;                 \
697     ;;                                          \
698     ldf.fill.nta f4 = [r2], 32;                 \
699     ldf.fill.nta f5 = [r3], 32;                 \
700     ;;                                          \
701     ldf.fill.nta f6 = [r2], 32;                 \
702     ldf.fill.nta f7 = [r3], 32;                 \
703     ;;                                          \
704     ldf.fill.nta f8 = [r2], 32;                 \
705     ldf.fill.nta f9 = [r3], 32;                 \
706     ;;                                          \
707     ldf.fill.nta f10 = [r2], 32;                \
708     ldf.fill.nta f11 = [r3], 32;                \
709     ;;                                          \
710     ldf.fill.nta f12 = [r2], 32;                \
711     ldf.fill.nta f13 = [r3], 32;                \
712     ;;                                          \
713     ldf.fill.nta f14 = [r2], 32;                \
714     ldf.fill.nta f15 = [r3], 32;                \
715     ;;                                          \
716     ldf.fill.nta f16 = [r2], 32;                \
717     ldf.fill.nta f17 = [r3], 32;                \
718     ;;                                          \
719     ldf.fill.nta f18 = [r2], 32;                \
720     ldf.fill.nta f19 = [r3], 32;                \
721     ;;                                          \
722     ldf.fill.nta f20 = [r2], 32;                \
723     ldf.fill.nta f21 = [r3], 32;                \
724     ;;                                          \
725     ldf.fill.nta f22 = [r2], 32;                \
726     ldf.fill.nta f23 = [r3], 32;                \
727     ;;                                          \
728     ldf.fill.nta f24 = [r2], 32;                \
729     ldf.fill.nta f25 = [r3], 32;                \
730     ;;                                          \
731     ldf.fill.nta f26 = [r2], 32;                \
732     ldf.fill.nta f27 = [r3], 32;                \
733         ;;                                      \
734     ldf.fill.nta f28 = [r2], 32;                \
735     ldf.fill.nta f29 = [r3], 32;                \
736     ;;                                          \
737     ldf.fill.nta f30 = [r2], 32;                \
738     ldf.fill.nta f31 = [r3], 32;                \
739     ;;
740
741
742
743     /*
744      *      RESTORE_FPU_HIGH: reload f32-f127 with ldf.fill from the save
         *      slots that start at CTX(F32) and CTX(F33) in the context.
         *
         *      r33:    points to the context_t structure to restore from
         *      r2, r3: clobbered.  r2 walks the even-numbered registers and
         *              r3 the odd-numbered ones; each pointer is advanced by
         *              32 bytes after every load, including the last one
         *              (f126/f127), so both end up past the final slot.
         *
         *      NOTE(review): the layout this relies on (CTX(F33) directly
         *      following CTX(F32), one slot per register) comes from
         *      asm-offsets.h and is not visible here - confirm.
745      */
746 #define RESTORE_FPU_HIGH                        \
747     add     r2 = CTX(F32), r33; /* r2 -> slot of f32 (even regs) */ \
748     add     r3 = CTX(F33), r33; /* r3 -> slot of f33 (odd regs) */ \
749     ;;                                          \
750     ldf.fill.nta f32 = [r2], 32;                \
751     ldf.fill.nta f33 = [r3], 32;                \
752     ;;                                          \
753     ldf.fill.nta f34 = [r2], 32;                \
754     ldf.fill.nta f35 = [r3], 32;                \
755     ;;                                          \
756     ldf.fill.nta f36 = [r2], 32;                \
757     ldf.fill.nta f37 = [r3], 32;                \
758     ;;                                          \
759     ldf.fill.nta f38 = [r2], 32;                \
760     ldf.fill.nta f39 = [r3], 32;                \
761     ;;                                          \
762     ldf.fill.nta f40 = [r2], 32;                \
763     ldf.fill.nta f41 = [r3], 32;                \
764     ;;                                          \
765     ldf.fill.nta f42 = [r2], 32;                \
766     ldf.fill.nta f43 = [r3], 32;                \
767     ;;                                          \
768     ldf.fill.nta f44 = [r2], 32;                \
769     ldf.fill.nta f45 = [r3], 32;                \
770     ;;                                          \
771     ldf.fill.nta f46 = [r2], 32;                \
772     ldf.fill.nta f47 = [r3], 32;                \
773     ;;                                          \
774     ldf.fill.nta f48 = [r2], 32;                \
775     ldf.fill.nta f49 = [r3], 32;                \
776     ;;                                          \
777     ldf.fill.nta f50 = [r2], 32;                \
778     ldf.fill.nta f51 = [r3], 32;                \
779     ;;                                          \
780     ldf.fill.nta f52 = [r2], 32;                \
781     ldf.fill.nta f53 = [r3], 32;                \
782     ;;                                          \
783     ldf.fill.nta f54 = [r2], 32;                \
784     ldf.fill.nta f55 = [r3], 32;                \
785     ;;                                          \
786     ldf.fill.nta f56 = [r2], 32;                \
787     ldf.fill.nta f57 = [r3], 32;                \
788     ;;                                          \
789     ldf.fill.nta f58 = [r2], 32;                \
790     ldf.fill.nta f59 = [r3], 32;                \
791     ;;                                          \
792     ldf.fill.nta f60 = [r2], 32;                \
793     ldf.fill.nta f61 = [r3], 32;                \
794     ;;                                          \
795     ldf.fill.nta f62 = [r2], 32;                \
796     ldf.fill.nta f63 = [r3], 32;                \
797     ;;                                          \
798     ldf.fill.nta f64 = [r2], 32;                \
799     ldf.fill.nta f65 = [r3], 32;                \
800     ;;                                          \
801     ldf.fill.nta f66 = [r2], 32;                \
802     ldf.fill.nta f67 = [r3], 32;                \
803     ;;                                          \
804     ldf.fill.nta f68 = [r2], 32;                \
805     ldf.fill.nta f69 = [r3], 32;                \
806     ;;                                          \
807     ldf.fill.nta f70 = [r2], 32;                \
808     ldf.fill.nta f71 = [r3], 32;                \
809     ;;                                          \
810     ldf.fill.nta f72 = [r2], 32;                \
811     ldf.fill.nta f73 = [r3], 32;                \
812     ;;                                          \
813     ldf.fill.nta f74 = [r2], 32;                \
814     ldf.fill.nta f75 = [r3], 32;                \
815     ;;                                          \
816     ldf.fill.nta f76 = [r2], 32;                \
817     ldf.fill.nta f77 = [r3], 32;                \
818     ;;                                          \
819     ldf.fill.nta f78 = [r2], 32;                \
820     ldf.fill.nta f79 = [r3], 32;                \
821     ;;                                          \
822     ldf.fill.nta f80 = [r2], 32;                \
823     ldf.fill.nta f81 = [r3], 32;                \
824     ;;                                          \
825     ldf.fill.nta f82 = [r2], 32;                \
826     ldf.fill.nta f83 = [r3], 32;                \
827     ;;                                          \
828     ldf.fill.nta f84 = [r2], 32;                \
829     ldf.fill.nta f85 = [r3], 32;                \
830     ;;                                          \
831     ldf.fill.nta f86 = [r2], 32;                \
832     ldf.fill.nta f87 = [r3], 32;                \
833     ;;                                          \
834     ldf.fill.nta f88 = [r2], 32;                \
835     ldf.fill.nta f89 = [r3], 32;                \
836     ;;                                          \
837     ldf.fill.nta f90 = [r2], 32;                \
838     ldf.fill.nta f91 = [r3], 32;                \
839     ;;                                          \
840     ldf.fill.nta f92 = [r2], 32;                \
841     ldf.fill.nta f93 = [r3], 32;                \
842     ;;                                          \
843     ldf.fill.nta f94 = [r2], 32;                \
844     ldf.fill.nta f95 = [r3], 32;                \
845     ;;                                          \
846     ldf.fill.nta f96 = [r2], 32;                \
847     ldf.fill.nta f97 = [r3], 32;                \
848     ;;                                          \
849     ldf.fill.nta f98 = [r2], 32;                \
850     ldf.fill.nta f99 = [r3], 32;                \
851     ;;                                          \
852     ldf.fill.nta f100 = [r2], 32;               \
853     ldf.fill.nta f101 = [r3], 32;               \
854     ;;                                          \
855     ldf.fill.nta f102 = [r2], 32;               \
856     ldf.fill.nta f103 = [r3], 32;               \
857     ;;                                          \
858     ldf.fill.nta f104 = [r2], 32;               \
859     ldf.fill.nta f105 = [r3], 32;               \
860     ;;                                          \
861     ldf.fill.nta f106 = [r2], 32;               \
862     ldf.fill.nta f107 = [r3], 32;               \
863     ;;                                          \
864     ldf.fill.nta f108 = [r2], 32;               \
865     ldf.fill.nta f109 = [r3], 32;               \
866     ;;                                          \
867     ldf.fill.nta f110 = [r2], 32;               \
868     ldf.fill.nta f111 = [r3], 32;               \
869     ;;                                          \
870     ldf.fill.nta f112 = [r2], 32;               \
871     ldf.fill.nta f113 = [r3], 32;               \
872     ;;                                          \
873     ldf.fill.nta f114 = [r2], 32;               \
874     ldf.fill.nta f115 = [r3], 32;               \
875     ;;                                          \
876     ldf.fill.nta f116 = [r2], 32;               \
877     ldf.fill.nta f117 = [r3], 32;               \
878     ;;                                          \
879     ldf.fill.nta f118 = [r2], 32;               \
880     ldf.fill.nta f119 = [r3], 32;               \
881     ;;                                          \
882     ldf.fill.nta f120 = [r2], 32;               \
883     ldf.fill.nta f121 = [r3], 32;               \
884     ;;                                          \
885     ldf.fill.nta f122 = [r2], 32;               \
886     ldf.fill.nta f123 = [r3], 32;               \
887     ;;                                          \
888     ldf.fill.nta f124 = [r2], 32;               \
889     ldf.fill.nta f125 = [r3], 32;               \
890     ;;                                          \
891     ldf.fill.nta f126 = [r2], 32;               \
892     ldf.fill.nta f127 = [r3], 32;               \
893     ;;
894
895         /*
896          *      SAVE_PTK_REGS: store pkr[0]..pkr[7] into eight consecutive
             *      8-byte slots starting at CTX(PKR0) in the context.
             *
             *      r32:            context_t base address
             *      r2:             clobbered (store pointer, post-incremented)
             *      r16, r17, r18:  clobbered (count, pkr index, value)
             *      ar.lc:          clobbered (used as the loop counter)
             *
             *      Defines and branches to the numeric local label 1.
897          */
898 #define SAVE_PTK_REGS                           \
899     add r2=CTX(PKR0), r32;      /* r2 -> slot for pkr[0] */ \
900     mov r16=7;                  /* 7 loop-backs => 8 iterations */ \
901     ;;                                          \
902     mov ar.lc=r16;                              \
903     mov r17=r0;                 /* pkr index starts at 0 */ \
904     ;;                                          \
905 1:                                              \
906     mov r18=pkr[r17];                           \
907     ;;                                          \
908     srlz.i;                                     \
909     ;;                                          \
910     st8 [r2]=r18, 8;            /* store, then advance to next slot */ \
911     ;;                                          \
912     add r17 =1,r17;                             \
913     ;;                                          \
914     br.cloop.sptk 1b;           /* loop until ar.lc reaches 0 */ \
915     ;;
916
917 /*
     *      RESTORE_PTK_REGS: load pkr[0]..pkr[7] from eight consecutive
     *      8-byte slots starting at CTX(PKR0) in the context.
     *
918  *      r33:    points to the context_t structure
919  *      ar.lc is clobbered (used as the loop counter).
     *      r2, r16, r17 and r18 are clobbered as well (load pointer, count,
     *      pkr index, value).
     *
     *      Defines and branches to the numeric local label 1.
920  */
921 #define RESTORE_PTK_REGS                        \
922     add r2=CTX(PKR0), r33;      /* r2 -> slot holding pkr[0] */ \
923     mov r16=7;                  /* 7 loop-backs => 8 iterations */ \
924     ;;                                          \
925     mov ar.lc=r16;                              \
926     mov r17=r0;                 /* pkr index starts at 0 */ \
927     ;;                                          \
928 1:                                              \
929     ld8 r18=[r2], 8;            /* load, then advance to next slot */ \
930     ;;                                          \
931     mov pkr[r17]=r18;                           \
932     ;;                                          \
933     srlz.i;                     /* serialize after each pkr write */ \
934     ;;                                          \
935     add r17 =1,r17;                             \
936     ;;                                          \
937     br.cloop.sptk 1b;           /* loop until ar.lc reaches 0 */ \
938     ;;
939
940
941 /*
942  * void vmm_trampoline( context_t * from,
943  *                      context_t * to)
944  *
     *      Saves the current processor state into *from, then loads the
     *      state stored in *to and returns with br.ret through b0.
     *
945  *      from:   r32
946  *      to:     r33
947  *  note: interrupts must be disabled before calling this function.
     *
     *  Save phase (into r32): psr, pr and ar.unat go to three consecutive
     *  8-byte slots starting at CTX(PSR); ar.rsc goes to CTX(RSC), after
     *  which ar.rsc is zeroed and the register stack is flushed.  The
     *  SAVE_* macros then run, with psr.dfl / psr.dfh cleared before the
     *  low / high floating-point saves.
     *
     *  Restore phase (from r33): the RESTORE_* macros run in the reverse
     *  order of the saves, followed by psr (lower half only, via psr.l),
     *  pr, ar.unat and ar.rsc.
     *
     *  NOTE(review): presumably RESTORE_BRANCH_REGS reloads b0 from *to, so
     *  the final br.ret resumes the target context rather than the caller -
     *  confirm against the macro body.
     *  NOTE(review): r2, r16 and r33 are still used after
     *  RESTORE_GENERAL_REGS; this assumes that macro leaves them intact -
     *  confirm.
948  */
949 GLOBAL_ENTRY(vmm_trampoline)
950     mov r16 = psr
951     adds r2 = CTX(PSR), r32     // r2 -> from: psr slot
952     ;;
953     st8 [r2] = r16, 8       // psr
954     mov r17 = pr
955     ;;
956     st8 [r2] = r17, 8       // pr
957     mov r18 = ar.unat
958     ;;
959     st8 [r2] = r18          // ar.unat, slot right after pr
960     mov r17 = ar.rsc
961     ;;
962     adds r2 = CTX(RSC),r32
963     ;;
964     st8 [r2]= r17           // ar.rsc
965     mov ar.rsc =0           // zero ar.rsc ahead of the flush
966     flushrs
967     ;;
968     SAVE_GENERAL_REGS
969     ;;
970     SAVE_KERNEL_REGS
971     ;;
972     SAVE_APP_REGS
973     ;;
974     SAVE_BRANCH_REGS
975     ;;
976     SAVE_CTL_REGS
977     ;;
978     SAVE_REGION_REGS
979     ;;
980     //SAVE_DEBUG_REGS
981     ;;
982     rsm  psr.dfl            // clear psr.dfl ahead of SAVE_FPU_LOW
983     ;;
984     srlz.d
985     ;;
986     SAVE_FPU_LOW
987     ;;
988     rsm  psr.dfh            // clear psr.dfh ahead of SAVE_FPU_HIGH
989     ;;
990     srlz.d
991     ;;
992     SAVE_FPU_HIGH
993     ;;
994     SAVE_PTK_REGS           // last save; clobbers ar.lc
995     ;;
996     RESTORE_PTK_REGS        // restores from here on read *to (r33)
997     ;;
998     RESTORE_FPU_HIGH
999     ;;
1000     RESTORE_FPU_LOW
1001     ;;
1002     //RESTORE_DEBUG_REGS
1003     ;;
1004     RESTORE_REGION_REGS
1005     ;;
1006     RESTORE_CTL_REGS
1007     ;;
1008     RESTORE_BRANCH_REGS
1009     ;;
1010     RESTORE_APP_REGS
1011     ;;
1012     RESTORE_KERNEL_REGS
1013     ;;
1014     RESTORE_GENERAL_REGS
1015     ;;
1016     adds r2=CTX(PSR), r33   // r2 -> to: psr slot
1017     ;;
1018     ld8 r16=[r2], 8       // psr
1019     ;;
1020     mov psr.l=r16         // writes only the lower half of psr
1021     ;;
1022     srlz.d
1023     ;;
1024     ld8 r16=[r2], 8       // pr
1025     ;;
1026     mov pr =r16,-1        // mask -1: restore every predicate register
1027     ld8 r16=[r2]       // unat
1028     ;;
1029     mov ar.unat=r16
1030     ;;
1031     adds r2=CTX(RSC),r33
1032     ;;
1033     ld8 r16 =[r2]         // ar.rsc
1034     ;;
1035     mov ar.rsc = r16
1036     ;;
1037     br.ret.sptk.few b0
1038 END(vmm_trampoline)