1 /* visemul.c: Emulation of VIS instructions.
3 * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
5 #include <linux/kernel.h>
6 #include <linux/errno.h>
7 #include <linux/thread_info.h>
9 #include <asm/ptrace.h>
10 #include <asm/pstate.h>
11 #include <asm/system.h>
12 #include <asm/fpumacro.h>
13 #include <asm/uaccess.h>
15 /* OPF field of various VIS instructions. */
17 /* 000111011 - four 16-bit packs */
18 #define FPACK16_OPF 0x03b
20 /* 000111010 - two 32-bit packs */
21 #define FPACK32_OPF 0x03a
23 /* 000111101 - four 16-bit packs */
24 #define FPACKFIX_OPF 0x03d
26 /* 001001101 - four 16-bit expands */
27 #define FEXPAND_OPF 0x04d
29 /* 001001011 - two 32-bit merges */
30 #define FPMERGE_OPF 0x04b
32 /* 000110001 - 8-by-16-bit partitoned product */
33 #define FMUL8x16_OPF 0x031
35 /* 000110011 - 8-by-16-bit upper alpha partitioned product */
36 #define FMUL8x16AU_OPF 0x033
38 /* 000110101 - 8-by-16-bit lower alpha partitioned product */
39 #define FMUL8x16AL_OPF 0x035
41 /* 000110110 - upper 8-by-16-bit partitioned product */
42 #define FMUL8SUx16_OPF 0x036
44 /* 000110111 - lower 8-by-16-bit partitioned product */
45 #define FMUL8ULx16_OPF 0x037
47 /* 000111000 - upper 8-by-16-bit partitioned product */
48 #define FMULD8SUx16_OPF 0x038
50 /* 000111001 - lower unsigned 8-by-16-bit partitioned product */
51 #define FMULD8ULx16_OPF 0x039
53 /* 000101000 - four 16-bit compare; set rd if src1 > src2 */
54 #define FCMPGT16_OPF 0x028
56 /* 000101100 - two 32-bit compare; set rd if src1 > src2 */
57 #define FCMPGT32_OPF 0x02c
59 /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */
60 #define FCMPLE16_OPF 0x020
62 /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */
63 #define FCMPLE32_OPF 0x024
65 /* 000100010 - four 16-bit compare; set rd if src1 != src2 */
66 #define FCMPNE16_OPF 0x022
68 /* 000100110 - two 32-bit compare; set rd if src1 != src2 */
69 #define FCMPNE32_OPF 0x026
71 /* 000101010 - four 16-bit compare; set rd if src1 == src2 */
72 #define FCMPEQ16_OPF 0x02a
74 /* 000101110 - two 32-bit compare; set rd if src1 == src2 */
75 #define FCMPEQ32_OPF 0x02e
77 /* 000000000 - Eight 8-bit edge boundary processing */
78 #define EDGE8_OPF 0x000
80 /* 000000001 - Eight 8-bit edge boundary processing, no CC */
81 #define EDGE8N_OPF 0x001
83 /* 000000010 - Eight 8-bit edge boundary processing, little-endian */
84 #define EDGE8L_OPF 0x002
86 /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */
87 #define EDGE8LN_OPF 0x003
89 /* 000000100 - Four 16-bit edge boundary processing */
90 #define EDGE16_OPF 0x004
92 /* 000000101 - Four 16-bit edge boundary processing, no CC */
93 #define EDGE16N_OPF 0x005
95 /* 000000110 - Four 16-bit edge boundary processing, little-endian */
96 #define EDGE16L_OPF 0x006
98 /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */
99 #define EDGE16LN_OPF 0x007
101 /* 000001000 - Two 32-bit edge boundary processing */
102 #define EDGE32_OPF 0x008
104 /* 000001001 - Two 32-bit edge boundary processing, no CC */
105 #define EDGE32N_OPF 0x009
107 /* 000001010 - Two 32-bit edge boundary processing, little-endian */
108 #define EDGE32L_OPF 0x00a
110 /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */
111 #define EDGE32LN_OPF 0x00b
113 /* 000111110 - distance between 8 8-bit components */
114 #define PDIST_OPF 0x03e
116 /* 000010000 - convert 8-bit 3-D address to blocked byte address */
117 #define ARRAY8_OPF 0x010
119 /* 000010010 - convert 16-bit 3-D address to blocked byte address */
120 #define ARRAY16_OPF 0x012
122 /* 000010100 - convert 32-bit 3-D address to blocked byte address */
123 #define ARRAY32_OPF 0x014
125 /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */
126 #define BMASK_OPF 0x019
128 /* 001001100 - Permute bytes as specified by GSR.MASK */
129 #define BSHUFFLE_OPF 0x04c
131 #define VIS_OPCODE_MASK ((0x3 << 30) | (0x3f << 19))
132 #define VIS_OPCODE_VAL ((0x2 << 30) | (0x36 << 19))
134 #define VIS_OPF_SHIFT 5
135 #define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT)
137 #define RS1(INSN) (((INSN) >> 24) & 0x1f)
138 #define RS2(INSN) (((INSN) >> 0) & 0x1f)
139 #define RD(INSN) (((INSN) >> 25) & 0x1f)
141 static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
142 unsigned int rd, int from_kernel)
144 if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
145 if (from_kernel != 0)
146 __asm__ __volatile__("flushw");
152 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
157 return (!reg ? 0 : regs->u_regs[reg]);
158 if (regs->tstate & TSTATE_PRIV) {
159 struct reg_window *win;
160 win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
161 value = win->locals[reg - 16];
162 } else if (test_thread_flag(TIF_32BIT)) {
163 struct reg_window32 __user *win32;
164 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
165 get_user(value, &win32->locals[reg - 16]);
167 struct reg_window __user *win;
168 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
169 get_user(value, &win->locals[reg - 16]);
174 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
175 struct pt_regs *regs)
178 BUG_ON(regs->tstate & TSTATE_PRIV);
180 if (test_thread_flag(TIF_32BIT)) {
181 struct reg_window32 __user *win32;
182 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
183 return (unsigned long __user *)&win32->locals[reg - 16];
185 struct reg_window __user *win;
186 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
187 return &win->locals[reg - 16];
191 static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
192 struct pt_regs *regs)
195 BUG_ON(regs->tstate & TSTATE_PRIV);
197 return ®s->u_regs[reg];
200 static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
203 unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
207 unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
209 if (test_thread_flag(TIF_32BIT))
210 __put_user((u32)val, (u32 __user *)rd_user);
212 __put_user(val, rd_user);
216 static inline unsigned long fpd_regval(struct fpustate *f,
217 unsigned int insn_regnum)
219 insn_regnum = (((insn_regnum & 1) << 5) |
220 (insn_regnum & 0x1e));
222 return *(unsigned long *) &f->regs[insn_regnum];
225 static inline unsigned long *fpd_regaddr(struct fpustate *f,
226 unsigned int insn_regnum)
228 insn_regnum = (((insn_regnum & 1) << 5) |
229 (insn_regnum & 0x1e));
231 return (unsigned long *) &f->regs[insn_regnum];
234 static inline unsigned int fps_regval(struct fpustate *f,
235 unsigned int insn_regnum)
237 return f->regs[insn_regnum];
240 static inline unsigned int *fps_regaddr(struct fpustate *f,
241 unsigned int insn_regnum)
243 return &f->regs[insn_regnum];
249 struct edge_tab edge8_tab[8] = {
259 struct edge_tab edge8_tab_l[8] = {
269 struct edge_tab edge16_tab[4] = {
275 struct edge_tab edge16_tab_l[4] = {
281 struct edge_tab edge32_tab[2] = {
285 struct edge_tab edge32_tab_l[2] = {
290 static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
292 unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
295 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
296 orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
297 orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
299 if (test_thread_flag(TIF_32BIT)) {
300 rs1 = rs1 & 0xffffffff;
301 rs2 = rs2 & 0xffffffff;
307 left = edge8_tab[rs1 & 0x7].left;
308 right = edge8_tab[rs2 & 0x7].right;
312 left = edge8_tab_l[rs1 & 0x7].left;
313 right = edge8_tab_l[rs2 & 0x7].right;
318 left = edge16_tab[(rs1 >> 1) & 0x3].left;
319 right = edge16_tab[(rs2 >> 1) & 0x3].right;
324 left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
325 right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
330 left = edge32_tab[(rs1 >> 2) & 0x1].left;
331 right = edge32_tab[(rs2 >> 2) & 0x1].right;
336 left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
337 right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
341 if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
342 rd_val = right & left;
346 store_reg(regs, rd_val, RD(insn));
355 unsigned long ccr, tstate;
357 __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
360 : "r" (orig_rs1), "r" (orig_rs2)
362 tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
363 regs->tstate = tstate | (ccr << 32UL);
368 static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
370 unsigned long rs1, rs2, rd_val;
371 unsigned int bits, bits_mask;
373 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
374 rs1 = fetch_reg(RS1(insn), regs);
375 rs2 = fetch_reg(RS2(insn), regs);
377 bits = (rs2 > 5 ? 5 : rs2);
378 bits_mask = (1UL << bits) - 1UL;
380 rd_val = ((((rs1 >> 11) & 0x3) << 0) |
381 (((rs1 >> 33) & 0x3) << 2) |
382 (((rs1 >> 55) & 0x1) << 4) |
383 (((rs1 >> 13) & 0xf) << 5) |
384 (((rs1 >> 35) & 0xf) << 9) |
385 (((rs1 >> 56) & 0xf) << 13) |
386 (((rs1 >> 17) & bits_mask) << 17) |
387 (((rs1 >> 39) & bits_mask) << (17 + bits)) |
388 (((rs1 >> 60) & 0xf) << (17 + (2*bits))));
399 store_reg(regs, rd_val, RD(insn));
402 static void bmask(struct pt_regs *regs, unsigned int insn)
404 unsigned long rs1, rs2, rd_val, gsr;
406 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
407 rs1 = fetch_reg(RS1(insn), regs);
408 rs2 = fetch_reg(RS2(insn), regs);
411 store_reg(regs, rd_val, RD(insn));
413 gsr = current_thread_info()->gsr[0] & 0xffffffff;
414 gsr |= rd_val << 32UL;
415 current_thread_info()->gsr[0] = gsr;
418 static void bshuffle(struct pt_regs *regs, unsigned int insn)
420 struct fpustate *f = FPUSTATE;
421 unsigned long rs1, rs2, rd_val;
422 unsigned long bmask, i;
424 bmask = current_thread_info()->gsr[0] >> 32UL;
426 rs1 = fpd_regval(f, RS1(insn));
427 rs2 = fpd_regval(f, RS2(insn));
430 for (i = 0; i < 8; i++) {
431 unsigned long which = (bmask >> (i * 4)) & 0xf;
435 byte = (rs1 >> (which * 8)) & 0xff;
437 byte = (rs2 >> ((which-8)*8)) & 0xff;
438 rd_val |= (byte << (i * 8));
441 *fpd_regaddr(f, RD(insn)) = rd_val;
444 static void pdist(struct pt_regs *regs, unsigned int insn)
446 struct fpustate *f = FPUSTATE;
447 unsigned long rs1, rs2, *rd, rd_val;
450 rs1 = fpd_regval(f, RS1(insn));
451 rs2 = fpd_regval(f, RS1(insn));
452 rd = fpd_regaddr(f, RD(insn));
456 for (i = 0; i < 8; i++) {
459 s1 = (rs1 >> (56 - (i * 8))) & 0xff;
460 s2 = (rs2 >> (56 - (i * 8))) & 0xff;
462 /* Absolute value of difference. */
473 static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
475 struct fpustate *f = FPUSTATE;
476 unsigned long rs1, rs2, gsr, scale, rd_val;
478 gsr = current_thread_info()->gsr[0];
479 scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
484 rs2 = fpd_regval(f, RS2(insn));
486 for (byte = 0; byte < 4; byte++) {
488 s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
489 int scaled = src << scale;
490 int from_fixed = scaled >> 7;
492 val = ((from_fixed < 0) ?
497 rd_val |= (val << (8 * byte));
499 *fps_regaddr(f, RD(insn)) = rd_val;
506 rs1 = fpd_regval(f, RS1(insn));
507 rs2 = fpd_regval(f, RS2(insn));
508 rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
509 for (word = 0; word < 2; word++) {
511 s32 src = (rs2 >> (word * 32UL));
512 s64 scaled = src << scale;
513 s64 from_fixed = scaled >> 23;
515 val = ((from_fixed < 0) ?
520 rd_val |= (val << (32 * word));
522 *fpd_regaddr(f, RD(insn)) = rd_val;
529 rs2 = fpd_regval(f, RS2(insn));
532 for (word = 0; word < 2; word++) {
534 s32 src = (rs2 >> (word * 32UL));
535 s64 scaled = src << scale;
536 s64 from_fixed = scaled >> 16;
538 val = ((from_fixed < -32768) ?
540 (from_fixed > 32767) ?
543 rd_val |= ((val & 0xffff) << (word * 16));
545 *fps_regaddr(f, RD(insn)) = rd_val;
552 rs2 = fps_regval(f, RS2(insn));
555 for (byte = 0; byte < 4; byte++) {
557 u8 src = (rs2 >> (byte * 8)) & 0xff;
561 rd_val |= (val << (byte * 16));
563 *fpd_regaddr(f, RD(insn)) = rd_val;
568 rs1 = fps_regval(f, RS1(insn));
569 rs2 = fps_regval(f, RS2(insn));
571 rd_val = (((rs2 & 0x000000ff) << 0) |
572 ((rs1 & 0x000000ff) << 8) |
573 ((rs2 & 0x0000ff00) << 8) |
574 ((rs1 & 0x0000ff00) << 16) |
575 ((rs2 & 0x00ff0000) << 16) |
576 ((rs1 & 0x00ff0000) << 24) |
577 ((rs2 & 0xff000000) << 24) |
578 ((rs1 & 0xff000000) << 32));
579 *fpd_regaddr(f, RD(insn)) = rd_val;
585 static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
587 struct fpustate *f = FPUSTATE;
588 unsigned long rs1, rs2, rd_val;
594 rs1 = fps_regval(f, RS1(insn));
595 rs2 = fpd_regval(f, RS2(insn));
598 for (byte = 0; byte < 4; byte++) {
599 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
600 s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
601 u32 prod = src1 * src2;
602 u16 scaled = ((prod & 0x00ffff00) >> 8);
607 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
610 *fpd_regaddr(f, RD(insn)) = rd_val;
615 case FMUL8x16AL_OPF: {
619 rs1 = fps_regval(f, RS1(insn));
620 rs2 = fps_regval(f, RS2(insn));
623 src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0);
624 for (byte = 0; byte < 4; byte++) {
625 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
626 u32 prod = src1 * src2;
627 u16 scaled = ((prod & 0x00ffff00) >> 8);
632 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
635 *fpd_regaddr(f, RD(insn)) = rd_val;
640 case FMUL8ULx16_OPF: {
641 unsigned long byte, ushift;
643 rs1 = fpd_regval(f, RS1(insn));
644 rs2 = fpd_regval(f, RS2(insn));
647 ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
648 for (byte = 0; byte < 4; byte++) {
654 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
655 src2 = ((rs2 >> (16 * byte)) & 0xffff);
657 scaled = ((prod & 0x00ffff00) >> 8);
662 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
665 *fpd_regaddr(f, RD(insn)) = rd_val;
669 case FMULD8SUx16_OPF:
670 case FMULD8ULx16_OPF: {
671 unsigned long byte, ushift;
673 rs1 = fps_regval(f, RS1(insn));
674 rs2 = fps_regval(f, RS2(insn));
677 ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
678 for (byte = 0; byte < 2; byte++) {
684 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
685 src2 = ((rs2 >> (16 * byte)) & 0xffff);
687 scaled = ((prod & 0x00ffff00) >> 8);
692 rd_val |= ((scaled & 0xffffUL) <<
693 ((byte * 32UL) + 7UL));
695 *fpd_regaddr(f, RD(insn)) = rd_val;
701 static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
703 struct fpustate *f = FPUSTATE;
704 unsigned long rs1, rs2, rd_val, i;
706 rs1 = fpd_regval(f, RS1(insn));
707 rs2 = fpd_regval(f, RS2(insn));
713 for (i = 0; i < 4; i++) {
714 s16 a = (rs1 >> (i * 16)) & 0xffff;
715 s16 b = (rs2 >> (i * 16)) & 0xffff;
723 for (i = 0; i < 2; i++) {
724 s32 a = (rs1 >> (i * 32)) & 0xffff;
725 s32 b = (rs2 >> (i * 32)) & 0xffff;
733 for (i = 0; i < 4; i++) {
734 s16 a = (rs1 >> (i * 16)) & 0xffff;
735 s16 b = (rs2 >> (i * 16)) & 0xffff;
743 for (i = 0; i < 2; i++) {
744 s32 a = (rs1 >> (i * 32)) & 0xffff;
745 s32 b = (rs2 >> (i * 32)) & 0xffff;
753 for (i = 0; i < 4; i++) {
754 s16 a = (rs1 >> (i * 16)) & 0xffff;
755 s16 b = (rs2 >> (i * 16)) & 0xffff;
763 for (i = 0; i < 2; i++) {
764 s32 a = (rs1 >> (i * 32)) & 0xffff;
765 s32 b = (rs2 >> (i * 32)) & 0xffff;
773 for (i = 0; i < 4; i++) {
774 s16 a = (rs1 >> (i * 16)) & 0xffff;
775 s16 b = (rs2 >> (i * 16)) & 0xffff;
783 for (i = 0; i < 2; i++) {
784 s32 a = (rs1 >> (i * 32)) & 0xffff;
785 s32 b = (rs2 >> (i * 32)) & 0xffff;
793 maybe_flush_windows(0, 0, RD(insn), 0);
794 store_reg(regs, rd_val, RD(insn));
797 /* Emulate the VIS instructions which are not implemented in
798 * hardware on Niagara.
800 int vis_emul(struct pt_regs *regs, unsigned int insn)
802 unsigned long pc = regs->tpc;
805 BUG_ON(regs->tstate & TSTATE_PRIV);
807 if (test_thread_flag(TIF_32BIT))
810 if (get_user(insn, (u32 __user *) pc))
813 if ((insn & VIS_OPCODE_MASK) != VIS_OPCODE_VAL)
816 opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
821 /* Pixel Formatting Instructions. */
827 pformat(regs, insn, opf);
830 /* Partitioned Multiply Instructions */
836 case FMULD8SUx16_OPF:
837 case FMULD8ULx16_OPF:
838 pmul(regs, insn, opf);
841 /* Pixel Compare Instructions */
850 pcmp(regs, insn, opf);
853 /* Edge Handling Instructions */
866 edge(regs, insn, opf);
869 /* Pixel Component Distance */
874 /* Three-Dimensional Array Addressing Instructions */
878 array(regs, insn, opf);
881 /* Byte Mask and Shuffle Instructions */
887 bshuffle(regs, insn);
891 regs->tpc = regs->tnpc;