randr12: fix dpms, detect, destroy, save and restore for multiple encoders per connector
[nouveau] / src / nv30_shaders.c
1 /*
2  * Copyright 2007 Nouveau Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22
23
24 #include "nv30_shaders.h"
25
26 void
27 NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
28 {
29         NVPtr pNv = NVPTR(pScrn);
30         struct nouveau_channel *chan = pNv->chan;
31         struct nouveau_grobj *rankine = pNv->Nv3D;
32         static struct nouveau_bo *fp_mem = NULL;
33         static int next_hw_id_offset = 0;
34
35         if (!fp_mem) {
36                 if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_PIN,
37                                    0, 0x1000, &fp_mem)) {
38                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
39                                         "Couldn't alloc fragprog buffer!\n");
40                         return;
41                 }
42
43                 if (nouveau_bo_map(fp_mem, NOUVEAU_BO_RDWR)) {
44                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
45                                    "Couldn't map fragprog buffer!\n");
46                 }
47         }
48
49         if (!shader->hw_id) {
50                 uint32_t *map = fp_mem->map + next_hw_id_offset;
51                 int i;
52
53                 for (i = 0; i < shader->size; i++) {
54                         uint32_t data = shader->data[i];
55 #if (X_BYTE_ORDER != X_LITTLE_ENDIAN)
56                         data = ((data >> 16) | ((data & 0xffff) << 16));
57 #endif
58                         map[i] = data;
59                 }
60
61                 shader->hw_id += next_hw_id_offset;
62                 next_hw_id_offset += (shader->size * sizeof(uint32_t));
63                 next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
64         }
65
66         BEGIN_RING(chan, rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
67         OUT_RELOC (chan, fp_mem, shader->hw_id, NOUVEAU_BO_VRAM |
68                    NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
69                    NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
70                    NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
71
72         BEGIN_RING(chan, rankine, NV34TCL_FP_REG_CONTROL, 1);
73         OUT_RING  (chan, (1 << 16)| 0xf);
74         BEGIN_RING(chan, rankine, NV34TCL_MULTISAMPLE_CONTROL, 1);
75         OUT_RING  (chan, 0xffff0000);
76
77         BEGIN_RING(chan, rankine, NV34TCL_FP_CONTROL,1);
78         OUT_RING  (chan, (shader->card_priv.NV30FP.num_regs-1)/2);
79 }
80
81
82
83 void
84 NV40_LoadVtxProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
85 {
86         NVPtr pNv = NVPTR(pScrn);
87         struct nouveau_channel *chan = pNv->chan;
88         struct nouveau_grobj *curie = pNv->Nv3D;
89         static int next_hw_id = 0;
90         int i;
91
92         if (!shader->hw_id) {
93                 shader->hw_id = next_hw_id;
94
95                 BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
96                 OUT_RING  (chan, (shader->hw_id));
97                 for (i=0; i<shader->size; i+=4) {
98                         BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4);
99                         OUT_RING  (chan, shader->data[i + 0]);
100                         OUT_RING  (chan, shader->data[i + 1]);
101                         OUT_RING  (chan, shader->data[i + 2]);
102                         OUT_RING  (chan, shader->data[i + 3]);
103                         next_hw_id++;
104                 }
105         }
106
107         BEGIN_RING(chan, curie, NV40TCL_VP_START_FROM_ID, 1);
108         OUT_RING  (chan, (shader->hw_id));
109
110         BEGIN_RING(chan, curie, NV40TCL_VP_ATTRIB_EN, 2);
111         OUT_RING  (chan, shader->card_priv.NV30VP.vp_in_reg);
112         OUT_RING  (chan, shader->card_priv.NV30VP.vp_out_reg);
113 }
114
115 void
116 NV40_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
117 {
118         NVPtr pNv = NVPTR(pScrn);
119         struct nouveau_channel *chan = pNv->chan;
120         struct nouveau_grobj *curie = pNv->Nv3D;
121         static struct nouveau_bo *fp_mem = NULL;
122         static int next_hw_id_offset = 0;
123
124         if (!fp_mem) {
125                 if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
126                                 0, 0x1000, &fp_mem)) {
127                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
128                                 "Couldn't alloc fragprog buffer!\n");
129                         return;
130                 }
131
132                 if (nouveau_bo_map(fp_mem, NOUVEAU_BO_RDWR)) {
133                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
134                                    "Couldn't map fragprog buffer!\n");
135                 }
136         }
137
138         if (!shader->hw_id) {
139                 uint32_t *map = fp_mem->map + next_hw_id_offset;
140                 int i;
141
142                 for (i = 0; i < shader->size; i++) {
143                         uint32_t data = shader->data[i];
144 #if (X_BYTE_ORDER != X_LITTLE_ENDIAN)
145                         data = ((data >> 16) | ((data & 0xffff) << 16));
146 #endif
147                         map[i] = data;
148                 }
149
150                 shader->hw_id = next_hw_id_offset;
151                 next_hw_id_offset += (shader->size * sizeof(uint32_t));
152                 next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
153         }
154
155         BEGIN_RING(chan, curie, NV40TCL_FP_ADDRESS, 1);
156         OUT_RELOC (chan, fp_mem, shader->hw_id, NOUVEAU_BO_VRAM |
157                          NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
158                          NOUVEAU_BO_OR,
159                          NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
160         BEGIN_RING(chan, curie, NV40TCL_FP_CONTROL, 1);
161         OUT_RING  (chan, shader->card_priv.NV30FP.num_regs <<
162                          NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT);
163 }
164
165 /*******************************************************************************
166  * NV40/G70 vertex shaders
167  */
168
169 nv_shader_t nv40_vp_exa_render = {
170         .card_priv.NV30VP.vp_in_reg  = 0x00000309,
171         .card_priv.NV30VP.vp_out_reg = 0x0000c001,
172         .size = (3*4),
173         .data = {
174                 /* MOV result.position, vertex.position */
175                 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80,
176                 /* MOV result.texcoord[0], vertex.texcoord[0] */
177                 0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c,
178                 /* MOV result.texcoord[1], vertex.texcoord[1] */
179                 0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1,
180         }
181 };
182
183 /*******************************************************************************
184  * NV30/NV40/G70 fragment shaders
185  */
186
187 nv_shader_t nv30_fp_pass_col0 = {
188         .card_priv.NV30FP.num_regs = 2,
189         .size = (1*4),
190         .data = {
191                 /* MOV R0, fragment.color */
192                 0x01403e81, 0x1c9dc801, 0x0001c800, 0x3fe1c800, 
193         }
194 };
195
196 nv_shader_t nv30_fp_pass_tex0 = {
197         .card_priv.NV30FP.num_regs = 2,
198         .size = (2*4),
199         .data = {
200                 /* TEX R0, fragment.texcoord[0], texture[0], 2D */
201                 0x17009e00, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
202                 /* MOV R0, R0 */
203                 0x01401e81, 0x1c9dc800, 0x0001c800, 0x0001c800,
204         }
205 };
206
207 nv_shader_t nv30_fp_composite_mask = {
208         .card_priv.NV30FP.num_regs = 2,
209         .size = (3*4),
210         .data = {
211                 /* TEXC0 R1.w         , fragment.texcoord[1], texture[1], 2D */
212                 0x1702b102, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
213                 /* TEX   R0 (NE0.wwww), fragment.texcoord[0], texture[0], 2D */
214                 0x17009e00, 0x1ff5c801, 0x0001c800, 0x3fe1c800,
215                 /* MUL   R0           , R0, R1.w */
216                 0x02001e81, 0x1c9dc800, 0x0001fe04, 0x0001c800,
217         }
218 };
219
220 nv_shader_t nv30_fp_composite_mask_sa_ca = {
221         .card_priv.NV30FP.num_regs = 2,
222         .size = (3*4),
223         .data = {
224                 /* TEXC0 R1.w         , fragment.texcoord[0], texture[0], 2D */
225                 0x17009102, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
226                 /* TEX   R0 (NE0.wwww), fragment.texcoord[1], texture[1], 2D */
227                 0x1702be00, 0x1ff5c801, 0x0001c800, 0x3fe1c800,
228                 /* MUL   R0           , R1,wwww, R0 */
229                 0x02001e81, 0x1c9dfe04, 0x0001c800, 0x0001c800,
230         }
231 };
232
233 nv_shader_t nv30_fp_composite_mask_ca = {
234         .card_priv.NV30FP.num_regs = 2,
235         .size = (3*4),
236         .data = {
237                 /* TEXC0 R0           , fragment.texcoord[0], texture[0], 2D */
238                 0x17009f00, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
239                 /* TEX   R1 (NE0.xyzw), fragment.texcoord[1], texture[1], 2D */
240                 0x1702be02, 0x1c95c801, 0x0001c800, 0x3fe1c800,
241                 /* MUL   R0           , R0, R1 */
242                 0x02001e81, 0x1c9dc800, 0x0001c804, 0x0001c800,
243         }
244 };
245
246 nv_shader_t nv40_vp_video = {
247         .card_priv.NV30VP.vp_in_reg  = 0x00000309,
248         .card_priv.NV30VP.vp_out_reg = 0x0000c001,
249         .size = (3*4),
250         .data = {
251                 /* MOV result.position, vertex.position */
252                 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80,
253                 /* MOV result.texcoord[0], vertex.texcoord[0] */
254                 0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c,
255                 /* MOV result.texcoord[1], vertex.texcoord[1] */
256                 0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1,
257         }
258 };
259
260 nv_shader_t nv40_fp_yv12_bicubic = {
261         .card_priv.NV30FP.num_regs = 4,
262         .size = (29*4),
263         .data = {
264                 /* INST 0: MOVR R0.xy (TR0.xyzw), attrib.texcoord[0] */
265                 0x01008600, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
266                 /* INST 1: ADDR R0.z (TR0.xyzw), R0.yyyy, { 0.50, 0.00, 0.00, 0.00 }.xxxx */
267                 0x03000800, 0x1c9caa00, 0x00000002, 0x0001c800,
268                 0x3f000000, 0x00000000, 0x00000000, 0x00000000,
269                 /* INST 2: ADDR R1.x (TR0.xyzw), R0, { 0.50, 0.00, 0.00, 0.00 }.xxxx */
270                 0x03000202, 0x1c9dc800, 0x00000002, 0x0001c800,
271                 0x3f000000, 0x00000000, 0x00000000, 0x00000000,
272                 /* INST 3: TEXRC0 R1.xyz (TR0.xyzw), R0.zzzz, texture[0] */
273                 0x17000f82, 0x1c9d5400, 0x0001c800, 0x0001c800,
274                 /* INST 4: MULR R2.yw (TR0.xyzw), R1.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
275                 0x02001404, 0x1c9ca104, 0x0000a002, 0x0001c800,
276                 0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
277                 /* INST 5: TEXR R3.xyz (TR0.xyzw), R1, texture[0] */
278                 0x17000e86, 0x1c9dc804, 0x0001c800, 0x0001c800,
279                 /* INST 6: MULR R2.xz (TR0.xyzw), R3.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
280                 0x02000a04, 0x1c9ca10c, 0x0000a002, 0x0001c800,
281                 0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
282                 /* INST 7: ADDR R2 (TR0.xyzw), R0.xyxy, R2 */
283                 0x03001e04, 0x1c9c8800, 0x0001c808, 0x0001c800,
284                 /* INST 8: TEXR R1.y (TR0.xyzw), R2.zwzz, -texture[1] */
285                 0x17020402, 0x1c9d5c08, 0x0001c800, 0x0001c800,
286                 /* INST 9: MADH R1.x (TR0.xyzw), -R1.zzzz, R1.yyyy, R1.yyyy */
287                 0x04400282, 0x1c9f5504, 0x0000aa04, 0x0000aa04,
288                 /* INST 10: TEXR R0.y (TR0.xyzw), R2.xwxw, -texture[1] */
289                 0x17020400, 0x1c9d9808, 0x0001c800, 0x0001c800,
290                 /* INST 11: MADH R0.w (TR0.xyzw), -R1.zzzz, R0.yyyy, R0.yyyy */
291                 0x04401080, 0x1c9f5504, 0x0000aa00, 0x0000aa00,
292                 /* INST 12: TEXR R0.x (TR0.xyzw), R2.zyxy, texture[1] */
293                 0x17020200, 0x1c9c8c08, 0x0001c800, 0x0001c800,
294                 /* INST 13: MADH R1.x (TR0.xyzw), R1.zzzz, R0, R1 */
295                 0x04400282, 0x1c9d5504, 0x0001c800, 0x0001c904,
296                 /* INST 14: TEXR R0.x (NE0.zzzz), R2, texture[1] */
297                 0x17020200, 0x1555c808, 0x0001c800, 0x0001c800,
298                 /* INST 15: MADH R0.x (TR0.xyzw), R1.zzzz, R0, R0.wwww */
299                 0x04400280, 0x1c9d5504, 0x0001c800, 0x0001ff00,
300                 /* INST 16: MADH R0.w (TR0.xyzw), -R3.zzzz, R1.xxxx, R1.xxxx */
301                 0x04401080, 0x1c9f550c, 0x00000104, 0x00000104,
302                 /* INST 17: TEXR R0.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */
303                 0x1704ac80, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
304                 /* INST 18: MADH R0.x (TR0.xyzw), R3.zzzz, R0, R0.wwww */
305                 0x04400280, 0x1c9d550c, 0x0001c900, 0x0001ff00,
306                 /* INST 19: MADH R1.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */
307                 0x04400e82, 0x1c9c0100, 0x00000002, 0x0001f202,
308                 0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762,
309                 /* INST 20: MADH R1.xyz (TR0.xyzw), R0.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R1 */
310                 0x04400e82, 0x1c9cab00, 0x0001c802, 0x0001c904,
311                 0x00000000, 0xbec890d6, 0x40011687, 0x00000000,
312                 /* INST 21: MADH R0.xyz (TR0.xyzw), R0.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R1 + END */
313                 0x04400e81, 0x1c9d5500, 0x0001c802, 0x0001c904,
314                 0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000,
315         }
316 };
317
318 nv_shader_t nv30_fp_yv12_bicubic = {
319         .card_priv.NV30FP.num_regs = 4,
320         .size = (24*4),
321         .data = {
322                 /* INST 0: MOVR R2.xy (TR0.xyzw), attrib.texcoord[0] */
323                 0x01008604, 0x1c9dc801, 0x0001c800, 0x0001c800,
324                 /* INST 1: ADDR R0.xy (TR0.xyzw), R2, { 0.50, 0.00, 0.00, 0.00 }.xxxx */
325                 0x03000600, 0x1c9dc808, 0x00000002, 0x0001c800,
326                 0x3f000000, 0x00000000, 0x00000000, 0x00000000,
327                 /* INST 2: TEXR R3.xyz (TR0.xyzw), R0, texture[0] */
328                 0x17000e06, 0x1c9dc800, 0x0001c800, 0x0001c800,
329                 /* INST 3: TEXR R0.xyz (TR0.xyzw), R0.yyyy, texture[0] */
330                 0x17000e00, 0x1c9caa00, 0x0001c800, 0x0001c800,
331                 /* INST 4: MULR R1.xz (TR0.xyzw), R3.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
332                 0x02000a02, 0x1c9ca00c, 0x0000a002, 0x0001c800,
333                 0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
334                 /* INST 5: MULR R1.yw (TR0.xyzw), R0.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
335                 0x02001402, 0x1c9ca000, 0x0000a002, 0x0001c800,
336                 0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
337                 /* INST 6: ADDR R2 (TR0.xyzw), R2.xyxy, R1 */
338                 0x03001e04, 0x1c9c8808, 0x0001c804, 0x0001c800,
339                 /* INST 7: TEXR R0.x (TR0.xyzw), R2, texture[1] */
340                 0x17020200, 0x1c9dc808, 0x0001c800, 0x0001c800,
341                 /* INST 8: TEXR R1.y (TR0.xyzw), R2.xwxw, texture[1] */
342                 0x17020402, 0x1c9d9808, 0x0001c800, 0x0001c800,
343                 /* INST 9: TEXR R1.x (TR0.xyzw), R2.zyxy, texture[1] */
344                 0x17020202, 0x1c9c8c08, 0x0001c800, 0x0001c800,
345                 /* INST 10: LRPH R0.x (TR0.xyzw), R0.zzzz, R0, R1.yyyy */
346                 0x1f400280, 0x1c9d5400, 0x0001c800, 0x0000aa04,
347                 /* INST 11: TEXR R0.y (TR0.xyzw), R2.zwzz, texture[1] */
348                 0x17020400, 0x1c9d5c08, 0x0001c800, 0x0001c800,
349                 /* INST 12: LRPH R0.y (TR0.xyzw), R0.zzzz, R1.xxxx, R0 */
350                 0x1f400480, 0x1c9d5400, 0x00000004, 0x0001c800,
351                 /* INST 13: LRPH R0.x (TR0.xyzw), R3.zzzz, R0, R0.yyyy */
352                 0x1f400280, 0x1c9d540c, 0x0001c900, 0x0000ab00,
353                 /* INST 14: MADH R0.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */
354                 0x04400e80, 0x1c9c0100, 0x00000002, 0x0001f202,
355                 0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762,
356                 /* INST 15: TEXR R1.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */
357                 0x1704ac02, 0x1c9dc801, 0x0001c800, 0x0001c800,
358                 /* INST 16: MADH R0.xyz (TR0.xyzw), R1.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R0 */
359                 0x04400e80, 0x1c9caa04, 0x0001c802, 0x0001c900,
360                 0x00000000, 0xbec890d6, 0x40011687, 0x00000000,
361                 /* INST 17: MADH R0.xyz (TR0.xyzw), R1.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R0 + END */
362                 0x04400e81, 0x1c9d5404, 0x0001c802, 0x0001c900,
363                 0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000,
364         }
365 };
366
367 nv_shader_t nv30_fp_yv12_bilinear = {
368         .card_priv.NV30FP.num_regs = 2,
369         .size = (8*4),
370         .data = {
371                 /* INST 0: TEXR R0.x (TR0.xyzw), attrib.texcoord[0], abs(texture[1]) */
372                 0x17028200, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
373                 /* INST 1: MADR R1.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */
374                 0x04000e02, 0x1c9c0000, 0x00000002, 0x0001f202,
375                 0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762,
376                 /* INST 2: TEXR R0.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */
377                 0x1704ac80, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
378                 /* INST 3: MADR R1.xyz (TR0.xyzw), R0.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R1 */
379                 0x04000e02, 0x1c9cab00, 0x0001c802, 0x0001c804,
380                 0x00000000, 0xbec890d6, 0x40011687, 0x00000000,
381                 /* INST 4: MADR R0.xyz (TR0.xyzw), R0.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R1 + END */
382                 0x04000e81, 0x1c9d5500, 0x0001c802, 0x0001c804,
383                 0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000,
384         }
385 };
386
387