/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_reg.h"
#include "radeon.h"

/* r300,r350,rv350,rv370,rv380 depend on: */
void r100_hdp_reset(struct radeon_device *rdev);
int r100_cp_reset(struct radeon_device *rdev);
int r100_rb2d_reset(struct radeon_device *rdev);
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
int r100_pci_gart_enable(struct radeon_device *rdev);
void r100_pci_gart_disable(struct radeon_device *rdev);
void r100_mc_setup(struct radeon_device *rdev);
void r100_mc_disable_clients(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_cs_packet_parse(struct radeon_cs_parser *p,
                         struct radeon_cs_packet *pkt,
                         unsigned idx);
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
                              struct radeon_cs_reloc **cs_reloc);
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
                          struct radeon_cs_packet *pkt,
                          const unsigned *auth, unsigned n,
                          radeon_packet0_check_t check);
void r100_cs_dump_packet(struct radeon_cs_parser *p,
                         struct radeon_cs_packet *pkt);
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
                                         struct radeon_cs_packet *pkt,
                                         struct radeon_object *robj);

/* This file gathers functions specific to:
 * r300,r350,rv350,rv370,rv380
 *
 * Some of these functions might be used by newer ASICs.
 */
void r300_gpu_init(struct radeon_device *rdev);
int r300_mc_wait_for_idle(struct radeon_device *rdev);
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);


/*
 * rv370,rv380 PCIE GART
 */
void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
        uint32_t tmp;
        int i;

        /* Workaround for a HW bug: do the flush 2 times */
        for (i = 0; i < 2; i++) {
                tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
                WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
                (void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
                WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
                mb();
        }
}
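
/*
 * Illustrative sketch (not used by the driver): the flush above follows a
 * common MMIO pattern, i.e. pulse a trigger bit and read the register back
 * so the posted write actually reaches the device before continuing. The
 * helper name below is hypothetical.
 */
static inline void __maybe_unused rv370_pcie_pulse_bit(struct radeon_device *rdev,
                                                       uint32_t reg, uint32_t bit)
{
        uint32_t tmp = RREG32_PCIE(reg);

        WREG32_PCIE(reg, tmp | bit);    /* set the trigger bit */
        (void)RREG32_PCIE(reg);         /* read back to post the write */
        WREG32_PCIE(reg, tmp);          /* restore the original value */
        mb();                           /* order against later accesses */
}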

int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
        uint32_t table_addr;
        uint32_t tmp;
        int r;

        /* Initialize common gart structure */
        r = radeon_gart_init(rdev);
        if (r) {
                return r;
        }
        r = rv370_debugfs_pcie_gart_info_init(rdev);
        if (r) {
                DRM_ERROR("Failed to register debugfs file for PCIE gart!\n");
        }
        rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
        r = radeon_gart_table_vram_alloc(rdev);
        if (r) {
                return r;
        }
        /* discard memory requests outside of the configured range */
        tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
        WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
        tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
        WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
        WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
        WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
        table_addr = rdev->gart.table_addr;
        WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
        /* FIXME: setup default page */
        WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
        WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
        /* Clear error */
        WREG32_PCIE(0x18, 0);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        tmp |= RADEON_PCIE_TX_GART_EN;
        tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
        rv370_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
                 rdev->mc.gtt_size >> 20, table_addr);
        rdev->gart.ready = true;
        return 0;
}

void rv370_pcie_gart_disable(struct radeon_device *rdev)
{
        uint32_t tmp;

        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
        if (rdev->gart.table.vram.robj) {
                radeon_object_kunmap(rdev->gart.table.vram.robj);
                radeon_object_unpin(rdev->gart.table.vram.robj);
        }
}

int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
        void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;

        if (i < 0 || i >= rdev->gart.num_gpu_pages) {
                return -EINVAL;
        }
        /* Pack address bits [39:8] into the 32-bit entry; the low nibble
         * holds flag bits (the page is 4KB aligned, so address bits [11:8]
         * of the low part are zero). */
        addr = (((u32)addr) >> 8) | ((upper_32_bits(addr) & 0xff) << 24) | 0xC;
        writel(cpu_to_le32(addr), ((void __iomem *)ptr) + (i * 4));
        return 0;
}
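
/*
 * Worked example of the entry packing above (illustrative numbers, assuming
 * a 4KB-aligned 40-bit address): for addr = 0x12_3456_7000,
 *   ((u32)addr) >> 8                    = 0x00345670  (address bits 31:8)
 *   (upper_32_bits(addr) & 0xff) << 24  = 0x12000000  (address bits 39:32)
 *   | 0xC                              -> 0x1234567C  (low nibble = flags)
 */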

int r300_gart_enable(struct radeon_device *rdev)
{
#if __OS_HAS_AGP
        if (rdev->flags & RADEON_IS_AGP) {
                if (rdev->family > CHIP_RV350) {
                        rv370_pcie_gart_disable(rdev);
                } else {
                        r100_pci_gart_disable(rdev);
                }
                return 0;
        }
#endif
        if (rdev->flags & RADEON_IS_PCIE) {
                rdev->asic->gart_disable = &rv370_pcie_gart_disable;
                rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
                rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
                return rv370_pcie_gart_enable(rdev);
        }
        return r100_pci_gart_enable(rdev);
}
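
/*
 * Dispatch sketch: once the hooks above are installed, generic code is
 * expected to go through the asic table rather than call the rv370
 * functions directly, e.g. (illustrative only):
 *
 *	rdev->asic->gart_set_page(rdev, page_index, dma_addr);
 *	rdev->asic->gart_tlb_flush(rdev);
 */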


/*
 * MC
 */
int r300_mc_init(struct radeon_device *rdev)
{
        int r;

        if (r100_debugfs_rbbm_init(rdev)) {
                DRM_ERROR("Failed to register debugfs file for RBBM!\n");
        }

        r300_gpu_init(rdev);
        r100_pci_gart_disable(rdev);
        if (rdev->flags & RADEON_IS_PCIE) {
                rv370_pcie_gart_disable(rdev);
        }

        /* Setup GPU memory space */
        rdev->mc.vram_location = 0xFFFFFFFFUL;
        rdev->mc.gtt_location = 0xFFFFFFFFUL;
        if (rdev->flags & RADEON_IS_AGP) {
                r = radeon_agp_init(rdev);
                if (r) {
                        printk(KERN_WARNING "[drm] Disabling AGP\n");
                        rdev->flags &= ~RADEON_IS_AGP;
                        rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
                } else {
                        rdev->mc.gtt_location = rdev->mc.agp_base;
                }
        }
        r = radeon_mc_setup(rdev);
        if (r) {
                return r;
        }

        /* Program GPU memory space */
        r100_mc_disable_clients(rdev);
        if (r300_mc_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait for MC to idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        r100_mc_setup(rdev);
        return 0;
}

void r300_mc_fini(struct radeon_device *rdev)
{
        if (rdev->flags & RADEON_IS_PCIE) {
                rv370_pcie_gart_disable(rdev);
                radeon_gart_table_vram_free(rdev);
        } else {
                r100_pci_gart_disable(rdev);
                radeon_gart_table_ram_free(rdev);
        }
        radeon_gart_fini(rdev);
}


/*
 * Fence emission
 */
void r300_fence_ring_emit(struct radeon_device *rdev,
                          struct radeon_fence *fence)
{
        /* Whoever calls radeon_fence_emit should call ring_lock and ask
         * for enough space (today the callers are ib schedule and buffer move) */
        /* Write SC registers so SC & US assert idle */
        radeon_ring_write(rdev, PACKET0(0x43E0, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(0x43E4, 0));
        radeon_ring_write(rdev, 0);
        /* Flush 3D cache */
        radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
        radeon_ring_write(rdev, (2 << 0));
        radeon_ring_write(rdev, PACKET0(0x4F18, 0));
        radeon_ring_write(rdev, (1 << 0));
        /* Wait until IDLE & CLEAN */
        radeon_ring_write(rdev, PACKET0(0x1720, 0));
        radeon_ring_write(rdev, (1 << 17) | (1 << 16) | (1 << 9));
        /* Emit fence sequence & fire IRQ */
        radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
        radeon_ring_write(rdev, fence->seq);
        radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
        radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}
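
/*
 * Caller pattern implied by the comment above (a sketch, not the actual
 * fence code): whoever ends up here via radeon_fence_emit() must already
 * hold the ring lock with enough space reserved, e.g.:
 *
 *	r = radeon_ring_lock(rdev, 64);
 *	if (!r) {
 *		... emit commands ...
 *		radeon_fence_emit(rdev, fence);
 *		radeon_ring_unlock_commit(rdev);
 *	}
 */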


/*
 * Global GPU functions
 */
int r300_copy_dma(struct radeon_device *rdev,
                  uint64_t src_offset,
                  uint64_t dst_offset,
                  unsigned num_pages,
                  struct radeon_fence *fence)
{
        uint32_t size;
        uint32_t cur_size;
        int i, num_loops;
        int r = 0;

        /* radeon pitch is /64 */
        size = num_pages << PAGE_SHIFT;
        num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
        r = radeon_ring_lock(rdev, num_loops * 4 + 64);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                return r;
        }
        /* Must wait for 2D idle & clean before DMA or hangs might happen */
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev, (1 << 16));
        for (i = 0; i < num_loops; i++) {
                cur_size = size;
                if (cur_size > 0x1FFFFF) {
                        cur_size = 0x1FFFFF;
                }
                size -= cur_size;
                radeon_ring_write(rdev, PACKET0(0x720, 2));
                radeon_ring_write(rdev, src_offset);
                radeon_ring_write(rdev, dst_offset);
                radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
                src_offset += cur_size;
                dst_offset += cur_size;
        }
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
        if (fence) {
                r = radeon_fence_emit(rdev, fence);
        }
        radeon_ring_unlock_commit(rdev);
        return r;
}
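
/*
 * Worked example of the chunking above (illustrative): copying 16MB
 * (0x1000000 bytes) with the 0x1FFFFF per-packet limit gives
 * DIV_ROUND_UP(0x1000000, 0x1FFFFF) = 9 loops; the first eight move
 * 0x1FFFFF bytes each and the last moves the remaining 8 bytes.
 */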

void r300_ring_start(struct radeon_device *rdev)
{
        unsigned gb_tile_config;
        int r;

        /* Sub-pixel 1/12 so we can have 4K rendering according to the docs */
        gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
        switch (rdev->num_gb_pipes) {
        case 2:
                gb_tile_config |= R300_PIPE_COUNT_R300;
                break;
        case 3:
                gb_tile_config |= R300_PIPE_COUNT_R420_3P;
                break;
        case 4:
                gb_tile_config |= R300_PIPE_COUNT_R420;
                break;
        case 1:
        default:
                gb_tile_config |= R300_PIPE_COUNT_RV350;
                break;
        }

        r = radeon_ring_lock(rdev, 64);
        if (r) {
                return;
        }
        radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
        radeon_ring_write(rdev,
                          RADEON_ISYNC_ANY2D_IDLE3D |
                          RADEON_ISYNC_ANY3D_IDLE2D |
                          RADEON_ISYNC_WAIT_IDLEGUI |
                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
        radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
        radeon_ring_write(rdev, gb_tile_config);
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_3D_IDLECLEAN);
        radeon_ring_write(rdev, PACKET0(0x170C, 0));
        radeon_ring_write(rdev, 1 << 31);
        radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_3D_IDLECLEAN);
        radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
        radeon_ring_write(rdev,
                          ((6 << R300_MS_X0_SHIFT) |
                           (6 << R300_MS_Y0_SHIFT) |
                           (6 << R300_MS_X1_SHIFT) |
                           (6 << R300_MS_Y1_SHIFT) |
                           (6 << R300_MS_X2_SHIFT) |
                           (6 << R300_MS_Y2_SHIFT) |
                           (6 << R300_MSBD0_Y_SHIFT) |
                           (6 << R300_MSBD0_X_SHIFT)));
        radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
        radeon_ring_write(rdev,
                          ((6 << R300_MS_X3_SHIFT) |
                           (6 << R300_MS_Y3_SHIFT) |
                           (6 << R300_MS_X4_SHIFT) |
                           (6 << R300_MS_Y4_SHIFT) |
                           (6 << R300_MS_X5_SHIFT) |
                           (6 << R300_MS_Y5_SHIFT) |
                           (6 << R300_MSBD1_SHIFT)));
        radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
        radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
        radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
        radeon_ring_write(rdev,
                          R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
        radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
        radeon_ring_write(rdev,
                          R300_GEOMETRY_ROUND_NEAREST |
                          R300_COLOR_ROUND_NEAREST);
        radeon_ring_unlock_commit(rdev);
}

void r300_errata(struct radeon_device *rdev)
{
        rdev->pll_errata = 0;

        if (rdev->family == CHIP_R300 &&
            (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
                rdev->pll_errata |= CHIP_ERRATA_R300_CG;
        }
}

int r300_mc_wait_for_idle(struct radeon_device *rdev)
{
        unsigned i;
        uint32_t tmp;

        for (i = 0; i < rdev->usec_timeout; i++) {
                /* read MC_STATUS */
                tmp = RREG32(0x0150);
                if (tmp & (1 << 4)) {
                        return 0;
                }
                DRM_UDELAY(1);
        }
        return -1;
}

void r300_gpu_init(struct radeon_device *rdev)
{
        uint32_t gb_tile_config, tmp;

        r100_hdp_reset(rdev);
        /* FIXME: does rv380 have one pipe? */
        if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
                /* r300,r350 */
                rdev->num_gb_pipes = 2;
        } else {
                /* rv350,rv370,rv380 */
                rdev->num_gb_pipes = 1;
        }
        gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
        switch (rdev->num_gb_pipes) {
        case 2:
                gb_tile_config |= R300_PIPE_COUNT_R300;
                break;
        case 3:
                gb_tile_config |= R300_PIPE_COUNT_R420_3P;
                break;
        case 4:
                gb_tile_config |= R300_PIPE_COUNT_R420;
                break;
        default:
        case 1:
                gb_tile_config |= R300_PIPE_COUNT_RV350;
                break;
        }
        WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait for GUI to idle while "
                       "programming pipes. Bad things might happen.\n");
        }

        tmp = RREG32(0x170C);
        WREG32(0x170C, tmp | (1 << 31));

        WREG32(R300_RB2D_DSTCACHE_MODE,
               R300_DC_AUTOFLUSH_ENABLE |
               R300_DC_DC_DISABLE_IGNORE_PE);

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait for GUI to idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        if (r300_mc_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait for MC to idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
}

int r300_ga_reset(struct radeon_device *rdev)
{
        uint32_t tmp;
        bool reinit_cp;
        int i;

        reinit_cp = rdev->cp.ready;
        rdev->cp.ready = false;
        for (i = 0; i < rdev->usec_timeout; i++) {
                WREG32(RADEON_CP_CSQ_MODE, 0);
                WREG32(RADEON_CP_CSQ_CNTL, 0);
                WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
                (void)RREG32(RADEON_RBBM_SOFT_RESET);
                udelay(200);
                WREG32(RADEON_RBBM_SOFT_RESET, 0);
                /* Wait to prevent race in RBBM_STATUS */
                mdelay(1);
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (tmp & ((1 << 20) | (1 << 26))) {
                        DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)\n", tmp);
                        /* GA still busy, soft reset it */
                        WREG32(0x429C, 0x200);
                        WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
                        WREG32(0x43E0, 0);
                        WREG32(0x43E4, 0);
                        WREG32(0x24AC, 0);
                }
                /* Wait to prevent race in RBBM_STATUS */
                mdelay(1);
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & ((1 << 20) | (1 << 26)))) {
                        break;
                }
        }
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & ((1 << 20) | (1 << 26)))) {
                        DRM_INFO("GA reset succeeded (RBBM_STATUS=0x%08X)\n",
                                 tmp);
                        if (reinit_cp) {
                                return r100_cp_init(rdev, rdev->cp.ring_size);
                        }
                        return 0;
                }
                DRM_UDELAY(1);
        }
        tmp = RREG32(RADEON_RBBM_STATUS);
        DRM_ERROR("Failed to reset GA! (RBBM_STATUS=0x%08X)\n", tmp);
        return -1;
}

int r300_gpu_reset(struct radeon_device *rdev)
{
        uint32_t status;

        /* reset order likely matters */
        status = RREG32(RADEON_RBBM_STATUS);
        /* reset HDP */
        r100_hdp_reset(rdev);
        /* reset rb2d */
        if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
                r100_rb2d_reset(rdev);
        }
        /* reset GA */
        if (status & ((1 << 20) | (1 << 26))) {
                r300_ga_reset(rdev);
        }
        /* reset CP */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 16)) {
                r100_cp_reset(rdev);
        }
        /* Check if GPU is idle */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 31)) {
                DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
                return -1;
        }
        DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
        return 0;
}
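
/*
 * Summary of the RBBM_STATUS bits as used by the reset paths above:
 * bits 17/18/27 trigger an RB2D reset, bits 20/26 (VAP/GA busy) trigger
 * a GA reset, bit 16 triggers a CP reset, and bit 31 is the final
 * "GPU still active" check.
 */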


/*
 * r300,r350,rv350,rv380 VRAM info
 */
void r300_vram_info(struct radeon_device *rdev)
{
        uint32_t tmp;

        /* DDR for all cards after R300 & IGP */
        rdev->mc.vram_is_ddr = true;
        tmp = RREG32(RADEON_MEM_CNTL);
        if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
                rdev->mc.vram_width = 128;
        } else {
                rdev->mc.vram_width = 64;
        }
        rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);

        rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
        rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
}


/*
 * Indirect register accessors
 */
uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
{
        uint32_t r;

        WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
        (void)RREG32(RADEON_PCIE_INDEX);
        r = RREG32(RADEON_PCIE_DATA);
        return r;
}

void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
        WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
        (void)RREG32(RADEON_PCIE_INDEX);
        WREG32(RADEON_PCIE_DATA, (v));
        (void)RREG32(RADEON_PCIE_DATA);
}
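
/*
 * Usage sketch: these index/data accessors presumably back the
 * RREG32_PCIE() and WREG32_PCIE() macros used throughout this file, e.g.:
 *
 *	tmp = rv370_pcie_rreg(rdev, RADEON_PCIE_TX_GART_CNTL);
 *	rv370_pcie_wreg(rdev, RADEON_PCIE_TX_GART_CNTL,
 *			tmp | RADEON_PCIE_TX_GART_EN);
 */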

/*
 * PCIE Lanes
 */

void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
        uint32_t link_width_cntl, mask;

        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* FIXME wait for idle */

        switch (lanes) {
        case 0:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
                break;
        case 1:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
                break;
        case 2:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
                break;
        case 4:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
                break;
        case 8:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
                break;
        case 12:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
                break;
        case 16:
        default:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
                break;
        }

        link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

        if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
            (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
                return;

        link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
                             RADEON_PCIE_LC_RECONFIG_NOW |
                             RADEON_PCIE_LC_RECONFIG_LATER |
                             RADEON_PCIE_LC_SHORT_RECONFIG_EN);
        link_width_cntl |= mask;
        WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
        WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
                                                     RADEON_PCIE_LC_RECONFIG_NOW));

        /* wait for lane set to complete */
        link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
        while (link_width_cntl == 0xffffffff)
                link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
}


/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
        uint32_t tmp;

        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
        seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
        seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
        seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
        seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
        seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
        seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
        return 0;
}

static struct drm_info_list rv370_pcie_gart_info_list[] = {
        {"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
};
#endif

int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#else
        return 0;
#endif
}


/*
 * CS functions
 */
struct r300_cs_track_cb {
        struct radeon_object    *robj;
        unsigned                pitch;
        unsigned                cpp;
        unsigned                offset;
};

struct r300_cs_track_array {
        struct radeon_object    *robj;
        unsigned                esize;
};

struct r300_cs_track_texture {
        struct radeon_object    *robj;
        unsigned                pitch;
        unsigned                width;
        unsigned                height;
        unsigned                num_levels;
        unsigned                cpp;
        unsigned                tex_coord_type;
        unsigned                txdepth;
        unsigned                width_11;
        unsigned                height_11;
        bool                    use_pitch;
        bool                    enabled;
        bool                    roundup_w;
        bool                    roundup_h;
};

struct r300_cs_track {
        unsigned                        num_cb;
        unsigned                        maxy;
        unsigned                        vtx_size;
        unsigned                        vap_vf_cntl;
        unsigned                        immd_dwords;
        unsigned                        num_arrays;
        unsigned                        max_indx;
        struct r300_cs_track_array      arrays[11];
        struct r300_cs_track_cb         cb[4];
        struct r300_cs_track_cb         zb;
        struct r300_cs_track_texture    textures[16];
        bool                            z_enabled;
};
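
/*
 * Lifecycle sketch for the tracker above, based on the helpers in this
 * file (illustrative only):
 *
 *	struct r300_cs_track track;
 *
 *	r300_cs_track_clear(&track);            // permissive defaults
 *	// r300_packet0_check()/r300_packet3_check() fill it in while
 *	// the command stream is parsed ...
 *	r = r300_cs_track_check(rdev, &track);  // validate before submit
 */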

static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
        DRM_ERROR("pitch                      %d\n", t->pitch);
        DRM_ERROR("width                      %d\n", t->width);
        DRM_ERROR("height                     %d\n", t->height);
        DRM_ERROR("num levels                 %d\n", t->num_levels);
        DRM_ERROR("depth                      %d\n", t->txdepth);
        DRM_ERROR("bpp                        %d\n", t->cpp);
        DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
        DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
        DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}

static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
                                              struct r300_cs_track *track)
{
        struct radeon_object *robj;
        unsigned long size;
        unsigned u, i, w, h;

        for (u = 0; u < 16; u++) {
                if (!track->textures[u].enabled)
                        continue;
                robj = track->textures[u].robj;
                if (robj == NULL) {
                        DRM_ERROR("No texture bound to unit %u\n", u);
                        return -EINVAL;
                }
                size = 0;
                for (i = 0; i <= track->textures[u].num_levels; i++) {
                        if (track->textures[u].use_pitch) {
                                w = track->textures[u].pitch / (1 << i);
                        } else {
                                w = track->textures[u].width / (1 << i);
                                if (rdev->family >= CHIP_RV515)
                                        w |= track->textures[u].width_11;
                                if (track->textures[u].roundup_w)
                                        w = roundup_pow_of_two(w);
                        }
                        h = track->textures[u].height / (1 << i);
                        if (rdev->family >= CHIP_RV515)
                                h |= track->textures[u].height_11;
                        if (track->textures[u].roundup_h)
                                h = roundup_pow_of_two(h);
                        size += w * h;
                }
                size *= track->textures[u].cpp;
                switch (track->textures[u].tex_coord_type) {
                case 0:
                        break;
                case 1:
                        size *= (1 << track->textures[u].txdepth);
                        break;
                case 2:
                        size *= 6;
                        break;
                default:
                        DRM_ERROR("Invalid texture coordinate type %u for unit "
                                  "%u\n", track->textures[u].tex_coord_type, u);
                        return -EINVAL;
                }
                if (size > radeon_object_size(robj)) {
                        DRM_ERROR("Texture of unit %u needs %lu bytes but is "
                                  "%lu\n", u, size, radeon_object_size(robj));
                        r300_cs_track_texture_print(&track->textures[u]);
                        return -EINVAL;
                }
        }
        return 0;
}
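
/*
 * Worked size example for the mipmap loop above (illustrative): a 256x256
 * texture with num_levels = 2 and cpp = 4 sums
 * (256*256 + 128*128 + 64*64) * 4 = 344064 bytes; a cube map
 * (tex_coord_type == 2) would multiply that by 6.
 */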

int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
{
        unsigned i;
        unsigned long size;
        unsigned prim_walk;
        unsigned nverts;

        for (i = 0; i < track->num_cb; i++) {
                if (track->cb[i].robj == NULL) {
                        DRM_ERROR("[drm] No buffer for color buffer %d!\n", i);
                        return -EINVAL;
                }
                size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
                size += track->cb[i].offset;
                if (size > radeon_object_size(track->cb[i].robj)) {
                        DRM_ERROR("[drm] Buffer too small for color buffer %d "
                                  "(need %lu have %lu)!\n", i, size,
                                  radeon_object_size(track->cb[i].robj));
                        DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
                                  i, track->cb[i].pitch, track->cb[i].cpp,
                                  track->cb[i].offset, track->maxy);
                        return -EINVAL;
                }
        }
        if (track->z_enabled) {
                if (track->zb.robj == NULL) {
                        DRM_ERROR("[drm] No buffer for z buffer!\n");
                        return -EINVAL;
                }
                size = track->zb.pitch * track->zb.cpp * track->maxy;
                size += track->zb.offset;
                if (size > radeon_object_size(track->zb.robj)) {
                        DRM_ERROR("[drm] Buffer too small for z buffer "
                                  "(need %lu have %lu)!\n", size,
                                  radeon_object_size(track->zb.robj));
                        return -EINVAL;
                }
        }
        prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
        nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
        switch (prim_walk) {
        case 1:
                for (i = 0; i < track->num_arrays; i++) {
                        size = track->arrays[i].esize * track->max_indx * 4;
                        if (track->arrays[i].robj == NULL) {
                                DRM_ERROR("(PW %u) Vertex array %u has no buffer "
                                          "bound\n", prim_walk, i);
                                return -EINVAL;
                        }
                        if (size > radeon_object_size(track->arrays[i].robj)) {
                                DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
                                          "but has %lu dwords\n", prim_walk, i,
                                          size >> 2,
                                          radeon_object_size(track->arrays[i].robj) >> 2);
                                DRM_ERROR("Max indices %u\n", track->max_indx);
                                return -EINVAL;
                        }
                }
                break;
        case 2:
                for (i = 0; i < track->num_arrays; i++) {
                        size = track->arrays[i].esize * (nverts - 1) * 4;
                        if (track->arrays[i].robj == NULL) {
                                DRM_ERROR("(PW %u) Vertex array %u has no buffer "
                                          "bound\n", prim_walk, i);
                                return -EINVAL;
                        }
                        if (size > radeon_object_size(track->arrays[i].robj)) {
                                DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
                                          "but has %lu dwords\n", prim_walk, i, size >> 2,
                                          radeon_object_size(track->arrays[i].robj) >> 2);
                                return -EINVAL;
                        }
                }
                break;
        case 3:
                size = track->vtx_size * nverts;
                if (size != track->immd_dwords) {
                        DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
                                  track->immd_dwords, size);
                        DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
                                  nverts, track->vtx_size);
                        return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
                          prim_walk);
                return -EINVAL;
        }
        return r300_cs_track_texture_check(rdev, track);
}

static inline void r300_cs_track_clear(struct r300_cs_track *track)
{
        unsigned i;

        track->num_cb = 4;
        track->maxy = 4096;
        for (i = 0; i < track->num_cb; i++) {
                track->cb[i].robj = NULL;
                track->cb[i].pitch = 8192;
                track->cb[i].cpp = 16;
                track->cb[i].offset = 0;
        }
        track->z_enabled = true;
        track->zb.robj = NULL;
        track->zb.pitch = 8192;
        track->zb.cpp = 4;
        track->zb.offset = 0;
        track->vtx_size = 0x7F;
        track->immd_dwords = 0xFFFFFFFFUL;
        track->num_arrays = 11;
        track->max_indx = 0x00FFFFFFUL;
        for (i = 0; i < track->num_arrays; i++) {
                track->arrays[i].robj = NULL;
                track->arrays[i].esize = 0x7F;
        }
        for (i = 0; i < 16; i++) {
                track->textures[i].pitch = 16536;
                track->textures[i].width = 16536;
                track->textures[i].height = 16536;
                track->textures[i].width_11 = 1 << 11;
                track->textures[i].height_11 = 1 << 11;
                track->textures[i].num_levels = 12;
                track->textures[i].txdepth = 16;
                track->textures[i].cpp = 64;
                track->textures[i].tex_coord_type = 1;
                track->textures[i].robj = NULL;
                /* CS IB emission code makes sure texture units are disabled */
                track->textures[i].enabled = false;
                track->textures[i].roundup_w = true;
                track->textures[i].roundup_h = true;
        }
}

static const unsigned r300_reg_safe_bm[159] = {
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFBF, 0xFFFFFFFF, 0xFFFFFFBF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
        0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
        0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
        0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
        0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
        0x00000000, 0x0000C100, 0x00000000, 0x00000000,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
};
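
/*
 * Indexing sketch for the bitmap above (the consumer-side semantics are an
 * assumption based on the r100_cs_parse_packet0() helper): each bit covers
 * one register dword, so register offset `reg` maps to
 *
 *	r300_reg_safe_bm[reg >> 7] & (1 << ((reg >> 2) & 31))
 *
 * 159 words * 32 bits * 4 bytes covers register offsets 0x0000-0x4F7C.
 */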

static int r300_packet0_check(struct radeon_cs_parser *p,
                struct radeon_cs_packet *pkt,
                unsigned idx, unsigned reg)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_cs_reloc *reloc;
        struct r300_cs_track *track;
        volatile uint32_t *ib;
        uint32_t tmp;
        unsigned i;
        int r;

        ib = p->ib->ptr;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        track = (struct r300_cs_track *)p->track;
        switch (reg) {
        case RADEON_DST_PITCH_OFFSET:
        case RADEON_SRC_PITCH_OFFSET:
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                tmp = ib_chunk->kdata[idx] & 0x003fffff;
                tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
                ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
                break;
        case R300_RB3D_COLOROFFSET0:
        case R300_RB3D_COLOROFFSET1:
        case R300_RB3D_COLOROFFSET2:
        case R300_RB3D_COLOROFFSET3:
                i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                track->cb[i].robj = reloc->robj;
                track->cb[i].offset = ib_chunk->kdata[idx];
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                break;
        case R300_ZB_DEPTHOFFSET:
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                track->zb.robj = reloc->robj;
                track->zb.offset = ib_chunk->kdata[idx];
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                break;
        case R300_TX_OFFSET_0:
        case R300_TX_OFFSET_0+4:
        case R300_TX_OFFSET_0+8:
        case R300_TX_OFFSET_0+12:
        case R300_TX_OFFSET_0+16:
        case R300_TX_OFFSET_0+20:
        case R300_TX_OFFSET_0+24:
        case R300_TX_OFFSET_0+28:
        case R300_TX_OFFSET_0+32:
        case R300_TX_OFFSET_0+36:
        case R300_TX_OFFSET_0+40:
        case R300_TX_OFFSET_0+44:
        case R300_TX_OFFSET_0+48:
        case R300_TX_OFFSET_0+52:
        case R300_TX_OFFSET_0+56:
        case R300_TX_OFFSET_0+60:
                i = (reg - R300_TX_OFFSET_0) >> 2;
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                track->textures[i].robj = reloc->robj;
                break;
        /* Tracked registers */
        case 0x2084:
                /* VAP_VF_CNTL */
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                break;
        case 0x20B4:
                /* VAP_VTX_SIZE */
                track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
                break;
        case 0x2134:
                /* VAP_VF_MAX_VTX_INDX */
                track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
                break;
        case 0x43E4:
                /* SC_SCISSOR1 */
                track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
                if (p->rdev->family < CHIP_RV515) {
                        track->maxy -= 1440;
                }
                break;
        case 0x4E00:
                /* RB3D_CCTL */
                track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
                break;
        case 0x4E38:
        case 0x4E3C:
        case 0x4E40:
        case 0x4E44:
                /* RB3D_COLORPITCH0 */
                /* RB3D_COLORPITCH1 */
                /* RB3D_COLORPITCH2 */
                /* RB3D_COLORPITCH3 */
                i = (reg - 0x4E38) >> 2;
                track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
                switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
                case 9:
                case 11:
                case 12:
                        track->cb[i].cpp = 1;
                        break;
                case 3:
                case 4:
                case 13:
                case 15:
                        track->cb[i].cpp = 2;
                        break;
                case 6:
                        track->cb[i].cpp = 4;
                        break;
                case 10:
                        track->cb[i].cpp = 8;
                        break;
                case 7:
                        track->cb[i].cpp = 16;
                        break;
                default:
                        DRM_ERROR("Invalid color buffer format (%d)!\n",
                                  ((ib_chunk->kdata[idx] >> 21) & 0xF));
                        return -EINVAL;
                }
                break;
        case 0x4F00:
                /* ZB_CNTL */
                if (ib_chunk->kdata[idx] & 2) {
                        track->z_enabled = true;
                } else {
                        track->z_enabled = false;
                }
                break;
        case 0x4F10:
                /* ZB_FORMAT */
                switch ((ib_chunk->kdata[idx] & 0xF)) {
                case 0:
                case 1:
                        track->zb.cpp = 2;
                        break;
                case 2:
                        track->zb.cpp = 4;
                        break;
                default:
                        DRM_ERROR("Invalid z buffer format (%d)!\n",
                                  (ib_chunk->kdata[idx] & 0xF));
                        return -EINVAL;
                }
                break;
        case 0x4F24:
                /* ZB_DEPTHPITCH */
                track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
                break;
        case 0x4104:
                for (i = 0; i < 16; i++) {
                        bool enabled;

                        enabled = !!(ib_chunk->kdata[idx] & (1 << i));
                        track->textures[i].enabled = enabled;
                }
                break;
        case 0x44C0:
        case 0x44C4:
        case 0x44C8:
        case 0x44CC:
        case 0x44D0:
        case 0x44D4:
        case 0x44D8:
        case 0x44DC:
        case 0x44E0:
        case 0x44E4:
        case 0x44E8:
        case 0x44EC:
        case 0x44F0:
        case 0x44F4:
        case 0x44F8:
        case 0x44FC:
                /* TX_FORMAT1_[0-15] */
                i = (reg - 0x44C0) >> 2;
                tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
                track->textures[i].tex_coord_type = tmp;
                switch ((ib_chunk->kdata[idx] & 0x1F)) {
                case 0:
                case 2:
                case 5:
                case 18:
                case 20:
                case 21:
                        track->textures[i].cpp = 1;
                        break;
                case 1:
                case 3:
                case 6:
                case 7:
                case 10:
                case 11:
                case 19:
                case 22:
                case 24:
                        track->textures[i].cpp = 2;
                        break;
                case 4:
                case 8:
                case 9:
                case 12:
                case 13:
                case 23:
                case 25:
                case 27:
                case 30:
                        track->textures[i].cpp = 4;
                        break;
                case 14:
                case 26:
                case 28:
                        track->textures[i].cpp = 8;
                        break;
                case 29:
                        track->textures[i].cpp = 16;
                        break;
                default:
                        DRM_ERROR("Invalid texture format %u\n",
                                  (ib_chunk->kdata[idx] & 0x1F));
                        return -EINVAL;
                }
                break;
        case 0x4400:
        case 0x4404:
        case 0x4408:
        case 0x440C:
        case 0x4410:
        case 0x4414:
        case 0x4418:
        case 0x441C:
        case 0x4420:
        case 0x4424:
        case 0x4428:
        case 0x442C:
        case 0x4430:
        case 0x4434:
        case 0x4438:
        case 0x443C:
                /* TX_FILTER0_[0-15] */
                i = (reg - 0x4400) >> 2;
                tmp = ib_chunk->kdata[idx] & 0x7;
                if (tmp == 2 || tmp == 4 || tmp == 6) {
                        track->textures[i].roundup_w = false;
                }
                tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;
                if (tmp == 2 || tmp == 4 || tmp == 6) {
                        track->textures[i].roundup_h = false;
                }
                break;
        case 0x4500:
        case 0x4504:
        case 0x4508:
        case 0x450C:
        case 0x4510:
        case 0x4514:
        case 0x4518:
        case 0x451C:
        case 0x4520:
        case 0x4524:
        case 0x4528:
        case 0x452C:
        case 0x4530:
        case 0x4534:
        case 0x4538:
        case 0x453C:
                /* TX_FORMAT2_[0-15] */
                i = (reg - 0x4500) >> 2;
                tmp = ib_chunk->kdata[idx] & 0x3FFF;
                track->textures[i].pitch = tmp + 1;
                if (p->rdev->family >= CHIP_RV515) {
                        tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
                        track->textures[i].width_11 = tmp;
                        tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
                        track->textures[i].height_11 = tmp;
                }
                break;
        case 0x4480:
        case 0x4484:
        case 0x4488:
        case 0x448C:
        case 0x4490:
        case 0x4494:
        case 0x4498:
        case 0x449C:
        case 0x44A0:
        case 0x44A4:
        case 0x44A8:
        case 0x44AC:
        case 0x44B0:
        case 0x44B4:
        case 0x44B8:
        case 0x44BC:
                /* TX_FORMAT0_[0-15] */
                i = (reg - 0x4480) >> 2;
                tmp = ib_chunk->kdata[idx] & 0x7FF;
                track->textures[i].width = tmp + 1;
                tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
                track->textures[i].height = tmp + 1;
                tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
                track->textures[i].num_levels = tmp;
                tmp = ib_chunk->kdata[idx] & (1 << 31);
                track->textures[i].use_pitch = !!tmp;
                tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
                track->textures[i].txdepth = tmp;
                break;
        default:
                printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
                       reg, idx);
                return -EINVAL;
        }
        return 0;
}
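
/*
 * Relocation patching example for the cases above (illustrative): for
 * RADEON_DST_PITCH_OFFSET the low 22 bits of the command dword hold
 * offset >> 10, so the parser adds the bo's gpu_offset >> 10 into that
 * field and writes the patched dword back into the IB, leaving the pitch
 * bits [31:22] untouched.
 */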
1351
static int r300_packet3_check(struct radeon_cs_parser *p,
                              struct radeon_cs_packet *pkt)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_cs_reloc *reloc;
        struct r300_cs_track *track;
        volatile uint32_t *ib;
        unsigned idx;
        unsigned i, c;
        int r;

        ib = p->ib->ptr;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        idx = pkt->idx + 1;
        track = (struct r300_cs_track *)p->track;
        switch (pkt->opcode) {
        case PACKET3_3D_LOAD_VBPNTR:
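                /* First dword holds the array count in bits [4:0]; arrays
                 * follow in pairs of one packed element-size dword and two
                 * buffer addresses, each of which needs a relocation. */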
                c = ib_chunk->kdata[idx++] & 0x1F;
                track->num_arrays = c;
                for (i = 0; i < (c - 1); i += 2, idx += 3) {
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                        track->arrays[i + 0].robj = reloc->robj;
                        track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
                        track->arrays[i + 0].esize &= 0x7F;
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
                        track->arrays[i + 1].robj = reloc->robj;
                        track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
                        track->arrays[i + 1].esize &= 0x7F;
                }
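                /* An odd array count leaves one trailing array with a
                 * single relocation. */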
                if (c & 1) {
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                        track->arrays[i + 0].robj = reloc->robj;
                        track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
                        track->arrays[i + 0].esize &= 0x7F;
                }
                break;
        case PACKET3_INDX_BUFFER:
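                /* Patch the index buffer address with its relocation and
                 * check that the buffer is large enough for the draw. */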
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
                if (r) {
                        return r;
                }
                break;
        /* Draw packet */
        case PACKET3_3D_DRAW_IMMD:
                /* Number of dwords is vtx_size * (num_vertices - 1),
                 * PRIM_WALK must be equal to 3, vertex data is embedded
                 * in the command stream */
                if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
                        return -EINVAL;
                }
                track->vap_vf_cntl = ib_chunk->kdata[idx+1];
                track->immd_dwords = pkt->count - 1;
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_IMMD_2:
                /* Number of dwords is vtx_size * (num_vertices - 1),
                 * PRIM_WALK must be equal to 3, vertex data is embedded
                 * in the command stream */
                if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
                        return -EINVAL;
                }
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                track->immd_dwords = pkt->count;
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_VBUF:
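                /* The _2 draw variants omit the leading VAP_VTX_FMT dword,
                 * which is why they read VAP_VF_CNTL from kdata[idx] while
                 * the plain variants read kdata[idx + 1]. */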
                track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_VBUF_2:
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_INDX:
                track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_INDX_2:
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_NOP:
                break;
        default:
                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}

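/* Walk the indirect buffer packet by packet, checking every type-0
 * register write against the safe-register bitmap and every type-3
 * packet with r300_packet3_check(). */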
int r300_cs_parse(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet pkt;
        struct r300_cs_track track;
        int r;

        r300_cs_track_clear(&track);
        p->track = &track;
        do {
                r = r100_cs_packet_parse(p, &pkt, p->idx);
                if (r) {
                        return r;
                }
                p->idx += pkt.count + 2;
                switch (pkt.type) {
                case PACKET_TYPE0:
                        r = r100_cs_parse_packet0(p, &pkt,
                                                  p->rdev->config.r300.reg_safe_bm,
                                                  p->rdev->config.r300.reg_safe_bm_size,
                                                  &r300_packet0_check);
                        break;
                case PACKET_TYPE2:
                        break;
                case PACKET_TYPE3:
                        r = r300_packet3_check(p, &pkt);
                        break;
                default:
                        DRM_ERROR("Unknown packet type %d!\n", pkt.type);
                        return -EINVAL;
                }
                if (r) {
                        return r;
                }
        } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
        return 0;
}

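/* Point the parser at the static safe-register bitmap used to validate
 * type-0 packets for the r300 family. */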
int r300_init(struct radeon_device *rdev)
{
        rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
        rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
        return 0;
}