1 #include "nv_include.h"
4 /* --------------------------------------------------------------------------- */
5 /* Some documentation of the NVidia DMA command buffers and graphics objects */
6 /* --------------------------------------------------------------------------- */
/* Width, in bits, of the slices that hash() folds an object handle into. */
8 #define HashTableBits 9
/* 1 << HashTableBits, i.e. 512 with the value above. */
9 #define HashTableSize (1 << HashTableBits)
11 /* NVidia uses context objects to drive drawing operations.
13 Context objects can be selected into 8 subchannels in the FIFO,
14 and then used via DMA command buffers.
16 A context object is referenced by a user defined handle (CARD32). The HW looks up graphics
17 objects in a hash table in the instance RAM.
19 An entry in the hash table consists of 2 CARD32. The first CARD32 contains the handle,
20 the second one a bitfield, that contains the address of the object in instance RAM.
22 The format of the second CARD32 seems to be:
26 15: 0 instance_addr >> 4
27 17:16 engine (here uses 1 = graphics)
28 28:24 channel id (here uses 0)
33 15: 0 instance_addr >> 4 (maybe 19-0)
34 21:20 engine (here uses 1 = graphics)
35 I'm unsure about the other bits, but using 0 seems to work.
37 The key into the hash table depends on the object handle and channel id and is given as:
/*
 * Derive the hash-table key for an object handle on FIFO channel chid.
 * The handle is XOR-folded HashTableBits bits at a time, then the channel
 * id, shifted left by (HashTableBits - 4), is XORed into the result.
 */
39 static CARD32 hash(CARD32 handle, int chid)
/* i counts the handle bits still to be folded: 32, 23, 14, 5 -> four passes */
44 for (i = 32; i > 0; i -= HashTableBits) {
/* XOR the lowest HashTableBits bits of the handle into the key ... */
45 result ^= (handle & ((1 << HashTableBits) - 1));
/* ... and shift them out so the next pass sees the following slice */
46 handle >>= HashTableBits;
/* mix in the channel id (NVDmaCreateContextObject passes 0 here) */
48 result ^= chid << (HashTableBits - 4);
53 /* Where is the hash table located:
55 Base address and size can be calculated from this register:
57 ht_base = 0x1000 * GetBitField (pNv->PFIFO[0x0210/4],8:4);
58 ht_size = 0x1000 << GetBitField (pNv->PFIFO[0x0210/4],17:16);
60 and the hash table will be located between address PRAMIN + ht_base and
61 PRAMIN + ht_base + ht_size. Each hash table entry has two longwords.
63 Please note that PRAMIN starts at 0x700000, whereas the driver's
64 PRAMIN pointer starts at 0x710000. Thus we have to subtract 0x10000
65 from these numbers to get the correct offset relative to the PRAMIN
/*
 * Set up the object hash table: RAMHT is pointed at the start of the
 * driver's PRAMIN mapping, the PFIFO RAMHT register is programmed, and
 * the instance-RAM allocation cursor (pramin_free) is set to its start value.
 */
68 static void initHashTable(NVPtr pNv)
71 const CARD32 offset = 0x10000;
72 /* 4k hash table size at 0x10000, search 128 */
73 pNv->RAMHT = pNv->PRAMIN;
/* (HashTableBits - 9) goes into the field at bit 16 and (offset >> 16) into
   the field at bit 4; the 0x03 at bit 24 is presumably the "search 128"
   setting mentioned above -- TODO confirm against register documentation */
74 pNv->PFIFO[NV_PFIFO_RAMHT/4] = (0x03 << 24) | ((HashTableBits - 9) << 16) | ((offset >> 16) << 4);
75 for (i = 0; i < HashTableSize; ++i)
77 /* our first object in instance RAM can be at 0x718000 */
/* getObject() hands out slots starting from this value */
78 pNv->pramin_free = 0x1200;
/*
 * Reserve the next free slot in instance RAM.  The current value of
 * pNv->pramin_free is taken for the new object, and pramin_free is then
 * advanced by two on NV40 and later, by one on earlier architectures.
 */
82 static CARD32 getObject(NVPtr pNv)
84 CARD32 object = pNv->pramin_free;
/* presumably two slots on NV40+ because context objects there are twice
   as large as on older chips -- TODO confirm */
85 pNv->pramin_free += pNv->Architecture >= NV_ARCH_40 ? 2 : 1;
90 DMA objects are used to reference a piece of memory in the
91 framebuffer, PCI or AGP address space. Each object is 16 bytes big
95 11:0 class (seems like I can always use 0 here)
96 12 page table present?
98 15:14 access: 0 rw, 1 ro, 2 wo
99 17:16 target: 0 NV memory, 1 NV memory tiled, 2 PCI, 3 AGP
100 31:20 dma adjust (bits 0-11 of the address)
104 1 0 readonly, 1 readwrite
105 31:12 dma frame address (bits 12-31 of the address)
107 Non linear page tables seem to need a list of frame addresses afterwards,
108 the rivatv project has some info on this.
110 The method below creates a DMA object in instance RAM and returns a handle to it
111 that can be used to set up context objects.
/*
 * Write a DMA object describing a memory range into instance RAM.
 *
 * target       - NV_DMA_TARGET_* value, stored at bits 17:16 of word 0.
 *                For NV_DMA_TARGET_AGP, pNv->agpPhysical is added to
 *                base_address first.
 * base_address - start of the range; split into a 4 KiB-aligned frame
 *                address and a 12-bit offset within the page ("adjust").
 * size         - length of the range; stored as size - 1 in word 1.
 * access       - NV_DMA_ACCES_* value, stored at bits 15:14 of word 0.
 */
113 CARD32 NVDmaCreateDMAObject(NVPtr pNv, int target, CARD32 base_address, CARD32 size, int access)
115 /* adjust: adjusts byte offset in a page */
116 CARD32 frame_address, adjust, object, pramin_offset;
117 if (target == NV_DMA_TARGET_AGP)
118 base_address += pNv->agpPhysical;
/* upper 20 bits: page frame; lower 12 bits: offset inside that page */
120 frame_address = base_address & ~0xfff;
121 adjust = base_address & 0xfff;
122 NVDEBUG("NVDmaCreateDMAObject: target = %d, base=%x fram=%x adjust=%x\n", target, base_address, frame_address, adjust);
124 /* we take the next empty spot in instance RAM and write our DMA object to it */
125 object = getObject(pNv);
/* convert the slot number into a CARD32 index into pNv->PRAMIN (four
   CARD32s per slot); the 0x1000 bias presumably reflects the driver's
   PRAMIN pointer starting 0x10000 bytes into PRAMIN -- TODO confirm */
127 pramin_offset = (object - 0x1000) << 2;
/* word 0: bits 12 and 13 set, access at 15:14, target at 17:16, adjust at 31:20 */
129 pNv->PRAMIN[pramin_offset] = (1<<12)|(1<<13)|(adjust<<20)|(access<<14)|(target<<16);
/* word 1: size of the range minus one */
130 pNv->PRAMIN[pramin_offset+1] = size - 1;
/* word 2: frame address, with bit 1 set unless the object is read-only */
131 pNv->PRAMIN[pramin_offset+2] = frame_address | ((access != NV_DMA_ACCES_RO) ? (1<<1) : 0);
132 pNv->PRAMIN[pramin_offset+3] = 0xffffffff;
134 pNv->Architecture >= NV_ARCH_40
135 ? 0 : ((access != NV_DMA_ACCES_RO) ? (1<<1) : 0);
142 A DMA notifier is a DMA object that references a small (32 byte it
143 seems, we use 256 for safety) memory area that will be used by the HW to give feedback
144 about a DMA operation.
/*
 * Create a notifier DMA object: a read/write DMA object covering 0x100
 * bytes at base_address in the given target.  Returns whatever
 * NVDmaCreateDMAObject() returns for it.
 */
146 CARD32 NVDmaCreateNotifier(NVPtr pNv, int target, CARD32 base_address)
148 return NVDmaCreateDMAObject(pNv, target, base_address, 0x100, NV_DMA_ACCES_RW);
152 How do we wait for DMA completion (by notifiers) ?
154 Either repeatedly read the notifier address and wait until it changes,
155 or enable a 'wakeup' interrupt by writing NOTIFY_WRITE_LE_AWAKEN into
156 the 'notify' field of the object in the channel. My guess is that
157 this causes an interrupt in PGRAPH/NOTIFY as soon as the transfer is
158 completed. Clients probably can use poll on the nv* devices to get this
159 event. All this is a guess. I don't know any details, and I have not
160 tested it. Also, I have no idea how the 'nvdriver' reacts if it gets
161 notify events that are not registered.
163 Writing NV_NOTIFY_WRITE_LE_AWAKEN into the 'Notify' field of an object
164 in a channel really causes an interrupt in the PGRAPH engine. Thus
165 we can determine whether a DMA transfer has finished in the interrupt
168 We can't use interrupts in user land, so we do the simple polling approach.
169 The method returns FALSE in case of an error.
/*
 * Inspect the notifier located base_address bytes into the memory area
 * selected by target (a different base pointer is chosen when target is
 * NV_DMA_TARGET_AGP).  The notifier is read as 32-bit words through a
 * volatile pointer.
 */
171 Bool NVDmaWaitForNotifier(NVPtr pNv, int target, CARD32 base_address)
174 unsigned char *notifier = (target == NV_DMA_TARGET_AGP)
/* base_address is applied as a byte offset to the selected base pointer */
177 notifier += base_address;
/* volatile so that each read of the notifier words goes to memory */
178 n = (volatile U032 *)notifier;
179 NVDEBUG("NVDmaWaitForNotifier @%p", n);
185 NVDEBUG("status: n[0]=%x, n[1]=%x, n[2]=%x, n[3]=%x\n", a, b, c, status);
/* NOTE(review): status is the value logged as n[3]; 0xffffffff is handled
   as a special case here -- presumably "not written by the HW yet"; confirm */
186 if (status == 0xffffffff)
196 /* Context objects in the instance RAM have the following structure. On NV40 they are 32 byte long,
197 on NV30 and smaller 16 bytes.
206 17:15 patch config: scrcopy_and, rop_and, blend_and, scrcopy, srccopy_pre, blend_pre
207 18 synchronize enable
208 19 endian: 1 big, 0 little
210 23 single step enable
211 24 patch status: 0 invalid, 1 valid
212 25 context_surface 0: 1 valid
213 26 context surface 1: 1 valid
214 27 context pattern: 1 valid
215 28 context rop: 1 valid
216 29,30 context beta, beta4
220 31:16 notify instance address
222 15:0 dma 0 instance address
223 31:16 dma 1 instance address
228 No idea what the exact format is. Here's what can be deduced:
231 11:0 class (maybe uses more bits here?)
234 25 patch status valid ?
236 15:0 DMA notifier (maybe 20:0)
238 15:0 DMA 0 instance (maybe 20:0)
241 15:0 DMA 1 instance (maybe 20:0)
/*
 * Create a context object in instance RAM and register it in the hash
 * table under the given handle.
 *
 * handle       - user-defined handle; stored in the first CARD32 of the
 *                hash table entry.
 * class        - object class; ORed into word 0 of the object.
 * flags        - NV_DMA_CONTEXT_FLAGS_* bits, translated below into
 *                architecture-specific bits of the object words.
 * dma_in       - DMA object value stored in word 2 (NV40+: the whole word
 *                together with flag bits; older: ORed into the low half).
 * dma_out      - DMA object value stored in word 3 on NV40+, shifted into
 *                the upper 16 bits of word 2 on older chips.
 * dma_notifier - notifier DMA object value stored in word 1 (shifted into
 *                the upper 16 bits on pre-NV40 chips).
 */
246 void NVDmaCreateContextObject(NVPtr pNv, int handle, int class, CARD32 flags,
247 CARD32 dma_in, CARD32 dma_out, CARD32 dma_notifier)
249 CARD32 pramin_offset;
250 CARD32 object = getObject(pNv);
/* slot number -> CARD32 index into pNv->PRAMIN (four CARD32s per slot) */
251 pramin_offset = (object - 0x1000) << 2;
252 NVDEBUG("NVDmaCreateContextObject: storing object at %x\n", pramin_offset);
/* NV40 and later: eight-word object */
254 if (pNv->Architecture >= NV_ARCH_40) {
255 CARD32 nv_flags0 = 0;
256 CARD32 nv_flags1 = 0;
257 CARD32 nv_flags2 = 0;
/* NOTE(review): both patch-config branches OR in the same value on this
   path, whereas the pre-NV40 path below uses two different values --
   confirm this is intended */
258 if (flags & NV_DMA_CONTEXT_FLAGS_PATCH_ROP_AND)
259 nv_flags0 |= 0x02080000;
260 else if (flags & NV_DMA_CONTEXT_FLAGS_PATCH_SRCCOPY)
261 nv_flags0 |= 0x02080000;
262 if (flags & NV_DMA_CONTEXT_FLAGS_CLIP_ENABLE)
263 nv_flags0 |= 0x00020000;
/* the bits used for NV_DMA_CONTEXT_FLAGS_MONO depend on host byte order */
264 #if X_BYTE_ORDER == X_BIG_ENDIAN
265 if (flags & NV_DMA_CONTEXT_FLAGS_MONO)
266 nv_flags1 |= 0x01000000;
267 nv_flags2 |= 0x01000000;
269 if (flags & NV_DMA_CONTEXT_FLAGS_MONO)
270 nv_flags1 |= 0x02000000;
/* words 0-3 carry class, notifier, dma_in and dma_out; words 4-7 are zeroed */
272 pNv->PRAMIN[pramin_offset] = class | nv_flags0;
273 pNv->PRAMIN[pramin_offset+1] = dma_notifier | nv_flags1;
274 pNv->PRAMIN[pramin_offset+2] = dma_in | nv_flags2;
275 pNv->PRAMIN[pramin_offset+3] = dma_out;
276 pNv->PRAMIN[pramin_offset+4] = 0;
277 pNv->PRAMIN[pramin_offset+5] = 0;
278 pNv->PRAMIN[pramin_offset+6] = 0;
279 pNv->PRAMIN[pramin_offset+7] = 0;
/* flag translation and four-word object layout for the other architectures */
282 CARD32 nv_flags0 = 0;
283 CARD32 nv_flags1 = 0;
284 if (flags & NV_DMA_CONTEXT_FLAGS_PATCH_ROP_AND)
285 nv_flags0 |= 0x01008000;
286 else if (flags & NV_DMA_CONTEXT_FLAGS_PATCH_SRCCOPY)
287 nv_flags0 |= 0x01018000;
288 if (flags & NV_DMA_CONTEXT_FLAGS_CLIP_ENABLE)
289 nv_flags0 |= 0x00002000;
290 #if X_BYTE_ORDER == X_BIG_ENDIAN
/* bit 19: the "endian: 1 big" bit from the layout notes above */
291 nv_flags0 |= 0x00080000;
292 if (flags & NV_DMA_CONTEXT_FLAGS_MONO)
293 nv_flags1 |= 0x00000001;
295 if (flags & NV_DMA_CONTEXT_FLAGS_MONO)
296 nv_flags1 |= 0x00000002;
/* notifier in the upper half of word 1; dma_in / dma_out packed into word 2 */
298 pNv->PRAMIN[pramin_offset] = class | nv_flags0;
299 pNv->PRAMIN[pramin_offset+1] = (dma_notifier << 16) | nv_flags1;
300 pNv->PRAMIN[pramin_offset+2] = dma_in | (dma_out << 16);
301 pNv->PRAMIN[pramin_offset+3] = 0;
305 /* insert the created object into the hash table */
306 CARD32 h = hash(handle, 0);
307 NVDEBUG("storing object %x at hash table offset %d\n", handle, h);
/* probe while the handle word of the candidate entry is non-zero (occupied) */
308 while (pNv->RAMHT[h]) {
/* NOTE(review): the wrap test compares h with HashTableSize, yet every
   entry occupies two CARD32s (h and h+1) -- confirm the bound matches
   the table length in CARD32s */
310 if (h == HashTableSize)
/* entry = { handle, descriptor word } */
313 pNv->RAMHT[h] = handle;
314 if (pNv->Architecture >= NV_ARCH_40) {
/* NV40: engine field at bits 21:20 set to 1, object value in the low bits */
315 pNv->RAMHT[h+1] = (1<<20) | object;
/* older chips: bit 31 set, engine field at bits 17:16 set to 1, object value in the low bits */
317 pNv->RAMHT[h+1] = (1<<31) | (1<<16) | object;
323 /* Below is the basic structure of DMA command buffers */
/*
 * Field descriptions for DMA command words.  Values written as hi:lo are
 * bit ranges; the n*32 term appears to select the n-th 32-bit word
 * (NV_FIFO_DMA_DATA is placed at 1*32, i.e. after the header word).
 * NV_FIFO_DMA_OPCODE, NV_FIFO_DMA_OPCODE_JUMP and NV_FIFO_DMA_OPCODE_CALL
 * are defined more than once below, each time with identical replacement
 * text.
 */
324 #define NV_FIFO_DMA_OPCODE ( 0*32+31):( 0*32+29) /* ...VF */
325 #define NV_FIFO_DMA_OPCODE_METHOD 0x00000000 /* ...-V */
326 #define NV_FIFO_DMA_OPCODE_JUMP 0x00000001 /* ...-V */
327 #define NV_FIFO_DMA_OPCODE_NONINC_METHOD 0x00000002 /* ...-V */
328 #define NV_FIFO_DMA_OPCODE_CALL 0x00000003 /* ...-V */
329 #define NV_FIFO_DMA_METHOD_COUNT ( 0*32+28):( 0*32+18) /* ...VF */
330 #define NV_FIFO_DMA_METHOD_SUBCHANNEL ( 0*32+15):( 0*32+13) /* ...VF */
331 #define NV_FIFO_DMA_METHOD_ADDRESS ( 0*32+12):( 0*32+ 2) /* ...VF */
332 #define NV_FIFO_DMA_DATA ( 1*32+31):( 1*32+ 0) /* ...VF */
333 #define NV_FIFO_DMA_NOP 0x00000000 /* ...-V */
/* jump command: opcode plus a target offset field in bits 28:2 */
334 #define NV_FIFO_DMA_OPCODE ( 0*32+31):( 0*32+29) /* ...VF */
335 #define NV_FIFO_DMA_OPCODE_JUMP 0x00000001 /* ...-V */
336 #define NV_FIFO_DMA_JUMP_OFFSET 28:2 /* ...VF */
/* call command: opcode plus a target offset field in bits 28:2 */
337 #define NV_FIFO_DMA_OPCODE ( 0*32+31):( 0*32+29) /* ...VF */
338 #define NV_FIFO_DMA_OPCODE_CALL 0x00000003 /* ...-V */
339 #define NV_FIFO_DMA_CALL_OFFSET 28:2 /* ...VF */
340 #define NV_FIFO_DMA_RETURN 0x00020000 /* ...-V */
/*
 * Create the DMA and context objects used by the driver for the screen
 * pScrn: one DMA object covering FbMapSize bytes of video memory, a set of
 * 2D context objects (surfaces, ROP, pattern, clip rectangle, solid line,
 * image blit, rectangle, scaled image) and, when DRI and AGP
 * initialisation both succeed and AGP memory is available, an AGP DMA
 * object, a notifier and two memory-to-memory transfer objects.
 */
344 void NVInitDma(ScrnInfoPtr pScrn)
346 NVPtr pNv = NVPTR(pScrn);
354 xf86DrvMsg(pScrn->scrnIndex, X_INFO,"In NVInitDma\n");
355 NVDEBUG("\nNVInitDma!\n");
/* walks PRAMIN CARD32 indices from the first free object slot up to 0x1000 */
357 for (i = ((pNv->pramin_free - 0x1000) << 2); i < 0x1000; ++i)
/* read/write DMA object spanning video memory from offset 0 */
360 dma_fb = NVDmaCreateDMAObject(pNv, NV_DMA_TARGET_VIDMEM, 0, pNv->FbMapSize, NV_DMA_ACCES_RW);
/* the surface class depends on the chip generation */
362 NVDmaCreateContextObject (pNv, NvContextSurfaces,
363 (pNv->Architecture >= NV_ARCH_10) ? NV10_CONTEXT_SURFACES_2D : NV4_SURFACE,
364 NV_DMA_CONTEXT_FLAGS_PATCH_ROP_AND,
366 NVDmaCreateContextObject (pNv, NvRop,
368 NV_DMA_CONTEXT_FLAGS_PATCH_ROP_AND,
370 NVDmaCreateContextObject (pNv, NvImagePattern,
372 NV_DMA_CONTEXT_FLAGS_PATCH_ROP_AND|NV_DMA_CONTEXT_FLAGS_MONO,
374 NVDmaCreateContextObject (pNv, NvClipRectangle,
375 NV_IMAGE_BLACK_RECTANGLE,
376 NV_DMA_CONTEXT_FLAGS_PATCH_ROP_AND,
378 NVDmaCreateContextObject (pNv, NvSolidLine,
379 NV4_RENDER_SOLID_LIN,
380 NV_DMA_CONTEXT_FLAGS_PATCH_ROP_AND|NV_DMA_CONTEXT_FLAGS_CLIP_ENABLE,
/* NV12_IMAGE_BLIT is chosen when pNv->WaitVSyncPossible is set */
382 NVDmaCreateContextObject (pNv, NvImageBlit,
383 pNv->WaitVSyncPossible ? NV12_IMAGE_BLIT : NV_IMAGE_BLIT,
384 NV_DMA_CONTEXT_FLAGS_PATCH_ROP_AND,
386 NVDmaCreateContextObject (pNv, NvRectangle,
387 NV4_GDI_RECTANGLE_TEXT,
388 NV_DMA_CONTEXT_FLAGS_PATCH_ROP_AND|NV_DMA_CONTEXT_FLAGS_MONO,
390 NVDmaCreateContextObject (pNv, NvScaledImage,
391 NV_SCALED_IMAGE_FROM_MEMORY,
392 NV_DMA_CONTEXT_FLAGS_PATCH_SRCCOPY,
/* AGP objects are created only if DRI init, AGP init and the AGP allocation all succeeded */
396 if (NVDRIScreenInit(pScrn) && NVInitAGP(pScrn) && pNv->agpMemory) {
/* the AGP DMA object starts 0x10000 bytes in; the notifier sits at AGP offset 0 */
397 dma_agp = NVDmaCreateDMAObject(pNv, NV_DMA_TARGET_AGP, 0x10000, pNv->agpSize - 0x10000,
399 dma_notifier = NVDmaCreateNotifier(pNv, NV_DMA_TARGET_AGP, 0);
/* one transfer object per direction: dma_in / dma_out are swapped between the two */
401 NVDmaCreateContextObject (pNv, NvGraphicsToAGP,
402 NV_MEMORY_TO_MEMORY_FORMAT,
404 dma_fb, dma_agp, dma_notifier);
406 NVDmaCreateContextObject (pNv, NvAGPToGraphics,
407 NV_MEMORY_TO_MEMORY_FORMAT,
409 dma_agp, dma_fb, dma_notifier);
/* diagnostic dump of the hash table, two CARD32s per printed line */
416 ErrorF("Hash table:\n");
417 for (i = 0; i < HashTableSize; i += 2)
418 ErrorF(" %x %x\n", pNv->RAMHT[i], pNv->RAMHT[i+1]);
419 ErrorF("Context/DMA objects:\n");
/* 0x800 equals (0x1200 - 0x1000) << 2, the CARD32 index of the first object
   slot; objects are read through RAMHT, which initHashTable() points at PRAMIN */
420 for (i = 0x800; i < 0x900; i += 8) {
421 ErrorF("%x: %x %x %x %x\n", i, /*(i*4 + 0x10000)/16, */
422 pNv->RAMHT[i], pNv->RAMHT[i+1], pNv->RAMHT[i+2], pNv->RAMHT[i+3]);
423 ErrorF(" %x %x %x %x\n",
424 pNv->RAMHT[i+4], pNv->RAMHT[i+5], pNv->RAMHT[i+6], pNv->RAMHT[i+7]);