1 /***************************************************************************\
3 |* Copyright 1993-2003 NVIDIA, Corporation. All rights reserved. *|
5 |* NOTICE TO USER: The source code is copyrighted under U.S. and *|
6 |* international laws. Users and possessors of this source code are *|
7 |* hereby granted a nonexclusive, royalty-free copyright license to *|
8 |* use this code in individual and commercial software. *|
10 |* Any use of this source code must include, in the user documenta- *|
11 |* tion and internal comments to the code, notices to the end user *|
14 |* Copyright 1993-2003 NVIDIA, Corporation. All rights reserved. *|
16 |* NVIDIA, CORPORATION MAKES NO REPRESENTATION ABOUT THE SUITABILITY *|
17 |* OF THIS SOURCE CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" *|
18 |* WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. NVIDIA, CORPOR- *|
19 |* ATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOURCE CODE, *|
20 |* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGE- *|
21 |* MENT, AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL *|
22 |* NVIDIA, CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT, INCI- *|
23 |* DENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RE- *|
24 |* SULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION *|
25 |* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF *|
26 |* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE. *|
28 |* U.S. Government End Users. This source code is a "commercial *|
29 |* item," as that term is defined at 48 C.F.R. 2.101 (OCT 1995), *|
30 |* consisting of "commercial computer software" and "commercial *|
31 |* computer software documentation," as such terms are used in *|
32 |* 48 C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Govern- *|
33 |* ment only as a commercial end item. Consistent with 48 C.F.R. *|
34 |* 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), *|
35 |* all U.S. Government End Users acquire the source code with only *|
36 |* those rights set forth herein. *|
38 \***************************************************************************/
39 /* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/nv/nv_hw.c,v 1.21 2006/06/16 00:19:33 mvojkovi Exp $ */
41 #include "nv_include.h"
45 uint8_t nvReadVGA(NVPtr pNv, uint8_t index)
47 volatile const uint8_t *ptr = pNv->cur_head ? pNv->PCIO1 : pNv->PCIO0;
48 VGA_WR08(ptr, 0x03D4, index);
49 return VGA_RD08(ptr, 0x03D5);
52 void nvWriteVGA(NVPtr pNv, uint8_t index, uint8_t data)
54 volatile const uint8_t *ptr = pNv->cur_head ? pNv->PCIO1 : pNv->PCIO0;
55 VGA_WR08(ptr, 0x03D4, index);
56 VGA_WR08(ptr, 0x03D5, data);
59 CARD32 nvReadRAMDAC(NVPtr pNv, uint8_t head, uint32_t ramdac_reg)
61 volatile const void *ptr = head ? pNv->PRAMDAC1 : pNv->PRAMDAC0;
62 return MMIO_IN32(ptr, ramdac_reg);
65 void nvWriteRAMDAC(NVPtr pNv, uint8_t head, uint32_t ramdac_reg, CARD32 val)
67 volatile const void *ptr = head ? pNv->PRAMDAC1 : pNv->PRAMDAC0;
68 NV_WR32(ptr, ramdac_reg, val);
71 CARD32 nvReadCRTC(NVPtr pNv, uint8_t head, uint32_t reg)
73 volatile const void *ptr = head ? pNv->PCRTC1 : pNv->PCRTC0;
74 return MMIO_IN32(ptr, reg);
77 void nvWriteCRTC(NVPtr pNv, uint8_t head, uint32_t reg, CARD32 val)
79 volatile const void *ptr = head ? pNv->PCRTC1 : pNv->PCRTC0;
80 NV_WR32(ptr, reg, val);
83 /****************************************************************************\
85 * The video arbitration routines calculate some "magic" numbers that fix *
86 * the snow seen when accessing the framebuffer without them. *
87 * It just works (I hope). *
89 \****************************************************************************/
94 int graphics_burst_size;
116 int graphics_burst_size;
117 int video_burst_size;
/*
 * nvGetClocks: read the PLL registers and return two clock values through
 * *MClk and *NVClk.  Both are scaled from pNv->CrystalFreqKHz; the ErrorF()
 * call at the end divides them by 1000 and labels them MHz.
 * Which registers are read depends on pNv->Architecture, pNv->twoStagePLL
 * and pNv->Chipset.
 * NOTE(review): this view of the function is missing lines (braces, else
 * arms, and every assignment to M), so the exact branch nesting and the
 * fallback values of MB/NB cannot be confirmed from here -- check the full
 * file before changing anything.
 */
137 static void nvGetClocks(NVPtr pNv, unsigned int *MClk, unsigned int *NVClk)
139 unsigned int pll, N, M, MB, NB, P;
/* NV_ARCH_40 and later: PLL values are fetched with nvReadMC(). */
141 if(pNv->Architecture >= NV_ARCH_40) {
/* MClk: P comes from register 0x4020, N/MB/NB from register 0x4024. */
142 pll = nvReadMC(pNv, 0x4020);
143 P = (pll >> 16) & 0x07;
144 pll = nvReadMC(pNv, 0x4024);
146 N = (pll >> 8) & 0xFF;
/* NOTE(review): the statements guarded by this G71/G73 test are not visible;
 * the MB/NB reads that follow are presumably its else arm. */
147 if(((pNv->Chipset & 0xfff0) == CHIPSET_G71) ||
148 ((pNv->Chipset & 0xfff0) == CHIPSET_G73))
153 MB = (pll >> 16) & 0xFF;
154 NB = (pll >> 24) & 0xFF;
156 *MClk = ((N * NB * pNv->CrystalFreqKHz) / (M * MB)) >> P;
/* NVClk: P comes from register 0x4000, N/MB/NB from register 0x4004. */
158 pll = nvReadMC(pNv, 0x4000);
159 P = (pll >> 16) & 0x07;
160 pll = nvReadMC(pNv, 0x4004);
162 N = (pll >> 8) & 0xFF;
163 MB = (pll >> 16) & 0xFF;
164 NB = (pll >> 24) & 0xFF;
166 *NVClk = ((N * NB * pNv->CrystalFreqKHz) / (M * MB)) >> P;
/* Two-stage PLL: N and P come from the MPLL/NVPLL RAMDAC registers, and NB
 * from the matching _B register when bit 31 of that register is set. */
168 if(pNv->twoStagePLL) {
169 pll = nvReadRAMDAC0(pNv, NV_RAMDAC_MPLL);
171 N = (pll >> 8) & 0xFF;
172 P = (pll >> 16) & 0x0F;
173 pll = nvReadRAMDAC0(pNv, NV_RAMDAC_MPLL_B);
174 if(pll & 0x80000000) {
176 NB = (pll >> 8) & 0xFF;
181 *MClk = ((N * NB * pNv->CrystalFreqKHz) / (M * MB)) >> P;
183 pll = nvReadRAMDAC0(pNv, NV_RAMDAC_NVPLL);
185 N = (pll >> 8) & 0xFF;
186 P = (pll >> 16) & 0x0F;
187 pll = nvReadRAMDAC0(pNv, NV_RAMDAC_NVPLL_B);
188 if(pll & 0x80000000) {
190 NB = (pll >> 8) & 0xFF;
195 *NVClk = ((N * NB * pNv->CrystalFreqKHz) / (M * MB)) >> P;
/* NV30/NV35: a single register per clock; when bit 7 is set, MB is taken
 * from bits 4-6 and NB from bits 19-23 of that same register. */
197 if(((pNv->Chipset & 0x0ff0) == CHIPSET_NV30) ||
198 ((pNv->Chipset & 0x0ff0) == CHIPSET_NV35))
200 pll = nvReadRAMDAC0(pNv, NV_RAMDAC_MPLL);
202 N = (pll >> 8) & 0xFF;
203 P = (pll >> 16) & 0x07;
204 if(pll & 0x00000080) {
205 MB = (pll >> 4) & 0x07;
206 NB = (pll >> 19) & 0x1f;
211 *MClk = ((N * NB * pNv->CrystalFreqKHz) / (M * MB)) >> P;
213 pll = nvReadRAMDAC0(pNv, NV_RAMDAC_NVPLL);
215 N = (pll >> 8) & 0xFF;
216 P = (pll >> 16) & 0x07;
217 if(pll & 0x00000080) {
218 MB = (pll >> 4) & 0x07;
219 NB = (pll >> 19) & 0x1f;
224 *NVClk = ((N * NB * pNv->CrystalFreqKHz) / (M * MB)) >> P;
/* Remaining case: single-stage formula (N * crystal / M) >> P, with no
 * MB/NB terms. */
226 pll = nvReadRAMDAC0(pNv, NV_RAMDAC_MPLL);
228 N = (pll >> 8) & 0xFF;
229 P = (pll >> 16) & 0x0F;
230 *MClk = (N * pNv->CrystalFreqKHz / M) >> P;
232 pll = nvReadRAMDAC0(pNv, NV_RAMDAC_NVPLL);
234 N = (pll >> 8) & 0xFF;
235 P = (pll >> 16) & 0x0F;
236 *NVClk = (N * pNv->CrystalFreqKHz / M) >> P;
/* Log both results, converted to MHz by integer division. */
240 ErrorF("NVClock = %i MHz, MEMClock = %i MHz\n", *NVClk/1000, *MClk/1000);
/*
 * nv4CalcArbitration: from the clock rates and memory parameters read
 * through arb, compute low-water marks and burst sizes and store them
 * through fifo (graphics_lwm, graphics_burst_size, video_lwm,
 * video_burst_size).
 * NOTE(review): the parameter list and many body lines (the initialisation
 * of bpp, cbs, the clock counts and page-miss counts, the search loop header
 * and most branch bodies) are not visible in this view.  The comments below
 * describe only the statements that are shown.
 */
245 void nv4CalcArbitration (
250 int data, pagemiss, cas,width, video_enable, bpp;
251 int nvclks, mclks, pclks, vpagemiss, crtpagemiss, vbs;
252 int found, mclk_extra, mclk_loop, cbs, m1, p1;
253 int mclk_freq, pclk_freq, nvclk_freq, mp_enable;
254 int us_m, us_n, us_p, video_drain_rate, crtc_drain_rate;
255 int vpm_us, us_video, vlwm, video_fill_us, cpm_us, us_crt,clwm;
/* Cache the inputs from *arb in locals. */
258 pclk_freq = arb->pclk_khz;
259 mclk_freq = arb->mclk_khz;
260 nvclk_freq = arb->nvclk_khz;
261 pagemiss = arb->mem_page_miss;
262 cas = arb->mem_latency;
/* Memory width expressed in units of 64. */
263 width = arb->memory_width >> 6;
264 video_enable = arb->enable_video;
266 mp_enable = arb->enable_mp;
/* Turn clock counts into latencies: count * 1000*1000 / frequency.
 * NOTE(review): us_p is computed from nvclks although its divisor is
 * pclk_freq and a pclks variable is declared; nv10CalcArbitration uses
 * pclks for the corresponding value -- confirm which is intended. */
297 mclk_loop = mclks+mclk_extra;
298 us_m = mclk_loop *1000*1000 / mclk_freq;
299 us_n = nvclks*1000*1000 / nvclk_freq;
300 us_p = nvclks*1000*1000 / pclk_freq;
/* Drain rates, both derived from the pixel clock. */
303 video_drain_rate = pclk_freq * 2;
304 crtc_drain_rate = pclk_freq * bpp/8;
/* Video path: page-miss time plus pipeline latencies plus fill time gives
 * us_video; multiplied by the drain rate that yields the video low-water
 * mark vlwm.  The fill time uses whichever of the two expressions the
 * nvclk/mclk comparison selects (the else keyword is not visible). */
308 vpm_us = (vpagemiss * pagemiss)*1000*1000/mclk_freq;
309 if (nvclk_freq * 2 > mclk_freq * width)
310 video_fill_us = cbs*1000*1000 / 16 / nvclk_freq ;
312 video_fill_us = cbs*1000*1000 / (8 * width) / mclk_freq;
313 us_video = vpm_us + us_m + us_n + us_p + video_fill_us;
314 vlwm = us_video * video_drain_rate/(1000*1000);
/* Shrink the video burst size as vlwm grows: 64 above 128, 32 above 192. */
317 if (vlwm > 128) vbs = 64;
318 if (vlwm > (256-64)) vbs = 32;
319 if (nvclk_freq * 2 > mclk_freq * width)
320 video_fill_us = vbs *1000*1000/ 16 / nvclk_freq ;
322 video_fill_us = vbs*1000*1000 / (8 * width) / mclk_freq;
/* CRT low-water mark clwm.  It is computed twice below; presumably these
 * are the two arms of a video-enabled / video-disabled branch whose
 * if/else lines are not visible -- confirm in the full file. */
323 cpm_us = crtpagemiss * pagemiss *1000*1000/ mclk_freq;
330 clwm = us_crt * crtc_drain_rate/(1000*1000);
335 crtc_drain_rate = pclk_freq * bpp/8;
338 cpm_us = crtpagemiss * pagemiss *1000*1000/ mclk_freq;
339 us_crt = cpm_us + us_m + us_n + us_p ;
340 clwm = us_crt * crtc_drain_rate/(1000*1000);
/* m1 is clwm + cbs - 512; p1 is m1 scaled by pclk_freq / mclk_freq. */
343 m1 = clwm + cbs - 512;
344 p1 = m1 * pclk_freq / mclk_freq;
/* Acceptance checks.  In each visible case the search is ended (found = 1)
 * once mclk_extra has reached 0; the other actions taken in these branches
 * are not visible. */
346 if ((p1 < m1) && (m1 > 0))
350 if (mclk_extra ==0) found = 1;
353 else if (video_enable)
355 if ((clwm > 511) || (vlwm > 255))
359 if (mclk_extra ==0) found = 1;
369 if (mclk_extra ==0) found = 1;
/* Enforce minimum low-water marks. */
373 if (clwm < 384) clwm = 384;
374 if (vlwm < 128) vlwm = 128;
/* Publish the results.  NOTE(review): the assignment that gives data its
 * value for graphics_lwm is not visible in this view. */
376 fifo->graphics_lwm = data;
377 fifo->graphics_burst_size = 128;
378 data = (int)((vlwm+15));
379 fifo->video_lwm = data;
380 fifo->video_burst_size = vbs;
/*
 * nv4UpdateArbitrationSettings: collect the current clocks and memory
 * configuration into an nv4_sim_state, run nv4CalcArbitration() on it, and
 * convert the resulting graphics burst size and low-water mark into the
 * values stored through burst and lwm.
 * NOTE(review): the parameter list is not visible in this view; the body
 * uses VClk, pixelDepth, burst, lwm and pNv.  Some lines between the
 * nv4CalcArbitration() call and the end (including any initial store to
 * *burst) are also missing.
 */
384 void nv4UpdateArbitrationSettings (
392 nv4_fifo_info fifo_data;
393 nv4_sim_state sim_data;
394 unsigned int MClk, NVClk, cfg1;
396 nvGetClocks(pNv, &MClk, &NVClk);
398 cfg1 = nvReadFB(pNv, NV_PFB_CFG1);
399 sim_data.pix_bpp = (char)pixelDepth;
/* Video and mp are both reported as disabled on this path. */
400 sim_data.enable_video = 0;
401 sim_data.enable_mp = 0;
/* Bit 4 of EXTDEV register 0x0000 selects a width of 128, otherwise 64. */
402 sim_data.memory_width = (nvReadEXTDEV(pNv, 0x0000) & 0x10) ? 128 : 64;
/* Latency is the low nibble of cfg1. */
403 sim_data.mem_latency = (char)cfg1 & 0x0F;
404 sim_data.mem_aligned = 1;
/* Page-miss value is bits 4-7 of cfg1 plus bit 31 of cfg1. */
405 sim_data.mem_page_miss = (char)(((cfg1 >> 4) &0x0F) + ((cfg1 >> 31) & 0x01));
406 sim_data.gr_during_vid = 0;
407 sim_data.pclk_khz = VClk;
408 sim_data.mclk_khz = MClk;
409 sim_data.nvclk_khz = NVClk;
410 nv4CalcArbitration(&fifo_data, &sim_data);
/* The loop increments *burst once per halving of b until b reaches zero,
 * i.e. it adds floor(log2(burst_size / 16)) for burst sizes of 16 or more. */
413 int b = fifo_data.graphics_burst_size >> 4;
415 while (b >>= 1) (*burst)++;
/* The low-water mark is reported divided by 8. */
416 *lwm = fifo_data.graphics_lwm >> 3;
/*
 * nv10CalcArbitration: from the clock rates and memory parameters read
 * through arb, compute the CRT low-water mark and burst size and store them
 * through fifo, together with fixed video values (video_lwm = 1024,
 * video_burst_size = 512).
 * NOTE(review): this view is missing lines: the second parameter, the
 * declarations of min_mclk_extra, vus_m, nvclk_fill and clwm_rnd_down, the
 * initialisation of bpp, cbs and data, the search loop header and most
 * branch structure.  The comments below describe only what is shown.
 */
420 void nv10CalcArbitration (
421 nv10_fifo_info *fifo,
425 int data, pagemiss, width, video_enable, bpp;
426 int nvclks, mclks, pclks, vpagemiss, crtpagemiss;
428 int found, mclk_extra, mclk_loop, cbs, m1;
429 int mclk_freq, pclk_freq, nvclk_freq, mp_enable;
430 int us_m, us_m_min, us_n, us_p, crtc_drain_rate;
432 int vpm_us, us_video, cpm_us, us_crt,clwm;
434 int m2us, us_pipe_min, p1clk, p2;
436 int us_min_mclk_extra;
/* Cache the inputs from *arb in locals. */
439 pclk_freq = arb->pclk_khz; /* freq in KHz */
440 mclk_freq = arb->mclk_khz;
441 nvclk_freq = arb->nvclk_khz;
442 pagemiss = arb->mem_page_miss;
443 width = arb->memory_width/64;
444 video_enable = arb->enable_video;
446 mp_enable = arb->enable_mp;
/* Accumulate pipeline depth as clock counts in the pclk, nvclk and mclk
 * domains; each contribution carries its own explanation. */
451 pclks = 4; /* lwm detect. */
453 nvclks = 3; /* lwm -> sync. */
454 nvclks += 2; /* fbi bus cycles (1 req + 1 busy) */
456 mclks = 1; /* 2 edge sync. may be very close to edge so just put one. */
458 mclks += 1; /* arb_hp_req */
459 mclks += 5; /* ap_hp_req tiling pipeline */
461 mclks += 2; /* tc_req latency fifo */
462 mclks += 2; /* fb_cas_n_ memory request to fbio block */
463 mclks += 7; /* sm_d_rdv data returned from fbio block */
/* NOTE(review): the statements selected by the memory_type / memory_width
 * tests below are not visible in this view. */
465 /* fb.rd.d.Put_gc need to accumulate 256 bits for read */
466 if (arb->memory_type == 0)
467 if (arb->memory_width == 64) /* 64 bit bus */
472 if (arb->memory_width == 64) /* 64 bit bus */
/* Two alternative margins for mclk_extra; the first follows the
 * "no video and 128-bit memory" test, the second presumably belongs to its
 * else arm (not visible). */
477 if ((!video_enable) && (arb->memory_width == 128))
479 mclk_extra = (bpp == 32) ? 31 : 42; /* Margin of error */
484 mclk_extra = (bpp == 32) ? 8 : 4; /* Margin of error */
485 /* mclk_extra = 4; */ /* Margin of error */
489 nvclks += 1; /* 2 edge sync. may be very close to edge so just put one. */
490 nvclks += 1; /* fbi_d_rdv_n */
491 nvclks += 1; /* Fbi_d_rdata */
492 nvclks += 1; /* crtfifo load */
495 mclks+=4; /* Mp can get in with a burst of 8. */
496 /* Extra clocks determined by heuristics */
/* Turn clock counts into latencies: count * 1000*1000 / frequency. */
504 mclk_loop = mclks+mclk_extra;
505 us_m = mclk_loop *1000*1000 / mclk_freq; /* Mclk latency in us */
506 us_m_min = mclks * 1000*1000 / mclk_freq; /* Minimum Mclk latency in us */
507 us_min_mclk_extra = min_mclk_extra *1000*1000 / mclk_freq;
508 us_n = nvclks*1000*1000 / nvclk_freq;/* nvclk latency in us */
509 us_p = pclks*1000*1000 / pclk_freq;/* pclk latency in us */
510 us_pipe_min = us_m_min + us_n + us_p;
512 vus_m = mclk_loop *1000*1000 / mclk_freq; /* Mclk latency in us */
/* CRT low-water mark.  clwm is computed twice below with different
 * page-miss counts; presumably these are the video-enabled and
 * video-disabled arms of a branch whose if/else lines are not visible. */
515 crtc_drain_rate = pclk_freq * bpp/8; /* MB/s */
517 vpagemiss = 1; /* self generating page miss */
518 vpagemiss += 1; /* One higher priority before */
520 crtpagemiss = 2; /* self generating page miss */
522 crtpagemiss += 1; /* if MA0 conflict */
524 vpm_us = (vpagemiss * pagemiss)*1000*1000/mclk_freq;
526 us_video = vpm_us + vus_m; /* Video has separate read return path */
528 cpm_us = crtpagemiss * pagemiss *1000*1000/ mclk_freq;
/* NOTE(review): the next three lines are operands of a sum whose
 * assignment target is on a line not visible here (us_crt is the value
 * consumed immediately afterwards). */
530 us_video /* Wait for video */
531 +cpm_us /* CRT Page miss */
532 +us_m + us_n +us_p /* other latency */
535 clwm = us_crt * crtc_drain_rate/(1000*1000);
536 clwm++; /* fixed point <= float_point - 1. Fixes that */
538 crtc_drain_rate = pclk_freq * bpp/8; /* bpp * pclk/8 */
540 crtpagemiss = 1; /* self generating page miss */
541 crtpagemiss += 1; /* MA0 page miss */
543 crtpagemiss += 1; /* if MA0 conflict */
544 cpm_us = crtpagemiss * pagemiss *1000*1000/ mclk_freq;
545 us_crt = cpm_us + us_m + us_n + us_p ;
546 clwm = us_crt * crtc_drain_rate/(1000*1000);
547 clwm++; /* fixed point <= float_point - 1. Fixes that */
/* Compare the CRT drain rate against nvclk_freq * 8 with a +/-2% band:
 * at 102% or more clwm is forced to a failing value; the handling of the
 * 98%..102% band is not visible in this view. */
549 /* Finally, a heuristic check when width == 64 bits */
551 nvclk_fill = nvclk_freq * 8;
552 if(crtc_drain_rate * 100 >= nvclk_fill * 102)
553 clwm = 0xfff; /*Large number to fail */
555 else if(crtc_drain_rate * 100 >= nvclk_fill * 98) {
/* clwm rounded down to a multiple of 8; the action taken when that is
 * below clwm is not visible. */
568 clwm_rnd_down = ((int)clwm/8)*8;
569 if (clwm_rnd_down < clwm)
572 m1 = clwm + cbs - 1024; /* Amount of overfill */
573 m2us = us_pipe_min + us_min_mclk_extra;
575 /* pclk cycles to drain */
576 p1clk = m2us * pclk_freq/(1000*1000);
577 p2 = p1clk * bpp / 8; /* bytes drained. */
/* Acceptance checks: the search ends (found = 1) once min_mclk_extra has
 * reached 0; the burst size cbs is halved on one of the retry paths. */
579 if((p2 < m1) && (m1 > 0)) {
582 if(min_mclk_extra == 0) {
584 found = 1; /* Can't adjust anymore! */
586 cbs = cbs/2; /* reduce the burst size */
592 if (clwm > 1023){ /* Have some margin */
595 if(min_mclk_extra == 0)
596 found = 1; /* Can't adjust anymore! */
/* Enforce a minimum low-water mark of 1024 - cbs + 8. */
602 if(clwm < (1024-cbs+8)) clwm = 1024-cbs+8;
604 /* printf("CRT LWM: %f bytes, prog: 0x%x, bs: 256\n", clwm, data ); */
/* Publish the results.  NOTE(review): the assignment that gives data its
 * value is not visible in this view. */
605 fifo->graphics_lwm = data; fifo->graphics_burst_size = cbs;
607 fifo->video_lwm = 1024; fifo->video_burst_size = 512;
/*
 * nv10UpdateArbitrationSettings: collect the current clocks and memory
 * configuration into an nv10_sim_state, run nv10CalcArbitration() on it,
 * and, when the result is flagged valid, convert the graphics burst size
 * and low-water mark into the values stored through burst and lwm.
 * NOTE(review): the parameter list is not visible in this view; the body
 * uses VClk, pixelDepth, burst, lwm and pNv.  Any initial store to *burst
 * is also on a line not shown.
 */
611 void nv10UpdateArbitrationSettings (
619 nv10_fifo_info fifo_data;
620 nv10_sim_state sim_data;
621 unsigned int MClk, NVClk, cfg1;
623 nvGetClocks(pNv, &MClk, &NVClk);
625 cfg1 = nvReadFB(pNv, NV_PFB_CFG1);
626 sim_data.pix_bpp = (char)pixelDepth;
/* Unlike the nv4 and nForce variants in this file, video is reported as
 * enabled here. */
627 sim_data.enable_video = 1;
628 sim_data.enable_mp = 0;
/* Memory type is bit 0 of NV_PFB_CFG0. */
629 sim_data.memory_type = (nvReadFB(pNv, NV_PFB_CFG0) & 0x01) ? 1 : 0;
/* Bit 4 of EXTDEV register 0x0000 selects a width of 128, otherwise 64. */
630 sim_data.memory_width = (nvReadEXTDEV(pNv, 0x0000) & 0x10) ? 128 : 64;
/* Latency is the low nibble of cfg1. */
631 sim_data.mem_latency = (char)cfg1 & 0x0F;
632 sim_data.mem_aligned = 1;
/* Page-miss value is bits 4-7 of cfg1 plus bit 31 of cfg1. */
633 sim_data.mem_page_miss = (char)(((cfg1>>4) &0x0F) + ((cfg1>>31) & 0x01));
634 sim_data.gr_during_vid = 0;
635 sim_data.pclk_khz = VClk;
636 sim_data.mclk_khz = MClk;
637 sim_data.nvclk_khz = NVClk;
638 nv10CalcArbitration(&fifo_data, &sim_data);
/* Outputs are only updated when the calculation reports a valid result. */
639 if (fifo_data.valid) {
/* The loop increments *burst once per halving of b until b reaches zero,
 * i.e. it adds floor(log2(burst_size / 16)) for burst sizes of 16 or more. */
640 int b = fifo_data.graphics_burst_size >> 4;
642 while (b >>= 1) (*burst)++;
/* The low-water mark is reported divided by 8. */
643 *lwm = fifo_data.graphics_lwm >> 3;
/*
 * nv30UpdateArbitrationSettings: derive the values stored through burst and
 * lwm from a FIFO size and burst size, with the low-water mark taken as
 * fifo_size - burst_size.
 * NOTE(review): most of this function is not visible in this view -- the
 * remaining parameters, the assignments to fifo_size and burst_size, and
 * any initial store to *burst.  MClk and NVClk are filled in by
 * nvGetClocks() but are not used by any visible line.
 */
648 void nv30UpdateArbitrationSettings (NVPtr pNv,
652 unsigned int MClk, NVClk;
653 unsigned int fifo_size, burst_size, graphics_lwm;
657 graphics_lwm = fifo_size - burst_size;
659 nvGetClocks(pNv, &MClk, &NVClk);
/* The loop increments *burst once per halving of burst_size until it
 * reaches zero; burst_size is consumed in the process. */
663 while(burst_size >>= 1) (*burst)++;
/* The low-water mark is reported divided by 8. */
664 *lwm = graphics_lwm >> 3;
/*
 * nForceUpdateArbitrationSettings: variant of the nv10 arbitration setup
 * that takes the memory clock and memory configuration from PCI
 * configuration reads (pciReadLong on pciTag(0, 0, n)) instead of from
 * nvGetClocks()/NV_PFB_CFG1, then runs nv10CalcArbitration() and converts
 * the result into the values stored through burst and lwm.
 * NOTE(review): this view is missing lines -- the remaining parameters
 * (pixelDepth, burst, lwm and pNv are used), the declaration of dimm[], the
 * else keyword of the MClk selection, the bodies of the DIMM checks and any
 * guard around the final stores.
 */
667 void nForceUpdateArbitrationSettings (unsigned VClk,
674 nv10_fifo_info fifo_data;
675 nv10_sim_state sim_data;
676 unsigned int M, N, P, pll, MClk, NVClk, memctrl;
/* CHIPSET_NFORCE: MClk is 400000 divided by a post-divider taken from
 * bits 8-11 of config offset 0x6C on pciTag(0, 0, 3); a divider of 0 is
 * treated as 4. */
678 if((pNv->Chipset & 0x0FF0) == CHIPSET_NFORCE) {
679 unsigned int uMClkPostDiv;
681 uMClkPostDiv = (pciReadLong(pciTag(0, 0, 3), 0x6C) >> 8) & 0xf;
682 if(!uMClkPostDiv) uMClkPostDiv = 4;
683 MClk = 400000 / uMClkPostDiv;
/* Otherwise (presumably the else arm): MClk is the value at config offset
 * 0x4C on pciTag(0, 0, 5), divided by 1000. */
685 MClk = pciReadLong(pciTag(0, 0, 5), 0x4C) / 1000;
/* NVClk uses the single-stage formula (N * crystal / M) >> P on the NVPLL
 * register. */
688 pll = nvReadRAMDAC0(pNv, NV_RAMDAC_NVPLL);
689 M = (pll >> 0) & 0xFF; N = (pll >> 8) & 0xFF; P = (pll >> 16) & 0x0F;
690 NVClk = (N * pNv->CrystalFreqKHz / M) >> P;
691 sim_data.pix_bpp = (char)pixelDepth;
692 sim_data.enable_video = 0;
693 sim_data.enable_mp = 0;
/* Memory type is bit 12 of config offset 0x7C on pciTag(0, 0, 1). */
694 sim_data.memory_type = (pciReadLong(pciTag(0, 0, 1), 0x7C) >> 12) & 1;
695 sim_data.memory_width = 64;
/* memctrl is the upper 16 bits of config dword 0 on pciTag(0, 0, 3)
 * (presumably the PCI device ID -- confirm).  For the three listed values
 * the DIMM settings are compared; the action taken on a mismatch is only
 * partly visible (the warning text below). */
697 memctrl = pciReadLong(pciTag(0, 0, 3), 0x00) >> 16;
699 if((memctrl == 0x1A9) || (memctrl == 0x1AB) || (memctrl == 0x1ED)) {
702 dimm[0] = (pciReadLong(pciTag(0, 0, 2), 0x40) >> 8) & 0x4F;
703 dimm[1] = (pciReadLong(pciTag(0, 0, 2), 0x44) >> 8) & 0x4F;
704 dimm[2] = (pciReadLong(pciTag(0, 0, 2), 0x48) >> 8) & 0x4F;
706 if((dimm[0] + dimm[1]) != dimm[2]) {
708 "your nForce DIMMs are not arranged in optimal banks!\n");
/* Latency and page-miss values are fixed constants on this path rather
 * than being read from hardware. */
712 sim_data.mem_latency = 3;
713 sim_data.mem_aligned = 1;
714 sim_data.mem_page_miss = 10;
715 sim_data.gr_during_vid = 0;
716 sim_data.pclk_khz = VClk;
717 sim_data.mclk_khz = MClk;
718 sim_data.nvclk_khz = NVClk;
719 nv10CalcArbitration(&fifo_data, &sim_data);
/* The loop increments *burst once per halving of b until b reaches zero,
 * i.e. it adds floor(log2(burst_size / 16)) for burst sizes of 16 or more. */
722 int b = fifo_data.graphics_burst_size >> 4;
724 while (b >>= 1) (*burst)++;
/* The low-water mark is reported divided by 8. */
725 *lwm = fifo_data.graphics_lwm >> 3;