Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
[linux-2.6] / arch / mips / mm / pg-sb1.c
1 /*
2  * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
3  * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
4  * Copyright (C) 2000 SiByte, Inc.
5  * Copyright (C) 2005 Thiemo Seufer
6  *
7  * Written by Justin Carlson of SiByte, Inc.
8  *         and Kip Walker of Broadcom Corp.
9  *
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public License
13  * as published by the Free Software Foundation; either version 2
14  * of the License, or (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
24  */
25 #include <linux/module.h>
26 #include <linux/sched.h>
27 #include <linux/smp.h>
28
29 #include <asm/io.h>
30 #include <asm/sibyte/sb1250.h>
31 #include <asm/sibyte/sb1250_regs.h>
32 #include <asm/sibyte/sb1250_dma.h>
33
/*
 * Hint operands pasted (as string literals) into the "pref" instructions
 * of the page clear/copy loops in this file.  Builds with
 * CONFIG_SB1_PASS_1_WORKAROUNDS use hints 0 (load) and 1 (store); every
 * other build uses hints 4 (load) and 5 (store).
 */
#if !defined(CONFIG_SB1_PASS_1_WORKAROUNDS)
# define SB1_PREF_LOAD_STREAMED_HINT    "4"
# define SB1_PREF_STORE_STREAMED_HINT   "5"
#else
# define SB1_PREF_LOAD_STREAMED_HINT    "0"
# define SB1_PREF_STORE_STREAMED_HINT   "1"
#endif
41
/*
 * clear_page_cpu() - zero one page using CPU stores.
 * @page: start of the page; PAGE_SIZE bytes starting here are written.
 *
 * Each loop iteration stores four 64-bit zeroes (32 bytes).
 *
 * With CONFIG_CPU_HAS_PREFETCH the first loop zeroes everything except
 * the final 128 bytes of the page while issuing store-hint prefetches
 * ahead of the stores; the second loop then zeroes the final 128 bytes
 * without prefetching, so no prefetch targets an address past the end
 * of the page.  Without CONFIG_CPU_HAS_PREFETCH only the second loop is
 * assembled and it covers the whole page.
 *
 * Both loops end in "bnel" (branch-likely): the instruction placed in
 * the delay slot is executed only when the branch is taken, which is why
 * the delay-slot prefetch/store never runs on the final iteration.
 */
42 static inline void clear_page_cpu(void *page)
43 {
44         unsigned char *addr = (unsigned char *) page;
45         unsigned char *end = addr + PAGE_SIZE;
46
47         /*
48          * JDCXXX - This should be bottlenecked by the write buffer, but these
49          * things tend to be mildly unpredictable...should check this on the
50          * performance model
51          *
52          * We prefetch 4 lines ahead.  We're also "cheating" slightly here...
53          * since we know we're on an SB1, we force the assembler to take
54          * 64-bit operands to speed things up
55          */
56         __asm__ __volatile__(
57         "       .set    push            \n"
58         "       .set    mips4           \n"
59         "       .set    noreorder       \n"
60 #ifdef CONFIG_CPU_HAS_PREFETCH
        /*
         * Bias the pointer by +128 so that, inside the loop, -128(%0) is
         * the line being stored and -32(%0) is the line being prefetched.
         */
61         "       daddiu  %0, %0, 128     \n"
62         "       pref    " SB1_PREF_STORE_STREAMED_HINT ", -128(%0)  \n"
63                                              /* Prefetch the first 4 lines */
64         "       pref    " SB1_PREF_STORE_STREAMED_HINT ",  -96(%0)  \n"
65         "       pref    " SB1_PREF_STORE_STREAMED_HINT ",  -64(%0)  \n"
66         "       pref    " SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n"
67         "1:     sd      $0, -128(%0)    \n"  /* Throw out a cacheline of 0's */
68         "       sd      $0, -120(%0)    \n"
69         "       sd      $0, -112(%0)    \n"
70         "       sd      $0, -104(%0)    \n"
71         "       daddiu  %0, %0, 32      \n"
72         "       bnel    %0, %1, 1b      \n"
73         "        pref   " SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n"
        /* Remove the bias: %0 now points at the last 128 bytes of the page. */
74         "       daddiu  %0, %0, -128    \n"
75 #endif
        /*
         * Tail loop.  The first store of every line sits outside the label:
         * once here for the first line, and in the branch delay slot for
         * each following line.
         */
76         "       sd      $0, 0(%0)       \n"  /* Throw out a cacheline of 0's */
77         "1:     sd      $0, 8(%0)       \n"
78         "       sd      $0, 16(%0)      \n"
79         "       sd      $0, 24(%0)      \n"
80         "       daddiu  %0, %0, 32      \n"
81         "       bnel    %0, %1, 1b      \n"
82         "        sd     $0, 0(%0)       \n"
83         "       .set    pop             \n"
        /* %0 = addr (read and advanced by the asm), %1 = end (read only) */
84         : "+r" (addr)
85         : "r" (end)
86         : "memory");
87 }
88
/*
 * copy_page_cpu() - copy one page using CPU loads and stores.
 * @to:   start of the destination page
 * @from: start of the source page
 *
 * Copies PAGE_SIZE bytes, 32 bytes per loop iteration.  Under
 * CONFIG_64BIT four doublewords travel through $8-$11; otherwise eight
 * words travel through $2, $3 and $6-$11.  The clobber list at the end
 * of the asm is selected by the same CONFIG_64BIT test and has to list
 * exactly the registers named in the loops.
 *
 * With CONFIG_CPU_HAS_PREFETCH the first loop copies everything except
 * the final 128 bytes while prefetching the source (load hint) and the
 * destination (store hint) ahead of the copy; the second loop copies the
 * final 128 bytes without prefetching.  Without CONFIG_CPU_HAS_PREFETCH
 * only the second loop is assembled and it covers the whole page.
 *
 * Both loops end in "bnel" (branch-likely): the delay-slot instruction
 * is executed only when the branch is taken.
 */
89 static inline void copy_page_cpu(void *to, void *from)
90 {
91         unsigned char *src = (unsigned char *)from;
92         unsigned char *dst = (unsigned char *)to;
93         unsigned char *end = src + PAGE_SIZE;
94
95         /*
96          * The pref's used here are using "streaming" hints, which cause the
97          * copied data to be kicked out of the cache sooner.  A page copy often
98          * ends up copying a lot more data than is commonly used, so this seems
99          * to make sense in terms of reducing cache pollution, but I've no real
100          * performance data to back this up
101          */
102         __asm__ __volatile__(
103         "       .set    push            \n"
104         "       .set    mips4           \n"
105         "       .set    noreorder       \n"
106 #ifdef CONFIG_CPU_HAS_PREFETCH
        /*
         * Bias both pointers by +128 so that, inside the loop, -128(reg) is
         * the line being copied and -32(reg) is the line being prefetched.
         */
107         "       daddiu  %0, %0, 128     \n"
108         "       daddiu  %1, %1, 128     \n"
109         "       pref    " SB1_PREF_LOAD_STREAMED_HINT  ", -128(%0)\n"
110                                              /* Prefetch the first 4 lines */
111         "       pref    " SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n"
112         "       pref    " SB1_PREF_LOAD_STREAMED_HINT  ",  -96(%0)\n"
113         "       pref    " SB1_PREF_STORE_STREAMED_HINT ",  -96(%1)\n"
114         "       pref    " SB1_PREF_LOAD_STREAMED_HINT  ",  -64(%0)\n"
115         "       pref    " SB1_PREF_STORE_STREAMED_HINT ",  -64(%1)\n"
116         "       pref    " SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n"
        /*
         * The loop label is on the destination prefetch; the matching
         * source prefetch for later lines is in the branch delay slot.
         */
117         "1:     pref    " SB1_PREF_STORE_STREAMED_HINT ",  -32(%1)\n"
118 # ifdef CONFIG_64BIT
119         "       ld      $8, -128(%0)    \n"  /* Block copy a cacheline */
120         "       ld      $9, -120(%0)    \n"
121         "       ld      $10, -112(%0)   \n"
122         "       ld      $11, -104(%0)   \n"
123         "       sd      $8, -128(%1)    \n"
124         "       sd      $9, -120(%1)    \n"
125         "       sd      $10, -112(%1)   \n"
126         "       sd      $11, -104(%1)   \n"
127 # else
128         "       lw      $2, -128(%0)    \n"  /* Block copy a cacheline */
129         "       lw      $3, -124(%0)    \n"
130         "       lw      $6, -120(%0)    \n"
131         "       lw      $7, -116(%0)    \n"
132         "       lw      $8, -112(%0)    \n"
133         "       lw      $9, -108(%0)    \n"
134         "       lw      $10, -104(%0)   \n"
135         "       lw      $11, -100(%0)   \n"
136         "       sw      $2, -128(%1)    \n"
137         "       sw      $3, -124(%1)    \n"
138         "       sw      $6, -120(%1)    \n"
139         "       sw      $7, -116(%1)    \n"
140         "       sw      $8, -112(%1)    \n"
141         "       sw      $9, -108(%1)    \n"
142         "       sw      $10, -104(%1)   \n"
143         "       sw      $11, -100(%1)   \n"
144 # endif
145         "       daddiu  %0, %0, 32      \n"
146         "       daddiu  %1, %1, 32      \n"
147         "       bnel    %0, %2, 1b      \n"
148         "        pref   " SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n"
        /* Remove the bias: both pointers now address the last 128 bytes. */
149         "       daddiu  %0, %0, -128    \n"
150         "       daddiu  %1, %1, -128    \n"
151 #endif
        /*
         * Tail loop.  The first load of every line sits outside the label:
         * once here for the first line, and in the branch delay slot for
         * each following line.
         */
152 #ifdef CONFIG_64BIT
153         "       ld      $8, 0(%0)       \n"  /* Block copy a cacheline */
154         "1:     ld      $9, 8(%0)       \n"
155         "       ld      $10, 16(%0)     \n"
156         "       ld      $11, 24(%0)     \n"
157         "       sd      $8, 0(%1)       \n"
158         "       sd      $9, 8(%1)       \n"
159         "       sd      $10, 16(%1)     \n"
160         "       sd      $11, 24(%1)     \n"
161 #else
162         "       lw      $2, 0(%0)       \n"  /* Block copy a cacheline */
163         "1:     lw      $3, 4(%0)       \n"
164         "       lw      $6, 8(%0)       \n"
165         "       lw      $7, 12(%0)      \n"
166         "       lw      $8, 16(%0)      \n"
167         "       lw      $9, 20(%0)      \n"
168         "       lw      $10, 24(%0)     \n"
169         "       lw      $11, 28(%0)     \n"
170         "       sw      $2, 0(%1)       \n"
171         "       sw      $3, 4(%1)       \n"
172         "       sw      $6, 8(%1)       \n"
173         "       sw      $7, 12(%1)      \n"
174         "       sw      $8, 16(%1)      \n"
175         "       sw      $9, 20(%1)      \n"
176         "       sw      $10, 24(%1)     \n"
177         "       sw      $11, 28(%1)     \n"
178 #endif
179         "       daddiu  %0, %0, 32      \n"
180         "       daddiu  %1, %1, 32      \n"
181         "       bnel    %0, %2, 1b      \n"
182 #ifdef CONFIG_64BIT
183         "        ld     $8, 0(%0)       \n"
184 #else
185         "        lw     $2, 0(%0)       \n"
186 #endif
187         "       .set    pop             \n"
        /* %0 = src and %1 = dst (both advanced by the asm), %2 = end */
188         : "+r" (src), "+r" (dst)
189         : "r" (end)
190 #ifdef CONFIG_64BIT
191         : "$8","$9","$10","$11","memory");
192 #else
193         : "$2","$3","$6","$7","$8","$9","$10","$11","memory");
194 #endif
195 }
196
197
198 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
199
200 /*
201  * Pad descriptors to cacheline, since each is exclusively owned by a
202  * particular CPU.
203  */
/*
 * One DMA descriptor as filled in by clear_page()/copy_page(): two words
 * that are written per transfer plus two words that this file never
 * touches.  Four u64 members make the structure 32 bytes.
 */
204 typedef struct dmadscr_s {
205         u64 dscr_a;     /* destination physical address | M_DM_DSCRA_* flags */
206         u64 dscr_b;     /* source physical address (copy only) | V_DM_DSCRB_SRC_LENGTH() */
207         u64 pad_a;      /* padding, never accessed in this file */
208         u64 pad_b;      /* padding, never accessed in this file */
209 } dmadscr_t;
210
/*
 * One descriptor per DMA channel.  sb1_dma_init() programs the physical
 * address of entry i into channel i as a ring of size 1, and
 * clear_page()/copy_page() pick the entry (and channel) with
 * smp_processor_id().
 */
211 static dmadscr_t page_descr[DM_NUM_CHANNELS]
212         __attribute__((aligned(SMP_CACHE_BYTES)));
213
214 void sb1_dma_init(void)
215 {
216         int i;
217
218         for (i = 0; i < DM_NUM_CHANNELS; i++) {
219                 const u64 base_val = CPHYSADDR(&page_descr[i]) |
220                                      V_DM_DSCR_BASE_RINGSZ(1);
221                 volatile void *base_reg =
222                         IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));
223
224                 __raw_writeq(base_val, base_reg);
225                 __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
226                 __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
227         }
228 }
229
230 void clear_page(void *page)
231 {
232         u64 to_phys = CPHYSADDR(page);
233         unsigned int cpu = smp_processor_id();
234
235         /* if the page is not in KSEG0, use old way */
236         if ((long)KSEGX(page) != (long)CKSEG0)
237                 return clear_page_cpu(page);
238
239         page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
240                                  M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
241         page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
242         __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
243
244         /*
245          * Don't really want to do it this way, but there's no
246          * reliable way to delay completion detection.
247          */
248         while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
249                  & M_DM_DSCR_BASE_INTERRUPT))
250                 ;
251         __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
252 }
253
254 void copy_page(void *to, void *from)
255 {
256         u64 from_phys = CPHYSADDR(from);
257         u64 to_phys = CPHYSADDR(to);
258         unsigned int cpu = smp_processor_id();
259
260         /* if any page is not in KSEG0, use old way */
261         if ((long)KSEGX(to) != (long)CKSEG0
262             || (long)KSEGX(from) != (long)CKSEG0)
263                 return copy_page_cpu(to, from);
264
265         page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
266                                  M_DM_DSCRA_INTERRUPT;
267         page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
268         __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
269
270         /*
271          * Don't really want to do it this way, but there's no
272          * reliable way to delay completion detection.
273          */
274         while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
275                  & M_DM_DSCR_BASE_INTERRUPT))
276                 ;
277         __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
278 }
279
280 #else /* !CONFIG_SIBYTE_DMA_PAGEOPS */
281
/*
 * clear_page() - zero one page.
 * @page: start of the page
 *
 * Without CONFIG_SIBYTE_DMA_PAGEOPS the page is always cleared by the
 * CPU store loop.
 */
void clear_page(void *page)
{
        clear_page_cpu(page);
}
286
/*
 * copy_page() - copy one page.
 * @to:   start of the destination page
 * @from: start of the source page
 *
 * Without CONFIG_SIBYTE_DMA_PAGEOPS the page is always copied by the
 * CPU load/store loop.
 */
void copy_page(void *to, void *from)
{
        copy_page_cpu(to, from);
}
291
292 #endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */
293
/*
 * Export whichever clear_page()/copy_page() pair was compiled above (DMA
 * or CPU-only) so that loadable modules can call them.
 */
294 EXPORT_SYMBOL(clear_page);
295 EXPORT_SYMBOL(copy_page);