Merge branch 'merge' of git://git.secretlab.ca/git/linux-2.6 into merge
[linux-2.6] / arch / parisc / lib / io.c
1 /*
2  * arch/parisc/lib/io.c
3  *
4  * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
5  * Copyright (c) Randolph Chung 2001 <tausq@debian.org>
6  *
7  * IO accessing functions which shouldn't be inlined because they're too big
8  */
9
10 #include <linux/kernel.h>
11 #include <linux/module.h>
12 #include <asm/io.h>
13
14 /* Copies a block of memory to a device in an efficient manner.
15  * Assumes the device can cope with 32-bit transfers.  If it can't,
16  * don't use this function.
17  */
18 void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
19 {
20         if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
21                 goto bytecopy;
22         while ((unsigned long)dst & 3) {
23                 writeb(*(char *)src, dst++);
24                 src++;
25                 count--;
26         }
27         while (count > 3) {
28                 __raw_writel(*(u32 *)src, dst);
29                 src += 4;
30                 dst += 4;
31                 count -= 4;
32         }
33  bytecopy:
34         while (count--) {
35                 writeb(*(char *)src, dst++);
36                 src++;
37         }
38 }
39
40 /*
41 ** Copies a block of memory from a device in an efficient manner.
42 ** Assumes the device can cope with 32-bit transfers.  If it can't,
43 ** don't use this function.
44 **
45 ** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
46 **      27341/64    = 427 cyc per int
47 **      61311/128   = 478 cyc per short
48 **      122637/256  = 479 cyc per byte
49 ** Ergo bus latencies dominant (not transfer size).
50 **      Minimize total number of transfers at cost of CPU cycles.
51 **      TODO: only look at src alignment and adjust the stores to dest.
52 */
53 void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
54 {
55         /* first compare alignment of src/dst */ 
56         if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
57                 goto bytecopy;
58
59         if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
60                 goto shortcopy;
61
62         /* Then check for misaligned start address */
63         if ((unsigned long)src & 1) {
64                 *(u8 *)dst = readb(src);
65                 src++;
66                 dst++;
67                 count--;
68                 if (count < 2) goto bytecopy;
69         }
70
71         if ((unsigned long)src & 2) {
72                 *(u16 *)dst = __raw_readw(src);
73                 src += 2;
74                 dst += 2;
75                 count -= 2;
76         }
77
78         while (count > 3) {
79                 *(u32 *)dst = __raw_readl(src);
80                 dst += 4;
81                 src += 4;
82                 count -= 4;
83         }
84
85  shortcopy:
86         while (count > 1) {
87                 *(u16 *)dst = __raw_readw(src);
88                 src += 2;
89                 dst += 2;
90                 count -= 2;
91         }
92
93  bytecopy:
94         while (count--) {
95                 *(char *)dst = readb(src);
96                 src++;
97                 dst++;
98         }
99 }
100
101 /* Sets a block of memory on a device to a given value.
102  * Assumes the device can cope with 32-bit transfers.  If it can't,
103  * don't use this function.
104  */
105 void memset_io(volatile void __iomem *addr, unsigned char val, int count)
106 {
107         u32 val32 = (val << 24) | (val << 16) | (val << 8) | val;
108         while ((unsigned long)addr & 3) {
109                 writeb(val, addr++);
110                 count--;
111         }
112         while (count > 3) {
113                 __raw_writel(val32, addr);
114                 addr += 4;
115                 count -= 4;
116         }
117         while (count--) {
118                 writeb(val, addr++);
119         }
120 }
121
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at
 * DST.
 *
 * Stores to memory are done 32 bits at a time once DST is 32-bit
 * aligned; the port itself is always read one byte at a time.
 */
void insb (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *p;

	p = (unsigned char *)dst;

	/* Single-byte stores until p reaches a 32-bit boundary. */
	while (((unsigned long)p) & 0x3) {
		if (!count)
			return;
		count--;
		*p = inb(port);
		p++;
	}

	while (count >= 4) {
		unsigned int w;

		count -= 4;
		/*
		 * The first byte read goes into the most significant byte
		 * of w.  The casts make the shifts unsigned, so a byte
		 * >= 0x80 is never shifted into the sign bit of an int.
		 */
		w  = (unsigned int)inb(port) << 24;
		w |= (unsigned int)inb(port) << 16;
		w |= (unsigned int)inb(port) << 8;
		w |= (unsigned int)inb(port);
		*(unsigned int *) p = w;
		p += 4;
	}

	/* Remaining 0..3 bytes. */
	while (count) {
		--count;
		*p = inb(port);
		p++;
	}
}
157
158
/*
 * Read COUNT 16-bit words from port PORT into memory starting at
 * DST.  This is used by the IDE driver to read disk sectors.
 * Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the inw() breaks things.
 *
 * Every value read from the port is passed through cpu_to_le16()
 * before it is stored.  The store width depends on the alignment
 * of DST:
 *   - 32-bit aligned: two port reads are combined per 32-bit store;
 *   - 16-bit aligned: one leading 16-bit store, then as above;
 *   - odd address:    a leading byte, 16-bit stores straddling two
 *                     port reads, and a trailing byte.
 */
void insw (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = (unsigned char *)dst;
	unsigned int cur, next;

	if (count == 0)
		return;

	switch ((unsigned long)out & 0x3) {
	case 0x02:			/* Buffer 16-bit aligned */
		/* One 16-bit store brings out to a 32-bit boundary. */
		*(unsigned short *)out = cpu_to_le16(inw(port));
		out += 2;
		count--;
		/* fall through */

	case 0x00:			/* Buffer 32-bit aligned */
		for (; count >= 2; count -= 2, out += 4) {
			cur = cpu_to_le16(inw(port)) << 16;
			cur |= cpu_to_le16(inw(port));
			*(unsigned int *)out = cur;
		}
		/* Odd word left over. */
		if (count)
			*(unsigned short *)out = cpu_to_le16(inw(port));
		break;

	case 0x01:			/* Buffer 8-bit aligned */
	case 0x03:
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */
		cur = cpu_to_le16(inw(port));
		*out = cur >> 8;
		out++;

		/* Each store pairs the low byte of the previous word with
		 * the high byte of the next one. */
		for (count--; count; count--) {
			next = cpu_to_le16(inw(port));
			*(unsigned short *)out = (cur & 0xff) << 8 | (next >> 8);
			out += 2;
			cur = next;
		}

		/* Low byte of the last word read. */
		*out = cur & 0xff;
		break;
	}
}
229
230
231
/*
 * Read COUNT 32-bit words from port PORT into memory starting at
 * DST.  Works with any alignment of DST.  Performance is important,
 * but the interfaces seems to be slow: just using the inlined version
 * of the inl() breaks things.
 *
 * Every value read from the port is passed through cpu_to_le32().
 * For an unaligned DST the first word is split into a short head that
 * brings the pointer to a 32-bit boundary; each later 32-bit store
 * combines the remainder of the previous word with the start of the
 * next one, and the remainder of the last word is stored as the tail.
 */
void insl (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = (unsigned char *)dst;
	unsigned int cur, next;

	if (count == 0)
		return;

	switch ((unsigned long)dst & 0x3) {
	case 0x00:			/* Buffer 32-bit aligned */
		for (; count; count--) {
			*(unsigned int *)out = cpu_to_le32(inl(port));
			out += 4;
		}
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		/* Head: upper 16 bits of the first word. */
		cur = cpu_to_le32(inl(port));
		*(unsigned short *)out = cur >> 16;
		out += 2;

		for (count--; count; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffff) << 16 | (next >> 16);
			out += 4;
			cur = next;
		}

		/* Tail: lower 16 bits of the last word. */
		*(unsigned short *)out = cur & 0xffff;
		break;

	case 0x01:			/* Buffer 8-bit aligned */
		/* Head: top byte, then the middle 16 bits. */
		cur = cpu_to_le32(inl(port));
		*out = cur >> 24;
		out++;
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;

		for (count--; count; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xff) << 24 | (next >> 8);
			out += 4;
			cur = next;
		}

		/* Tail: lowest byte of the last word. */
		*out = cur & 0xff;
		break;

	case 0x03:			/* Buffer 8-bit aligned */
		/* Head: top byte only. */
		cur = cpu_to_le32(inl(port));
		*out = cur >> 24;
		out++;

		for (count--; count; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffffff) << 8 | next >> 24;
			out += 4;
			cur = next;
		}

		/* Tail: middle 16 bits, then the lowest byte. */
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;
		*out = cur & 0xff;
		break;
	}
}
310
311
/*
 * Like insb but in the opposite direction: write COUNT bytes from
 * memory starting at SRC to port PORT, one outb() per byte.
 * Don't worry as much about doing aligned memory transfers:
 * doing byte reads the "slow" way isn't nearly as slow as
 * doing byte writes the slow way (no r-m-w cycle).
 */
void outsb(unsigned long port, const void * src, unsigned long count)
{
	const unsigned char *bytes = (const unsigned char *)src;
	unsigned long i;

	for (i = 0; i < count; i++)
		outb(bytes[i], port);
}
329
/*
 * Like insw but in the opposite direction.  This is used by the IDE
 * driver to write disk sectors.  Performance is important, but the
 * interfaces seems to be slow: just using the inlined version of the
 * outw() breaks things.
 *
 * Writes COUNT 16-bit words from memory starting at SRC to port PORT.
 * Every value is passed through le16_to_cpu() before it is written.
 * Works with any alignment of SRC:
 *   - 32-bit aligned: one 32-bit load feeds two port writes;
 *   - 16-bit aligned: one leading 16-bit load, then as above;
 *   - odd address:    a leading byte, 16-bit loads straddling two
 *                     port writes, and a trailing byte.
 */
void outsw (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x02:			/* Buffer 16-bit aligned */
		/* One 16-bit load brings p to a 32-bit boundary. */
		outw(le16_to_cpu(*(const unsigned short *)p), port);
		p += 2;
		count--;
		/* fall through */

	 case 0x00:			/* Buffer 32-bit aligned */
		while (count>=2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		/* Odd word left over. */
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
	 case 0x03:			/* both odd offsets take this path */
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */

		/* Leading byte becomes the high half of the first word. */
		l  = *p << 8;
		p++;
		count--;
		while (count)
		{
			count--;
			l2 = *(const unsigned short *)p;
			p += 2;
			outw(le16_to_cpu(l | l2 >> 8), port);
			/* Carry only the low byte into the next word. */
			l = (l2 & 0xff) << 8;
		}
		/*
		 * The trailing byte is the low half of the last word; it
		 * must be OR-ed in unshifted (shifting an 8-bit value right
		 * by 8 would always yield 0 and drop the byte).
		 */
		l2 = *p;
		outw (le16_to_cpu(l | l2), port);
		break;

	}
}
400
401
/*
 * Like insl but in the opposite direction.  This is used by the IDE
 * driver to write disk sectors.  Works with any alignment in SRC.
 * Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the outl() breaks things.
 *
 * Writes COUNT 32-bit words from memory starting at SRC to port PORT.
 * Every value is passed through le32_to_cpu() before it is written.
 * For an unaligned SRC a short head is loaded to reach a 32-bit
 * boundary; each port write then combines the bytes carried over from
 * the previous load with the start of the next 32-bit load, and a
 * short tail completes the last word.
 */
void outsl (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count--)
		{
			outl(le32_to_cpu(*(const unsigned int *)p), port);
			p += 4;
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		--count;

		/* Head: 16 bits that form the upper half of word 0. */
		l = *(const unsigned short *)p;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		/* Tail: 16 bits that form the lower half of the last word. */
		l2 = *(const unsigned short *)p;
		outl (le32_to_cpu(l << 16 | l2), port);
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* Head: one byte plus 16 bits fill the upper 24 bits.
		 * The cast keeps the shift out of the sign bit of int. */
		l = (unsigned int)*p << 24;
		p++;
		l |= *(const unsigned short *)p << 8;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		/* Tail: lowest byte of the last word. */
		l2 = *p;
		outl (le32_to_cpu(l | l2), port);
		break;

	 case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* Head: a single byte, the top byte of word 0. */
		l = (unsigned int)*p << 24;
		p++;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}
		/*
		 * Tail: l already holds the top byte, so the 16-bit value
		 * belongs in bits 8..23 and the final byte in bits 0..7.
		 * (Shifting the 16-bit value by 16 would collide with the
		 * carried top byte and leave bits 8..15 empty.)
		 */
		l2 = (unsigned int)*(const unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	}
}
482
/* Port string-input routines defined in this file. */
EXPORT_SYMBOL(insb);
EXPORT_SYMBOL(insw);
EXPORT_SYMBOL(insl);

/* Port string-output routines defined in this file. */
EXPORT_SYMBOL(outsb);
EXPORT_SYMBOL(outsw);
EXPORT_SYMBOL(outsl);