Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6] / arch / frv / lib / memset.S
1 /* memset.S: optimised assembly memset
2  *
3  * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12
13         .text
14         .p2align        4
15
16 ###############################################################################
17 #
18 # void *memset(void *p, char ch, size_t count)
19 #
20 # - NOTE: must not use any stack. exception detection performs function return
21 #         to caller's fixup routine, aborting the remainder of the set
22 #         GR4, GR7, GR8, and GR11 must be managed
23 #
24 ###############################################################################
25         .globl          memset,__memset_end ; __memset_end is the label placed just after the final return below
26         .type           memset,@function
27 memset: ; in: GR8 = p, GR9 = ch, GR10 = count (as read below); GR8 is never written by this routine
28         orcc.p          gr10,gr0,gr5,icc3               ; GR5 = count (working copy); ICC3 reflects whether it is zero
29         andi            gr9,#0xff,gr9 ; keep only the low 8 bits of the fill value
30         or.p            gr8,gr0,gr4                     ; GR4 = address (working pointer)
31         beqlr           icc3,#0 ; count == 0: return straight away
32
33         # conditionally write a byte to 2b-align the address
34         setlos.p        #1,gr6 ; GR6 = 1, the step for this stage
35         andicc          gr4,#1,gr0,icc0 ; ICC0 <- test of address bit 0
36         ckne            icc0,cc7 ; CC7 true when the address is odd
37         cstb.p          gr9,@(gr4,gr0)          ,cc7,#1 ; when CC7 is true: store one byte at GR4 (index is GR0, GR7 is not used yet)
38         csubcc          gr5,gr6,gr5             ,cc7,#1 ; when CC7 is true: count -= 1; also set ICC3
39         cadd.p          gr4,gr6,gr4             ,cc7,#1 ; when CC7 is true: address += 1
40         beqlr           icc3,#0 ; return if that consumed the last byte
41
42         # conditionally write a word to 4b-align the address
43         andicc.p        gr4,#2,gr0,icc0 ; ICC0 <- test of address bit 1
44         subicc          gr5,#2,gr0,icc1 ; ICC1 <- result of count - 2 (result itself discarded into GR0)
45         setlos.p        #2,gr6 ; GR6 = 2, the step for this stage
46         ckne            icc0,cc7 ; CC7 true when address bit 1 is set
47         slli.p          gr9,#8,gr12                     ; need to double up the pattern
48         cknc            icc1,cc5 ; CC5 true when count - 2 produced no carry, i.e. at least 2 bytes remain
49         or.p            gr9,gr12,gr12 ; GR12 = fill byte replicated into the low two bytes
50         andcr           cc7,cc5,cc7 ; CC7 = (address needs aligning) AND (enough bytes remain)
51
52         csth.p          gr12,@(gr4,gr0)         ,cc7,#1 ; when CC7 is true: store 2 bytes at GR4
53         csubcc          gr5,gr6,gr5             ,cc7,#1 ; when CC7 is true: count -= 2; also set ICC3
54         cadd.p          gr4,gr6,gr4             ,cc7,#1 ; when CC7 is true: address += 2
55         beqlr           icc3,#0 ; return if nothing is left
56
57         # conditionally write a dword to 8b-align the address
58         andicc.p        gr4,#4,gr0,icc0 ; ICC0 <- test of address bit 2
59         subicc          gr5,#4,gr0,icc1 ; ICC1 <- result of count - 4
60         setlos.p        #4,gr6 ; GR6 = 4, the step for this stage
61         ckne            icc0,cc7 ; CC7 true when address bit 2 is set
62         slli.p          gr12,#16,gr13                   ; need to quadruple-up the pattern
63         cknc            icc1,cc5 ; CC5 true when at least 4 bytes remain
64         or.p            gr13,gr12,gr12 ; GR12 = fill byte replicated into all four bytes
65         andcr           cc7,cc5,cc7 ; CC7 = (address needs aligning) AND (enough bytes remain)
66
67         cst.p           gr12,@(gr4,gr0)         ,cc7,#1 ; when CC7 is true: store 4 bytes at GR4
68         csubcc          gr5,gr6,gr5             ,cc7,#1 ; when CC7 is true: count -= 4; also set ICC3
69         cadd.p          gr4,gr6,gr4             ,cc7,#1 ; when CC7 is true: address += 4
70         beqlr           icc3,#0 ; return if nothing is left
71
72         or.p            gr12,gr12,gr13                  ; need to octuple-up the pattern (GR13 = copy of GR12)
73
74         # the address is now 8b-aligned - loop around writing 64b chunks
75         setlos          #8,gr7 ; GR7 = 8: index/step for the updating stores; this is the first write to GR7 in this routine
76         subi.p          gr4,#8,gr4                      ; store with update index does weird stuff - bias GR4 by -8 so that GR4+GR7 is the next address to fill
77         setlos          #64,gr6 ; GR6 = 64, the byte count consumed per loop pass
78
79         subicc          gr5,#64,gr0,icc0 ; ICC0 <- result of count - 64
80 0:      cknc            icc0,cc7 ; CC7 true while at least 64 bytes remain
81         cstdu           gr12,@(gr4,gr7)         ,cc7,#1 ; eight conditional 8-byte stores at GR4+GR7 per pass (64 bytes)
82         cstdu           gr12,@(gr4,gr7)         ,cc7,#1
83         cstdu           gr12,@(gr4,gr7)         ,cc7,#1
84         cstdu           gr12,@(gr4,gr7)         ,cc7,#1
85         cstdu           gr12,@(gr4,gr7)         ,cc7,#1
86         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
87         csubcc          gr5,gr6,gr5             ,cc7,#1 ; when CC7 is true: count -= 64; also set ICC3
88         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
89         subicc          gr5,#64,gr0,icc0 ; ICC0 <- result of (updated count) - 64, for the next pass
90         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
91         beqlr           icc3,#0 ; return if nothing is left
92         bnc             icc0,#2,0b ; go round again while another full 64 bytes remain
93
94         # now do 32-byte remnant
95         subicc.p        gr5,#32,gr0,icc0 ; ICC0 <- result of count - 32
96         setlos          #32,gr6 ; GR6 = 32, the step for this stage
97         cknc            icc0,cc7 ; CC7 true when at least 32 bytes remain
98         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1 ; four conditional 8-byte stores (32 bytes)
99         csubcc          gr5,gr6,gr5             ,cc7,#1 ; when CC7 is true: count -= 32; also set ICC3
100         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
101         setlos          #16,gr6 ; GR6 = 16, prepared early for the next stage
102         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
103         subicc          gr5,#16,gr0,icc0 ; ICC0 <- result of count - 16, prepared early for the next stage
104         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
105         beqlr           icc3,#0 ; return if nothing is left
106
107         # now do 16-byte remnant
108         cknc            icc0,cc7 ; CC7 true when at least 16 bytes remain
109         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1 ; two conditional 8-byte stores (16 bytes)
110         csubcc          gr5,gr6,gr5             ,cc7,#1 ; when CC7 is true: count -= 16; also set ICC3
111         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
112         beqlr           icc3,#0 ; return if nothing is left
113
114         # now do 8-byte remnant
115         subicc          gr5,#8,gr0,icc1 ; ICC1 <- result of count - 8
116         cknc            icc1,cc7 ; CC7 true when at least 8 bytes remain
117         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1 ; one conditional 8-byte store
118         csubcc          gr5,gr7,gr5             ,cc7,#1 ; when CC7 is true: count -= 8 (GR7 still holds 8); also set ICC3
119         setlos.p        #4,gr7 ; GR7 = 4: index/step for the 4-byte stage
120         beqlr           icc3,#0 ; return if nothing is left
121
122         # now do 4-byte remnant
123         subicc          gr5,#4,gr0,icc0 ; ICC0 <- result of count - 4
124         addi.p          gr4,#4,gr4 ; GR7 shrank from 8 to 4, so advance GR4 by 4 to keep GR4+GR7 on the next address
125         cknc            icc0,cc7 ; CC7 true when at least 4 bytes remain
126         cstu.p          gr12,@(gr4,gr7)         ,cc7,#1 ; conditional 4-byte store at GR4+GR7
127         csubcc          gr5,gr7,gr5             ,cc7,#1 ; when CC7 is true: count -= 4; also set ICC3
128         subicc.p        gr5,#2,gr0,icc1 ; ICC1 <- result of count - 2, for the next stage
129         beqlr           icc3,#0 ; return if nothing is left
130
131         # now do 2-byte remnant
132         setlos          #2,gr7 ; GR7 = 2: index/step for the 2-byte stage
133         addi.p          gr4,#2,gr4 ; GR7 shrank from 4 to 2, so advance GR4 by 2 to compensate
134         cknc            icc1,cc7 ; CC7 true when at least 2 bytes remain
135         csthu.p         gr12,@(gr4,gr7)         ,cc7,#1 ; conditional 2-byte store at GR4+GR7
136         csubcc          gr5,gr7,gr5             ,cc7,#1 ; when CC7 is true: count -= 2; also set ICC3
137         subicc.p        gr5,#1,gr0,icc0 ; ICC0 <- result of count - 1, for the final stage
138         beqlr           icc3,#0 ; return if nothing is left
139
140         # now do 1-byte remnant
141         setlos          #0,gr7 ; GR7 = 0: the final store indexes with GR0, so no step applies
142         addi.p          gr4,#2,gr4 ; GR7 shrank from 2 to 0, so GR4 itself now holds the next address
143         cknc            icc0,cc7 ; CC7 true when one byte remains
144         cstb.p          gr12,@(gr4,gr0)         ,cc7,#1 ; conditional final one-byte store at GR4
145         bralr ; unconditional return to the caller
146 __memset_end:
147
148         .size           memset, __memset_end-memset
149
150 ###############################################################################
151 #
152 # clear memory in userspace
153 # - return the number of bytes that could not be cleared (0 on complete success)
154 #
155 # long __memset_user(void *p, size_t count)
156 #
157 ###############################################################################
158         .globl          __memset_user, __memset_user_error_lr, __memset_user_error_handler
159         .type           __memset_user,@function
160 __memset_user:                                          ; in: GR8 = p, GR9 = count; out: GR8 = bytes NOT cleared
161         movsg           lr,gr11                         ; GR11 = our return address (the call below overwrites LR)
162         setlos          #0,gr7                          ; GR7 = 0: memset's alignment stores index with GR0 and do not set GR7, but the error handler adds GR7
163         # abuse memset to do the dirty work
164         or.p            gr9,gr9,gr10                    ; GR10 = count (memset's third argument)
165         setlos          #0,gr9                          ; GR9 = 0 (the fill byte)
166         call            memset
167 __memset_user_error_lr:                                 ; memset's return address; exported so it can be recognised from outside this file
168         jmpl.p          @(gr11,gr0)                     ; normal completion: return to our caller...
169         setlos          #0,gr8                          ; ...with 0 (everything was cleared)
170
171         # deal with any exception generated by memset
172         # GR4  - memset's address tracking pointer
173         # GR7  - memset's step value (index register for store insns)
174         #        (0 while memset is still in its alignment stores, courtesy of the setlos above)
175         # GR8  - memset's original start address
176         # GR10 - memset's original count
177 __memset_user_error_handler:
178         add.p           gr4,gr7,gr4                     ; GR4 = address the failing store was aimed at
179         add             gr8,gr10,gr8                    ; GR8 = one past the end of the requested region
180         jmpl.p          @(gr11,gr0)                     ; return to __memset_user's caller
181         sub             gr8,gr4,gr8             ; we return the amount left uncleared
182         .size           __memset_user, .-__memset_user