/*
 * interlocked functions
 *
 * Copyright 1996 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
#include "wine/port.h"
#include <assert.h>

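/* Portable interlocked primitives, declared in wine/port.h, with one
 * implementation per supported architecture below.  Callers typically use
 * them in a compare-and-swap retry loop, e.g.
 *     do { old = *value; } while (interlocked_cmpxchg( value, old + 1, old ) != old);
 */
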
#ifdef __i386__

#ifdef __GNUC__

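/* On i386 the functions use the cdecl convention: the arguments live on the
 * stack at 4(%esp), 8(%esp), ... and the result is returned in %eax.  The
 * cmpxchg instruction compares %eax with the destination and, on a match,
 * stores the new value; either way %eax ends up holding the previous
 * contents of the destination, which is exactly the required return value. */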
__ASM_GLOBAL_FUNC(interlocked_cmpxchg,
                  "movl 12(%esp),%eax\n\t"
                  "movl 8(%esp),%ecx\n\t"
                  "movl 4(%esp),%edx\n\t"
                  "lock; cmpxchgl %ecx,(%edx)\n\t"
                  "ret")
__ASM_GLOBAL_FUNC(interlocked_cmpxchg_ptr,
                  "movl 12(%esp),%eax\n\t"
                  "movl 8(%esp),%ecx\n\t"
                  "movl 4(%esp),%edx\n\t"
                  "lock; cmpxchgl %ecx,(%edx)\n\t"
                  "ret")
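/* cmpxchg8b compares %edx:%eax with the 64-bit destination and, on a match,
 * stores %ecx:%ebx into it.  %ebx and %esi are callee-saved, so they are
 * pushed first, which shifts the argument offsets up by 8 bytes. */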
__ASM_GLOBAL_FUNC(interlocked_cmpxchg64,
                  "push %ebx\n\t"
                  "push %esi\n\t"
                  "movl 12(%esp),%esi\n\t"
                  "movl 16(%esp),%ebx\n\t"
                  "movl 20(%esp),%ecx\n\t"
                  "movl 24(%esp),%eax\n\t"
                  "movl 28(%esp),%edx\n\t"
                  "lock; cmpxchg8b (%esi)\n\t"
                  "pop %esi\n\t"
                  "pop %ebx\n\t"
                  "ret")
__ASM_GLOBAL_FUNC(interlocked_xchg,
                  "movl 8(%esp),%eax\n\t"
                  "movl 4(%esp),%edx\n\t"
                  "lock; xchgl %eax,(%edx)\n\t"
                  "ret")
__ASM_GLOBAL_FUNC(interlocked_xchg_ptr,
                  "movl 8(%esp),%eax\n\t"
                  "movl 4(%esp),%edx\n\t"
                  "lock; xchgl %eax,(%edx)\n\t"
                  "ret")
__ASM_GLOBAL_FUNC(interlocked_xchg_add,
                  "movl 8(%esp),%eax\n\t"
                  "movl 4(%esp),%edx\n\t"
                  "lock; xaddl %eax,(%edx)\n\t"
                  "ret")

#elif defined(_MSC_VER)

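/* The MSVC versions mirror the GCC assembly above; the functions are
 * declared naked so no prologue or epilogue is generated and the arguments
 * are read directly off the stack. */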
__declspec(naked) int interlocked_cmpxchg( int *dest, int xchg, int compare )
{
    __asm mov eax, 12[esp];
    __asm mov ecx, 8[esp];
    __asm mov edx, 4[esp];
    __asm lock cmpxchg [edx], ecx;
    __asm ret;
}

__declspec(naked) void *interlocked_cmpxchg_ptr( void **dest, void *xchg, void *compare )
{
    __asm mov eax, 12[esp];
    __asm mov ecx, 8[esp];
    __asm mov edx, 4[esp];
    __asm lock cmpxchg [edx], ecx;
    __asm ret;
}

__declspec(naked) __int64 interlocked_cmpxchg64( __int64 *dest, __int64 xchg, __int64 compare)
{
    __asm push ebx;
    __asm push esi;
    __asm mov esi, 12[esp];
    __asm mov ebx, 16[esp];
    __asm mov ecx, 20[esp];
    __asm mov eax, 24[esp];
    __asm mov edx, 28[esp];
    __asm lock cmpxchg8b [esi];
    __asm pop esi;
    __asm pop ebx;
    __asm ret;
}

__declspec(naked) int interlocked_xchg( int *dest, int val )
{
    __asm mov eax, 8[esp];
    __asm mov edx, 4[esp];
    __asm lock xchg [edx], eax;
    __asm ret;
}

__declspec(naked) void *interlocked_xchg_ptr( void **dest, void *val )
{
    __asm mov eax, 8[esp];
    __asm mov edx, 4[esp];
    __asm lock xchg [edx], eax;
    __asm ret;
}

__declspec(naked) int interlocked_xchg_add( int *dest, int incr )
{
    __asm mov eax, 8[esp];
    __asm mov edx, 4[esp];
    __asm lock xadd [edx], eax;
    __asm ret;
}

#else
# error You must implement the interlocked* functions for your compiler
#endif

#elif defined(__x86_64__)

#ifdef __GNUC__

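/* On x86-64 the System V ABI passes the arguments in %rdi, %rsi and %rdx and
 * returns the result in %rax, so the comparand is simply moved from the third
 * argument register into %eax (or %rax) before the locked cmpxchg. */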
__ASM_GLOBAL_FUNC(interlocked_cmpxchg,
                  "mov %edx, %eax\n\t"
                  "lock cmpxchgl %esi,(%rdi)\n\t"
                  "ret")
__ASM_GLOBAL_FUNC(interlocked_cmpxchg_ptr,
                  "mov %rdx, %rax\n\t"
                  "lock cmpxchgq %rsi,(%rdi)\n\t"
                  "ret")
__ASM_GLOBAL_FUNC(interlocked_cmpxchg64,
                  "mov %rdx, %rax\n\t"
                  "lock cmpxchgq %rsi,(%rdi)\n\t"
                  "ret")
__ASM_GLOBAL_FUNC(interlocked_xchg,
                  "mov %esi, %eax\n\t"
                  "lock xchgl %eax, (%rdi)\n\t"
                  "ret")
__ASM_GLOBAL_FUNC(interlocked_xchg_ptr,
                  "mov %rsi, %rax\n\t"
                  "lock xchgq %rax,(%rdi)\n\t"
                  "ret")
__ASM_GLOBAL_FUNC(interlocked_xchg_add,
                  "mov %esi, %eax\n\t"
                  "lock xaddl %eax, (%rdi)\n\t"
                  "ret")

#else
# error You must implement the interlocked* functions for your compiler
#endif

#elif defined(__powerpc__)
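/* The PowerPC versions build each operation from a lwarx (load and reserve) /
 * stwcx. (store conditional) loop: the store fails and the loop retries if the
 * reservation was lost to another writer, and isync acts as an acquire barrier
 * once the update has succeeded.  Note that interlocked_xchg_add computes the
 * new value in its result register, so it returns ret - incr to yield the old
 * value. */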
void* interlocked_cmpxchg_ptr( void **dest, void* xchg, void* compare)
{
    void *ret = 0;
    void *scratch;
    __asm__ __volatile__(
        "0:    lwarx %0,0,%2\n"
        "      xor. %1,%4,%0\n"
        "      bne 1f\n"
        "      stwcx. %3,0,%2\n"
        "      bne- 0b\n"
        "      isync\n"
        "1:    "
        : "=&r"(ret), "=&r"(scratch)
        : "r"(dest), "r"(xchg), "r"(compare)
        : "cr0","memory");
    return ret;
}

__int64 interlocked_cmpxchg64( __int64 *dest, __int64 xchg, __int64 compare)
{
    /* FIXME: add code */
    assert(0);
}

int interlocked_cmpxchg( int *dest, int xchg, int compare)
{
    int ret = 0;
    int scratch;
    __asm__ __volatile__(
        "0:    lwarx %0,0,%2\n"
        "      xor. %1,%4,%0\n"
        "      bne 1f\n"
        "      stwcx. %3,0,%2\n"
        "      bne- 0b\n"
        "      isync\n"
        "1:    "
        : "=&r"(ret), "=&r"(scratch)
        : "r"(dest), "r"(xchg), "r"(compare)
        : "cr0","memory","r0");
    return ret;
}

int interlocked_xchg_add( int *dest, int incr )
{
    int ret = 0;
    int zero = 0;
    __asm__ __volatile__(
        "0:    lwarx %0, %3, %1\n"
        "      add %0, %2, %0\n"
        "      stwcx. %0, %3, %1\n"
        "      bne- 0b\n"
        "      isync\n"
        : "=&r" (ret)
        : "r"(dest), "r"(incr), "r"(zero)
        : "cr0", "memory", "r0"
    );
    return ret-incr;
}

int interlocked_xchg( int* dest, int val )
{
    int ret = 0;
    __asm__ __volatile__(
        "0:    lwarx %0,0,%1\n"
        "      stwcx. %2,0,%1\n"
        "      bne- 0b\n"
        "      isync\n"
        : "=&r"(ret)
        : "r"(dest), "r"(val)
        : "cr0","memory","r0");
    return ret;
}

void* interlocked_xchg_ptr( void** dest, void* val )
{
    void *ret = NULL;
    __asm__ __volatile__(
        "0:    lwarx %0,0,%1\n"
        "      stwcx. %2,0,%1\n"
        "      bne- 0b\n"
        "      isync\n"
        : "=&r"(ret)
        : "r"(dest), "r"(val)
        : "cr0","memory","r0");
    return ret;
}

#elif defined(__sparc__) && defined(__sun__)

/*
 * As the earlier Sparc processors lack the necessary atomic instructions,
 * we simply fall back to the library-provided _lwp_mutex routines to ensure
 * mutual exclusion in a way appropriate for the current architecture.
 *
 * FIXME:  If we have the compare-and-swap instruction (Sparc v9 and above)
 *         we could use this to speed up the Interlocked operations ...
 */
#include <synch.h>
static lwp_mutex_t interlocked_mutex = DEFAULTMUTEX;

int interlocked_cmpxchg( int *dest, int xchg, int compare )
{
    _lwp_mutex_lock( &interlocked_mutex );
    if (*dest == compare) *dest = xchg;
    else compare = *dest;
    _lwp_mutex_unlock( &interlocked_mutex );
    return compare;
}

void *interlocked_cmpxchg_ptr( void **dest, void *xchg, void *compare )
{
    _lwp_mutex_lock( &interlocked_mutex );
    if (*dest == compare) *dest = xchg;
    else compare = *dest;
    _lwp_mutex_unlock( &interlocked_mutex );
    return compare;
}

__int64 interlocked_cmpxchg64( __int64 *dest, __int64 xchg, __int64 compare )
{
    _lwp_mutex_lock( &interlocked_mutex );
    if (*dest == compare) *dest = xchg;
    else compare = *dest;
    _lwp_mutex_unlock( &interlocked_mutex );
    return compare;
}

int interlocked_xchg( int *dest, int val )
{
    int retv;
    _lwp_mutex_lock( &interlocked_mutex );
    retv = *dest;
    *dest = val;
    _lwp_mutex_unlock( &interlocked_mutex );
    return retv;
}

void *interlocked_xchg_ptr( void **dest, void *val )
{
    void *retv;
    _lwp_mutex_lock( &interlocked_mutex );
    retv = *dest;
    *dest = val;
    _lwp_mutex_unlock( &interlocked_mutex );
    return retv;
}

int interlocked_xchg_add( int *dest, int incr )
{
    int retv;
    _lwp_mutex_lock( &interlocked_mutex );
    retv = *dest;
    *dest += incr;
    _lwp_mutex_unlock( &interlocked_mutex );
    return retv;
}

#elif defined(__ALPHA__) && defined(__GNUC__)

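/* The Alpha versions use the analogous ldl_l/stl_c (and ldq_l/stq_c)
 * load-locked / store-conditional loops, with the arguments in $16-$18, the
 * return value in $0, and a trailing mb as the memory barrier. */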
__ASM_GLOBAL_FUNC(interlocked_cmpxchg,
                  "L0cmpxchg:\n\t"
                  "ldl_l $0,0($16)\n\t"
                  "cmpeq $0,$18,$1\n\t"
                  "beq   $1,L1cmpxchg\n\t"
                  "mov   $17,$0\n\t"
                  "stl_c $0,0($16)\n\t"
                  "beq   $0,L0cmpxchg\n\t"
                  "mov   $18,$0\n"
                  "L1cmpxchg:\n\t"
                  "mb")

__ASM_GLOBAL_FUNC(interlocked_cmpxchg_ptr,
                  "L0cmpxchg_ptr:\n\t"
                  "ldq_l $0,0($16)\n\t"
                  "cmpeq $0,$18,$1\n\t"
                  "beq   $1,L1cmpxchg_ptr\n\t"
                  "mov   $17,$0\n\t"
                  "stq_c $0,0($16)\n\t"
                  "beq   $0,L0cmpxchg_ptr\n\t"
                  "mov   $18,$0\n"
                  "L1cmpxchg_ptr:\n\t"
                  "mb")

__int64 interlocked_cmpxchg64(__int64 *dest, __int64 xchg, __int64 compare)
{
    /* FIXME: add code */
    assert(0);
}

__ASM_GLOBAL_FUNC(interlocked_xchg,
                  "L0xchg:\n\t"
                  "ldl_l $0,0($16)\n\t"
                  "mov   $17,$1\n\t"
                  "stl_c $1,0($16)\n\t"
                  "beq   $1,L0xchg\n\t"
                  "mb")

__ASM_GLOBAL_FUNC(interlocked_xchg_ptr,
                  "L0xchg_ptr:\n\t"
                  "ldq_l $0,0($16)\n\t"
                  "mov   $17,$1\n\t"
                  "stq_c $1,0($16)\n\t"
                  "beq   $1,L0xchg_ptr\n\t"
                  "mb")

__ASM_GLOBAL_FUNC(interlocked_xchg_add,
                  "L0xchg_add:\n\t"
                  "ldl_l $0,0($16)\n\t"
                  "addl  $0,$17,$1\n\t"
                  "stl_c $1,0($16)\n\t"
                  "beq   $1,L0xchg_add\n\t"
                  "mb")

#else
# error You must implement the interlocked* functions for your CPU
#endif