/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 *
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose,  provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 */

/* Memory model documented at http://www-106.ibm.com/developerworks/    */
/* eserver/articles/archguide.html and (clearer)                        */
/* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */
/* There appears to be no implicit ordering between any kind of         */
/* independent memory references.                                       */
/* The architecture enforces some ordering based on control             */
/* dependence, but it is not clear whether that helps here.             */
/* Data-dependent loads are always ordered.                             */
/* Based on the above references, eieio is intended for use on          */
/* uncached memory, which we don't support.  It does not order loads    */
/* from cached memory.                                                  */
/* Thanks to Maged Michael, Doug Lea, and Roger Hoover for helping to   */
/* track some of this down and correcting my misunderstandings. -HB     */
/* Earl Chew subsequently contributed further fixes & additions.        */

#include "../all_aligned_atomic_load_store.h"

#include "../test_and_set_t_is_ao_t.h"
        /* There seems to be no byte equivalent of lwarx, so this       */
        /* may really be what we want, at least in the 32-bit case.     */

AO_INLINE void
AO_nop_full(void)
{
  __asm__ __volatile__("sync" : : : "memory");
}
#define AO_HAVE_nop_full

/* lwsync apparently works for everything but a StoreLoad barrier.      */
AO_INLINE void
AO_lwsync(void)
{
#ifdef __NO_LWSYNC__
  __asm__ __volatile__("sync" : : : "memory");
#else
  __asm__ __volatile__("lwsync" : : : "memory");
#endif
}

#define AO_nop_write() AO_lwsync()
#define AO_HAVE_nop_write

#define AO_nop_read() AO_lwsync()
#define AO_HAVE_nop_read

/* We explicitly specify load_acquire, since it is important, and can   */
/* be implemented relatively cheaply.  It could be implemented          */
/* with an ordinary load followed by a lwsync.  But the general wisdom  */
/* seems to be that a data dependent branch followed by an isync is     */
/* cheaper.  And the documentation is fairly explicit that this also    */
/* has acquire semantics.                                               */
/* The 64-bit version uses ld rather than lwz.                          */
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result;
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
   __asm__ __volatile__ (
    "ld%U1%X1 %0,%1\n"
    "cmpw %0,%0\n"
    "bne- 1f\n"
    "1: isync\n"
    : "=r" (result)
    : "m"(*addr) : "memory", "cr0");
#else
  /* FIXME: We should get gcc to allocate one of the condition  */
  /* registers.  I always got "impossible constraint" when I    */
  /* tried the "y" constraint.                                  */
  __asm__ __volatile__ (
    "lwz%U1%X1 %0,%1\n"
    "cmpw %0,%0\n"
    "bne- 1f\n"
    "1: isync\n"
    : "=r" (result)
    : "m"(*addr) : "memory", "cc");
#endif
  return result;
}
#define AO_HAVE_load_acquire
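
/* For illustration only: the alternative mentioned above, a plain load */
/* followed by AO_lwsync, also yields acquire semantics.  The           */
/* AO_load_acquire_alt name is hypothetical and not part of this        */
/* interface.                                                           */
#if 0
AO_INLINE AO_t
AO_load_acquire_alt(const volatile AO_t *addr)
{
  AO_t result = *addr;  /* ordinary load                                */
  AO_lwsync();          /* orders the load before later loads/stores    */
  return result;
}
#endif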

/* We explicitly specify store_release, since it relies         */
/* on the fact that lwsync is also a LoadStore barrier.         */
AO_INLINE void
AO_store_release(volatile AO_t *addr, AO_t value)
{
  AO_lwsync();
  *addr = value;
}
#define AO_HAVE_store_release

/* This is similar to the code in the garbage collector.  Deleting      */
/* this and having it synthesized from compare_and_swap (as sketched    */
/* after AO_compare_and_swap below) would probably only cost us a load  */
/* immediate instruction.                                               */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr) {
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* Completely untested.  And we should be using smaller objects anyway. */
  unsigned long oldval;
  unsigned long temp = 1; /* locked value */

  __asm__ __volatile__(
               "1:ldarx %0,0,%1\n"   /* load and reserve               */
               "cmpdi %0, 0\n"       /* if load is                     */
               "bne 2f\n"            /*   non-zero, return already set */
               "stdcx. %2,0,%1\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "2:\n"                /* oldval is zero if we set       */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");
#else
  int oldval;
  int temp = 1; /* locked value */

  __asm__ __volatile__(
               "1:lwarx %0,0,%1\n"   /* load and reserve               */
               "cmpwi %0, 0\n"       /* if load is                     */
               "bne 2f\n"            /*   non-zero, return already set */
               "stwcx. %2,0,%1\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "2:\n"                /* oldval is zero if we set       */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");
#endif
  return (AO_TS_VAL_t)oldval;
}
#define AO_HAVE_test_and_set

AO_INLINE AO_TS_VAL_t
AO_test_and_set_acquire(volatile AO_TS_t *addr) {
  AO_TS_VAL_t result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_test_and_set_acquire

AO_INLINE AO_TS_VAL_t
AO_test_and_set_release(volatile AO_TS_t *addr) {
  AO_lwsync();
  return AO_test_and_set(addr);
}
#define AO_HAVE_test_and_set_release

AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr) {
  AO_TS_VAL_t result;
  AO_lwsync();
  result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_test_and_set_full

AO_INLINE int
AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) {
  AO_t oldval;
  int result = 0;
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* FIXME: Completely untested.  */
  __asm__ __volatile__(
               "1:ldarx %0,0,%2\n"   /* load and reserve               */
               "cmpd %0, %4\n"       /* if load is not equal to        */
               "bne 2f\n"            /*   old, fail                    */
               "stdcx. %3,0,%2\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "li %1,1\n"           /* result = 1;                    */
               "2:\n"
              : "=&r"(oldval), "=&r"(result)
              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
              : "memory", "cr0");
#else
  __asm__ __volatile__(
               "1:lwarx %0,0,%2\n"   /* load and reserve               */
               "cmpw %0, %4\n"       /* if load is not equal to        */
               "bne 2f\n"            /*   old, fail                    */
               "stwcx. %3,0,%2\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "li %1,1\n"           /* result = 1;                    */
               "2:\n"
              : "=&r"(oldval), "=&r"(result)
              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
              : "memory", "cr0");
#endif
  return result;
}
#define AO_HAVE_compare_and_swap
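
/* As noted in the comment above AO_test_and_set, that operation could  */
/* be synthesized from compare_and_swap.  A sketch, for illustration    */
/* only: the AO_test_and_set_via_cas name is not part of the interface, */
/* and AO_TS_CLEAR/AO_TS_SET (0 and 1) are assumed to come from         */
/* test_and_set_t_is_ao_t.h.                                            */
#if 0
AO_INLINE AO_TS_VAL_t
AO_test_and_set_via_cas(volatile AO_TS_t *addr)
{
  /* Success means the old value was AO_TS_CLEAR; failure (after the    */
  /* internal retry on lost reservations) means it was already set.     */
  return AO_compare_and_swap(addr, AO_TS_CLEAR, AO_TS_SET)
           ? AO_TS_CLEAR : AO_TS_SET;
}
#endif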

AO_INLINE int
AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) {
  int result = AO_compare_and_swap(addr, old, new_val);
  AO_lwsync();
  return result;
}
#define AO_HAVE_compare_and_swap_acquire

AO_INLINE int
AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) {
  AO_lwsync();
  return AO_compare_and_swap(addr, old, new_val);
}
#define AO_HAVE_compare_and_swap_release

AO_INLINE int
AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) {
  int result;
  AO_lwsync();
  result = AO_compare_and_swap(addr, old, new_val);
  AO_lwsync();
  return result;
}
#define AO_HAVE_compare_and_swap_full

AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
  AO_t oldval;
  AO_t newval;
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* FIXME: Completely untested.                                          */
  __asm__ __volatile__(
               "1:ldarx %0,0,%2\n"   /* load and reserve                */
               "add %1,%0,%3\n"      /* increment                       */
               "stdcx. %1,0,%2\n"    /* store conditional               */
               "bne- 1b\n"           /* retry if lost reservation       */
              : "=&r"(oldval), "=&r"(newval)
               : "r"(addr), "r"(incr)
              : "memory", "cr0");
#else
  __asm__ __volatile__(
               "1:lwarx %0,0,%2\n"   /* load and reserve                */
               "add %1,%0,%3\n"      /* increment                       */
               "stwcx. %1,0,%2\n"    /* store conditional               */
               "bne- 1b\n"           /* retry if lost reservation       */
              : "=&r"(oldval), "=&r"(newval)
               : "r"(addr), "r"(incr)
              : "memory", "cr0");
#endif
  return oldval;
}
#define AO_HAVE_fetch_and_add

AO_INLINE AO_t
AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) {
  AO_t result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_and_add_acquire

AO_INLINE AO_t
AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) {
  AO_lwsync();
  return AO_fetch_and_add(addr, incr);
}
#define AO_HAVE_fetch_and_add_release

AO_INLINE AO_t
AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) {
  AO_t result;
  AO_lwsync();
  result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_and_add_full

#if !defined(__powerpc64__) && !defined(__ppc64__) && !defined(__64BIT__)
# include "../ao_t_is_int.h"
#endif
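
/* Usage sketch, for illustration only (not part of this header): a     */
/* minimal spin lock built from the primitives above, assuming          */
/* AO_TS_INITIALIZER, AO_TS_SET and AO_TS_CLEAR as provided by          */
/* test_and_set_t_is_ao_t.h; the example_* names are hypothetical.      */
#if 0
static volatile AO_TS_t example_lock = AO_TS_INITIALIZER;

static void
example_lock_acquire(void)
{
  while (AO_test_and_set_acquire(&example_lock) == AO_TS_SET)
    /* spin until the current holder clears the flag */ ;
}

static void
example_lock_release(void)
{
  AO_store_release(&example_lock, AO_TS_CLEAR);
}
#endif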