/*
 * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
 * Original Author: Hans Boehm
 *
 * This file may be redistributed and/or modified under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2, or (at your option) any later version.
 *
 * It is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License in the
 * file doc/COPYING for more details.
 */

#if defined(HAVE_CONFIG_H)
# include "config.h"
#endif

#include <string.h>
#include <stdlib.h>
#include <assert.h>
#define AO_REQUIRE_CAS
#include "atomic_ops_stack.h"

#if defined(_MSC_VER) \
    || defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__CYGWIN__)
  /* AO_pause not defined elsewhere */
  /* FIXME: At least AO_spin should be factored out.    */
#include <windows.h>

AO_t dummy;
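/* dummy is loaded and stored around the delay loop in AO_spin so       */
/* that the compiler cannot optimize the loop away entirely.            */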

/* Spin for 2**n units. */
static void AO_spin(int n)
{
  int i;
  AO_t j = AO_load(&dummy);

  for (i = 0; i < (2 << n); ++i)
    {
       j *= 5;
       j -= 4;
    }
  AO_store(&dummy, j);
}

void AO_pause(int n)
{
    if (n < 12)
      AO_spin(n);
    else
      {
        DWORD msecs;

        /* Short async-signal-safe sleep. */
        msecs = (n > 18 ? 100 : (1 << (n - 12)));
        Sleep(msecs);
      }
}

#else

/* AO_pause is available elsewhere */

extern void AO_pause(int);

#endif

#ifdef AO_USE_ALMOST_LOCK_FREE

/* LIFO linked lists based on compare-and-swap.  We need to avoid       */
/* the case of a node deletion and reinsertion while I'm deleting       */
/* it, since that may cause my CAS to succeed even though the next      */
/* pointer is now wrong.  Our solution is not fully lock-free, but it   */
/* is good enough for signal handlers, provided we have a suitably low  */
/* bound on the number of recursive signal handler reentries.           */
/* A list consists of a first pointer and a blacklist of pointer        */
/* values that are currently being removed.  No element whose pointer   */
/* value is on the blacklist may be inserted.  If we would otherwise    */
/* do so, we are allowed to insert a variant that differs only in the   */
/* least significant, ignored, bits.  If the blacklist is full, we      */
/* wait.                                                                */

/* Crucial observation: A particular padded pointer x (i.e. pointer     */
/* plus arbitrary low order bits) can never be newly inserted into      */
/* a list while it's in the corresponding auxiliary data structure.     */
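
/* Illustrative usage sketch (not compiled): it assumes the AO_stack_t  */
/* type and the AO_stack_push_release / AO_stack_pop_acquire wrappers   */
/* declared in atomic_ops_stack.h, which supply the list head and the   */
/* auxiliary structure to the _explicit_aux functions defined below.    */
#if 0
  struct node {
    AO_t link;          /* Link field; must come first so the AO_t *    */
                        /* returned by pop converts back to a node.     */
    int data;
  };

  static AO_stack_t the_stack;  /* Statically zero-initialized: empty.  */

  void example(struct node *n)
  {
    struct node *top;

    AO_stack_push_release(&the_stack, &n -> link);
    top = (struct node *)AO_stack_pop_acquire(&the_stack);
    if (top != 0)
      {
        /* top -> data is valid; top -> link is no longer meaningful.   */
      }
  }
#endif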

/* The second argument is a pointer to the link field of the element    */
/* to be inserted.                                                      */
/* Both list headers and link fields contain "perturbed" pointers, i.e. */
/* pointers with extra bits "or"ed into the low order bits.             */
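/* (AO_REAL_NEXT_PTR(), used in pop below and defined in                */
/* atomic_ops_stack.h, in effect strips these extra bits again to       */
/* recover the real element address.)                                   */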
void
AO_stack_push_explicit_aux_release(volatile AO_t *list, AO_t *x,
                                   AO_stack_aux *a)
{
  AO_t x_bits = (AO_t)x;
  AO_t next;

  /* No deletions of x can start here, since x is not currently in the  */
  /* list.                                                              */
 retry:
# if AO_BL_SIZE == 2
  {
    /* Start all loads as close to concurrently as possible. */
    AO_t entry1 = AO_load(a -> AO_stack_bl);
    AO_t entry2 = AO_load(a -> AO_stack_bl + 1);
    if (entry1 == x_bits || entry2 == x_bits)
      {
        /* Entry is currently being removed.  Change it a little.       */
          ++x_bits;
          if ((x_bits & AO_BIT_MASK) == 0)
            /* Version count overflowed;         */
            /* EXTREMELY unlikely, but possible. */
            x_bits = (AO_t)x;
        goto retry;
      }
  }
# else
  {
    int i;
    for (i = 0; i < AO_BL_SIZE; ++i)
      {
        if (AO_load(a -> AO_stack_bl + i) == x_bits)
          {
            /* Entry is currently being removed.  Change it a little.   */
              ++x_bits;
              if ((x_bits & AO_BIT_MASK) == 0)
                /* Version count overflowed;         */
                /* EXTREMELY unlikely, but possible. */
                x_bits = (AO_t)x;
            goto retry;
          }
      }
  }
# endif
  /* x_bits is not currently being deleted */
  do
    {
      next = AO_load(list);
      *x = next;
    }
  while (!AO_compare_and_swap_release(list, next, x_bits));
}

/*
 * I concluded experimentally that checking a value first before
 * performing a compare-and-swap is usually beneficial on X86, but
 * slows things down appreciably with contention on Itanium.
 * Since the Itanium behavior makes more sense to me (more cache line
 * movement unless we're mostly reading, but back-off should guard
 * against that), we take Itanium as the default.  Measurements on
 * other multiprocessor architectures would be useful.  (On a uniprocessor,
 * the initial check is almost certainly a very small loss.) - HB
 */
#ifdef __i386__
# define PRECHECK(a) (a) == 0 &&
#else
# define PRECHECK(a)
#endif
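
/* With the i386 definition, the condition in the blacklist-insertion   */
/* loop below expands to                                                */
/*   (a -> AO_stack_bl[i]) == 0 && AO_compare_and_swap_acquire(...)     */
/* so the relatively expensive CAS is attempted only on an apparently   */
/* empty slot; elsewhere the CAS is attempted unconditionally.          */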

AO_t *
AO_stack_pop_explicit_aux_acquire(volatile AO_t *list, AO_stack_aux * a)
{
  unsigned i;
  int j = 0;
  AO_t first;
  AO_t * first_ptr;
  AO_t next;

 retry:
  first = AO_load(list);
  if (0 == first) return 0;
  /* Insert first into aux black list.                                  */
  /* This may spin if more than AO_BL_SIZE removals using auxiliary     */
  /* structure a are currently in progress.                             */
  for (i = 0; ; )
    {
      if (PRECHECK(a -> AO_stack_bl[i])
          AO_compare_and_swap_acquire(a->AO_stack_bl+i, 0, first))
        break;
      ++i;
      if (i >= AO_BL_SIZE)
        {
          i = 0;
          AO_pause(++j);
        }
    }
  assert(i < AO_BL_SIZE);
  assert(a -> AO_stack_bl[i] == first);
  /* First is on the auxiliary black list.  It may be removed by        */
  /* another thread before we get to it, but a new insertion of x       */
  /* cannot be started here.                                            */
  /* Only we can remove it from the black list.                         */
  /* We need to make sure that first is still the first entry on the    */
  /* list.  Otherwise it's possible that a reinsertion of it was        */
  /* already started before we added the black list entry.              */
  if (first != AO_load(list)) {
    AO_store_release(a->AO_stack_bl+i, 0);
    goto retry;
  }
  first_ptr = AO_REAL_NEXT_PTR(first);
  next = AO_load(first_ptr);
  if (!AO_compare_and_swap_release(list, first, next)) {
    AO_store_release(a->AO_stack_bl+i, 0);
    goto retry;
  }
  assert(*list != first);
  /* Since we never insert an entry on the black list, this cannot have */
  /* succeeded unless first remained on the list while we were running. */
  /* Thus its next link cannot have changed out from under us, and we   */
  /* removed exactly one entry and preserved the rest of the list.      */
  /* Note that it is quite possible that an additional entry was        */
  /* inserted and removed while we were running; this is OK since the   */
  /* part of the list following first must have remained unchanged, and */
  /* first must again have been at the head of the list when the        */
  /* compare_and_swap succeeded.                                        */
  AO_store_release(a->AO_stack_bl+i, 0);
  return first_ptr;
}

#else /* !AO_USE_ALMOST_LOCK_FREE */

/* Better names for fields in AO_stack_t */
#define ptr AO_val2
#define version AO_val1
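
/* An AO_stack_t is then treated as a double-width value: AO_val1       */
/* holds a version counter bumped to defeat the ABA problem, and        */
/* AO_val2 holds the head pointer.                                      */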

#if defined(AO_HAVE_compare_double_and_swap_double)

void AO_stack_push_release(AO_stack_t *list, AO_t *element)
{
    AO_t next;

    do {
      next = AO_load(&(list -> ptr));
      *element = next;
    } while (!AO_compare_and_swap_release
                    ( &(list -> ptr), next, (AO_t) element));
    /* We use a narrow CAS here, an old optimization suggested by       */
    /* Treiber.  Pop is still safe, since we run into the ABA           */
    /* problem only if there were both intervening "pop"s and "push"es. */
    /* In that case we still see a change in the version number.        */
}
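
/* An example of the ABA hazard the version count guards against:       */
/* thread T1, executing pop, reads first == A and next == B, then       */
/* stalls; thread T2 pops A, pops B, and pushes A back.  The head is    */
/* again A, so a pointer-only CAS by T1 would wrongly succeed and       */
/* install the stale B.  The double-width CAS below fails instead,      */
/* since each of T2's pops incremented the version.                     */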

AO_t *AO_stack_pop_acquire(AO_stack_t *list)
{
#   ifdef __clang__
      AO_t *volatile cptr;
                        /* Use volatile to workaround a bug in          */
                        /* clang-1.1/x86 causing test_stack failure.    */
#   else
      AO_t *cptr;
#   endif
    AO_t next;
    AO_t cversion;

    do {
      /* Version must be loaded first.  */
      cversion = AO_load_acquire(&(list -> version));
      cptr = (AO_t *)AO_load(&(list -> ptr));
      if (cptr == 0) return 0;
      next = *cptr;
    } while (!AO_compare_double_and_swap_double_release
                    (list, cversion, (AO_t) cptr, cversion+1, (AO_t) next));
    return cptr;
}


#elif defined(AO_HAVE_compare_and_swap_double)

/* Needed for future IA64 processors.  No current clients? */

#error Untested!  Probably does not work.

/* We have a wide CAS, but it only performs an AO_t-wide comparison.    */
/* We can't use the Treiber optimization, since we only check   */
/* for an unchanged version number, not an unchanged pointer.   */
void AO_stack_push_release(AO_stack_t *list, AO_t *element)
{
    AO_t version;
    AO_t next_ptr;

    do {
      /* Again, the version must be loaded first, for a different reason. */
      version = AO_load_acquire(&(list -> version));
      next_ptr = AO_load(&(list -> ptr));
      *element = next_ptr;
    } while (!AO_compare_and_swap_double_release(
                           list, version,
                           version+1, (AO_t) element));
}

AO_t *AO_stack_pop_acquire(AO_stack_t *list)
{
    AO_t *cptr;
    AO_t next;
    AO_t cversion;

    do {
      cversion = AO_load_acquire(&(list -> version));
      cptr = (AO_t *)AO_load(&(list -> ptr));
      if (cptr == 0) return 0;
      next = *cptr;
    } while (!AO_compare_double_and_swap_double_release
                    (list, cversion, (AO_t) cptr, cversion+1, next));
    return cptr;
}


#endif /* AO_HAVE_compare_and_swap_double */

#endif /* !AO_USE_ALMOST_LOCK_FREE */