diff options
author | K <5229241+Krasjet@users.noreply.github.com> | 2023-01-28 10:38:05 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-28 16:38:05 +0100 |
commit | 75516ffe6d384223fbb32da704ad17a6af340961 (patch) | |
tree | 3552ddd87803432559d199f2379429faf5421c2e | |
parent | f18660aa033d8647a618b1e34ad845fa93e81754 (diff) | |
download | jack2-75516ffe6d384223fbb32da704ad17a6af340961.tar.gz |
Fix ringbuffer thread safety on ARM. Fix #715 #388 (#886)
* fix ringbuffer thread safety on ARM. fix #715 #388
This patch addresses the thread safety problem of `jack_ringbuffer_t`
mentioned in #715 and #388. The out-of-bounds read bug caused by this problem
is impossible to reproduce on x86 due to its strong memory ordering, but
it is a problem on ARM and other weakly ordered architectures.
Basically, the main problem is that, on a weakly ordered architecture,
it is possible that the pointer increment after `memcpy` becomes visible
to the other thread before `memcpy` finishes:
memcpy (&(rb->buf[rb->write_ptr]), src, n1);
// vvv can be visible to reading thread before memcpy finishes
rb->write_ptr = (rb->write_ptr + n1) & rb->size_mask;
If this happens, the other thread can read the remaining garbage values
in `rb->buf` that have not yet been overwritten by the unfinished `memcpy`.
To fix this, an explicit pair of release/acquire memory fences [1] is
used to ensure the copy on the other thread *happens after* the `memcpy`
finishes so no garbage values can be read.
[1]: https://preshing.com/20130922/acquire-and-release-fences/
* remove volatile qualifier on ringbuf r/w pointers
The volatile qualifiers are unnecessary when compiler barriers are present.
It generates unnecessary `mov` instructions when pointers aren't going
to be updated.
* simplify read/write space calculations
This optimization is possible because the buffer size is always a power
of 2. See [1] for details.
[1]: https://github.com/drobilla/zix/pull/1#issuecomment-1212687196
* move acq fences to separate lines
-rw-r--r-- | common/jack/ringbuffer.h | 4 | ||||
-rw-r--r-- | common/ringbuffer.c | 85 |
2 files changed, 52 insertions, 37 deletions
diff --git a/common/jack/ringbuffer.h b/common/jack/ringbuffer.h index 5204383c..70985720 100644 --- a/common/jack/ringbuffer.h +++ b/common/jack/ringbuffer.h @@ -50,8 +50,8 @@ jack_ringbuffer_data_t ; typedef struct { char *buf; - volatile size_t write_ptr; - volatile size_t read_ptr; + size_t write_ptr; + size_t read_ptr; size_t size; size_t size_mask; int mlocked; diff --git a/common/ringbuffer.c b/common/ringbuffer.c index 9de844f6..a44fc787 100644 --- a/common/ringbuffer.c +++ b/common/ringbuffer.c @@ -27,6 +27,35 @@ #endif /* USE_MLOCK */ #include "JackCompilerDeps.h" +/* Portable definitions for acquire and release fences */ +#if defined(_MSC_VER) + #if defined(_M_AMD64) || defined(_M_IX86) || defined(_M_X64) + /* Only compiler fences are needed on x86. In fact, GCC + * will generate no instructions for acq/rel fences on + * x86 */ + #include <intrin.h> + #define JACK_ACQ_FENCE() _ReadBarrier() + #define JACK_REL_FENCE() _WriteBarrier() + #else + /* Use full memory fence for non-x86 systems with msvc */ + #include <windows.h> + #define JACK_ACQ_FENCE() MemoryBarrier() + #define JACK_REL_FENCE() MemoryBarrier() + #endif +#elif defined(__GNUC__) + #ifdef __ATOMIC_ACQUIRE + #define JACK_ACQ_FENCE() __atomic_thread_fence(__ATOMIC_ACQUIRE) + #define JACK_REL_FENCE() __atomic_thread_fence(__ATOMIC_RELEASE) + #else + /* Fallback to the legacy __sync builtin (full memory fence) */ + #define JACK_ACQ_FENCE() __sync_synchronize() + #define JACK_REL_FENCE() __sync_synchronize() + #endif +#else + #define JACK_ACQ_FENCE() + #define JACK_REL_FENCE() +#endif + typedef struct { char *buf; size_t len; @@ -35,8 +64,8 @@ jack_ringbuffer_data_t ; typedef struct { char *buf; - volatile size_t write_ptr; - volatile size_t read_ptr; + size_t write_ptr; + size_t read_ptr; size_t size; size_t size_mask; int mlocked; @@ -126,7 +155,7 @@ jack_ringbuffer_reset (jack_ringbuffer_t * rb) { rb->read_ptr = 0; rb->write_ptr = 0; - memset(rb->buf, 0, rb->size); + memset(rb->buf, 0, rb->size); 
} /* Reset the read and write pointers to zero. This is not thread @@ -152,13 +181,10 @@ jack_ringbuffer_read_space (const jack_ringbuffer_t * rb) size_t w, r; w = rb->write_ptr; + JACK_ACQ_FENCE(); r = rb->read_ptr; - if (w > r) { - return w - r; - } else { - return (w - r + rb->size) & rb->size_mask; - } + return (w - r) & rb->size_mask; } /* Return the number of bytes available for writing. This is the @@ -172,14 +198,9 @@ jack_ringbuffer_write_space (const jack_ringbuffer_t * rb) w = rb->write_ptr; r = rb->read_ptr; + JACK_ACQ_FENCE(); - if (w > r) { - return ((r - w + rb->size) & rb->size_mask) - 1; - } else if (w < r) { - return (r - w) - 1; - } else { - return rb->size - 1; - } + return (r - w - 1) & rb->size_mask; } /* The copying data reader. Copy at most `cnt' bytes from `rb' to @@ -199,6 +220,8 @@ jack_ringbuffer_read (jack_ringbuffer_t * rb, char *dest, size_t cnt) to_read = cnt > free_cnt ? free_cnt : cnt; + /* note: relaxed load here, rb->read_ptr cannot be + * modified from writing thread */ cnt2 = rb->read_ptr + to_read; if (cnt2 > rb->size) { @@ -210,10 +233,12 @@ jack_ringbuffer_read (jack_ringbuffer_t * rb, char *dest, size_t cnt) } memcpy (dest, &(rb->buf[rb->read_ptr]), n1); + JACK_REL_FENCE(); /* ensure pointer increment happens after copy */ rb->read_ptr = (rb->read_ptr + n1) & rb->size_mask; if (n2) { memcpy (dest + n1, &(rb->buf[rb->read_ptr]), n2); + JACK_REL_FENCE(); /* ensure pointer increment happens after copy */ rb->read_ptr = (rb->read_ptr + n2) & rb->size_mask; } @@ -278,6 +303,8 @@ jack_ringbuffer_write (jack_ringbuffer_t * rb, const char *src, size_t cnt) to_write = cnt > free_cnt ? 
free_cnt : cnt; + /* note: relaxed load here, rb->write_ptr cannot be + * modified from reading thread */ cnt2 = rb->write_ptr + to_write; if (cnt2 > rb->size) { @@ -289,10 +316,12 @@ jack_ringbuffer_write (jack_ringbuffer_t * rb, const char *src, size_t cnt) } memcpy (&(rb->buf[rb->write_ptr]), src, n1); + JACK_REL_FENCE(); /* ensure pointer increment happens after copy */ rb->write_ptr = (rb->write_ptr + n1) & rb->size_mask; if (n2) { memcpy (&(rb->buf[rb->write_ptr]), src + n1, n2); + JACK_REL_FENCE(); /* ensure pointer increment happens after copy */ rb->write_ptr = (rb->write_ptr + n2) & rb->size_mask; } @@ -305,6 +334,7 @@ LIB_EXPORT void jack_ringbuffer_read_advance (jack_ringbuffer_t * rb, size_t cnt) { size_t tmp = (rb->read_ptr + cnt) & rb->size_mask; + JACK_REL_FENCE(); /* ensure pointer increment happens after copy (by user) */ rb->read_ptr = tmp; } @@ -314,6 +344,7 @@ LIB_EXPORT void jack_ringbuffer_write_advance (jack_ringbuffer_t * rb, size_t cnt) { size_t tmp = (rb->write_ptr + cnt) & rb->size_mask; + JACK_REL_FENCE(); /* ensure pointer increment happens after copy (by user) */ rb->write_ptr = tmp; } @@ -328,17 +359,10 @@ jack_ringbuffer_get_read_vector (const jack_ringbuffer_t * rb, { size_t free_cnt; size_t cnt2; - size_t w, r; + size_t r; - w = rb->write_ptr; r = rb->read_ptr; - - if (w > r) { - free_cnt = w - r; - } else { - free_cnt = (w - r + rb->size) & rb->size_mask; - } - + free_cnt = jack_ringbuffer_read_space(rb); cnt2 = r + free_cnt; if (cnt2 > rb->size) { @@ -372,19 +396,10 @@ jack_ringbuffer_get_write_vector (const jack_ringbuffer_t * rb, { size_t free_cnt; size_t cnt2; - size_t w, r; + size_t w; w = rb->write_ptr; - r = rb->read_ptr; - - if (w > r) { - free_cnt = ((r - w + rb->size) & rb->size_mask) - 1; - } else if (w < r) { - free_cnt = (r - w) - 1; - } else { - free_cnt = rb->size - 1; - } - + free_cnt = jack_ringbuffer_write_space(rb); cnt2 = w + free_cnt; if (cnt2 > rb->size) { |