summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author    K <5229241+Krasjet@users.noreply.github.com>  2023-01-28 10:38:05 -0500
committer GitHub <noreply@github.com>                   2023-01-28 16:38:05 +0100
commit    75516ffe6d384223fbb32da704ad17a6af340961 (patch)
tree      3552ddd87803432559d199f2379429faf5421c2e
parent    f18660aa033d8647a618b1e34ad845fa93e81754 (diff)
download  jack2-75516ffe6d384223fbb32da704ad17a6af340961.tar.gz
Fix ringbuffer thread safety on ARM. Fix #715 #388 (#886)
* fix ringbuffer thread safety on ARM. fix #715 #388

  This patch addresses the thread safety problem of `jack_ringbuffer_t`
  mentioned in #715 and #388. The overbound read bug caused by this problem
  is impossible to reproduce on x86 due to its strong memory ordering, but
  it is a problem on ARM and other weakly ordered architectures.

  Basically, the main problem is that, on a weakly ordered architecture, it
  is possible that the pointer increment after `memcpy` becomes visible to
  the other thread before `memcpy` finishes:

      memcpy (&(rb->buf[rb->write_ptr]), src, n1);
      // vvv can be visible to reading thread before memcpy finishes
      rb->write_ptr = (rb->write_ptr + n1) & rb->size_mask;

  If this happens, the other thread can read the remaining garbage values in
  `rb->buf` due to be overwritten by the unfinished `memcpy`.

  To fix this, an explicit pair of release/acquire memory fences [1] is used
  to ensure the copy on the other thread *happens after* the `memcpy`
  finishes so no garbage values can be read.

  [1]: https://preshing.com/20130922/acquire-and-release-fences/

* remove volatile qualifier on ringbuf r/w pointers

  The volatile constraints are excess when compiler barriers are present.
  It generates unnecessary `mov` instructions when pointers aren't going to
  be updated.

* simplify read/write space calculations

  This optimization is possible because the buffer size is always a power
  of 2. See [1] for details.

  [1]: https://github.com/drobilla/zix/pull/1#issuecomment-1212687196

* move acq fences to separate lines
-rw-r--r--  common/jack/ringbuffer.h  |  4
-rw-r--r--  common/ringbuffer.c       | 85
2 files changed, 52 insertions, 37 deletions
diff --git a/common/jack/ringbuffer.h b/common/jack/ringbuffer.h
index 5204383c..70985720 100644
--- a/common/jack/ringbuffer.h
+++ b/common/jack/ringbuffer.h
@@ -50,8 +50,8 @@ jack_ringbuffer_data_t ;
typedef struct {
char *buf;
- volatile size_t write_ptr;
- volatile size_t read_ptr;
+ size_t write_ptr;
+ size_t read_ptr;
size_t size;
size_t size_mask;
int mlocked;
diff --git a/common/ringbuffer.c b/common/ringbuffer.c
index 9de844f6..a44fc787 100644
--- a/common/ringbuffer.c
+++ b/common/ringbuffer.c
@@ -27,6 +27,35 @@
#endif /* USE_MLOCK */
#include "JackCompilerDeps.h"
+/* Portable definitions for acquire and release fences */
+#if defined(_MSC_VER)
+ #if defined(_M_AMD64) || defined(_M_IX86) || defined(_M_X64)
+ /* Only compiler fences are needed on x86. In fact, GCC
+ * will generate no instructions for acq/rel fences on
+ * x86 */
+ #include <intrin.h>
+ #define JACK_ACQ_FENCE() _ReadBarrier()
+ #define JACK_REL_FENCE() _WriteBarrier()
+ #else
+ /* Use full memory fence for non-x86 systems with msvc */
+ #include <windows.h>
+ #define JACK_ACQ_FENCE() MemoryBarrier()
+ #define JACK_REL_FENCE() MemoryBarrier()
+ #endif
+#elif defined(__GNUC__)
+ #ifdef __ATOMIC_ACQUIRE
+ #define JACK_ACQ_FENCE() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+ #define JACK_REL_FENCE() __atomic_thread_fence(__ATOMIC_RELEASE)
+ #else
+ /* Fallback to the legacy __sync builtin (full memory fence) */
+ #define JACK_ACQ_FENCE() __sync_synchronize()
+ #define JACK_REL_FENCE() __sync_synchronize()
+ #endif
+#else
+ #define JACK_ACQ_FENCE()
+ #define JACK_REL_FENCE()
+#endif
+
typedef struct {
char *buf;
size_t len;
@@ -35,8 +64,8 @@ jack_ringbuffer_data_t ;
typedef struct {
char *buf;
- volatile size_t write_ptr;
- volatile size_t read_ptr;
+ size_t write_ptr;
+ size_t read_ptr;
size_t size;
size_t size_mask;
int mlocked;
@@ -126,7 +155,7 @@ jack_ringbuffer_reset (jack_ringbuffer_t * rb)
{
rb->read_ptr = 0;
rb->write_ptr = 0;
- memset(rb->buf, 0, rb->size);
+ memset(rb->buf, 0, rb->size);
}
/* Reset the read and write pointers to zero. This is not thread
@@ -152,13 +181,10 @@ jack_ringbuffer_read_space (const jack_ringbuffer_t * rb)
size_t w, r;
w = rb->write_ptr;
+ JACK_ACQ_FENCE();
r = rb->read_ptr;
- if (w > r) {
- return w - r;
- } else {
- return (w - r + rb->size) & rb->size_mask;
- }
+ return (w - r) & rb->size_mask;
}
/* Return the number of bytes available for writing. This is the
@@ -172,14 +198,9 @@ jack_ringbuffer_write_space (const jack_ringbuffer_t * rb)
w = rb->write_ptr;
r = rb->read_ptr;
+ JACK_ACQ_FENCE();
- if (w > r) {
- return ((r - w + rb->size) & rb->size_mask) - 1;
- } else if (w < r) {
- return (r - w) - 1;
- } else {
- return rb->size - 1;
- }
+ return (r - w - 1) & rb->size_mask;
}
/* The copying data reader. Copy at most `cnt' bytes from `rb' to
@@ -199,6 +220,8 @@ jack_ringbuffer_read (jack_ringbuffer_t * rb, char *dest, size_t cnt)
to_read = cnt > free_cnt ? free_cnt : cnt;
+ /* note: relaxed load here, rb->read_ptr cannot be
+ * modified from writing thread */
cnt2 = rb->read_ptr + to_read;
if (cnt2 > rb->size) {
@@ -210,10 +233,12 @@ jack_ringbuffer_read (jack_ringbuffer_t * rb, char *dest, size_t cnt)
}
memcpy (dest, &(rb->buf[rb->read_ptr]), n1);
+ JACK_REL_FENCE(); /* ensure pointer increment happens after copy */
rb->read_ptr = (rb->read_ptr + n1) & rb->size_mask;
if (n2) {
memcpy (dest + n1, &(rb->buf[rb->read_ptr]), n2);
+ JACK_REL_FENCE(); /* ensure pointer increment happens after copy */
rb->read_ptr = (rb->read_ptr + n2) & rb->size_mask;
}
@@ -278,6 +303,8 @@ jack_ringbuffer_write (jack_ringbuffer_t * rb, const char *src, size_t cnt)
to_write = cnt > free_cnt ? free_cnt : cnt;
+ /* note: relaxed load here, rb->write_ptr cannot be
+ * modified from reading thread */
cnt2 = rb->write_ptr + to_write;
if (cnt2 > rb->size) {
@@ -289,10 +316,12 @@ jack_ringbuffer_write (jack_ringbuffer_t * rb, const char *src, size_t cnt)
}
memcpy (&(rb->buf[rb->write_ptr]), src, n1);
+ JACK_REL_FENCE(); /* ensure pointer increment happens after copy */
rb->write_ptr = (rb->write_ptr + n1) & rb->size_mask;
if (n2) {
memcpy (&(rb->buf[rb->write_ptr]), src + n1, n2);
+ JACK_REL_FENCE(); /* ensure pointer increment happens after copy */
rb->write_ptr = (rb->write_ptr + n2) & rb->size_mask;
}
@@ -305,6 +334,7 @@ LIB_EXPORT void
jack_ringbuffer_read_advance (jack_ringbuffer_t * rb, size_t cnt)
{
size_t tmp = (rb->read_ptr + cnt) & rb->size_mask;
+ JACK_REL_FENCE(); /* ensure pointer increment happens after copy (by user) */
rb->read_ptr = tmp;
}
@@ -314,6 +344,7 @@ LIB_EXPORT void
jack_ringbuffer_write_advance (jack_ringbuffer_t * rb, size_t cnt)
{
size_t tmp = (rb->write_ptr + cnt) & rb->size_mask;
+ JACK_REL_FENCE(); /* ensure pointer increment happens after copy (by user) */
rb->write_ptr = tmp;
}
@@ -328,17 +359,10 @@ jack_ringbuffer_get_read_vector (const jack_ringbuffer_t * rb,
{
size_t free_cnt;
size_t cnt2;
- size_t w, r;
+ size_t r;
- w = rb->write_ptr;
r = rb->read_ptr;
-
- if (w > r) {
- free_cnt = w - r;
- } else {
- free_cnt = (w - r + rb->size) & rb->size_mask;
- }
-
+ free_cnt = jack_ringbuffer_read_space(rb);
cnt2 = r + free_cnt;
if (cnt2 > rb->size) {
@@ -372,19 +396,10 @@ jack_ringbuffer_get_write_vector (const jack_ringbuffer_t * rb,
{
size_t free_cnt;
size_t cnt2;
- size_t w, r;
+ size_t w;
w = rb->write_ptr;
- r = rb->read_ptr;
-
- if (w > r) {
- free_cnt = ((r - w + rb->size) & rb->size_mask) - 1;
- } else if (w < r) {
- free_cnt = (r - w) - 1;
- } else {
- free_cnt = rb->size - 1;
- }
-
+ free_cnt = jack_ringbuffer_write_space(rb);
cnt2 = w + free_cnt;
if (cnt2 > rb->size) {