summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErlang/OTP <otp@erlang.org>2018-11-09 15:24:45 +0100
committerErlang/OTP <otp@erlang.org>2018-11-09 15:24:45 +0100
commitc3cf08e8423e1e3fd1b76066b5c8008415ffd555 (patch)
treeb539d5aee92c6d99820046e4d199561cb55b4f0d
parent5fcec3e909f3a2eebef82c974e3cfb351398fd20 (diff)
parentd98da38562ec79360b58eed87eced3a506f1ff6d (diff)
downloaderlang-c3cf08e8423e1e3fd1b76066b5c8008415ffd555.tar.gz
Merge branch 'john/erts/OTP-18.3.4/minusminus_trapping/OTP-15371' into maint-18
* john/erts/OTP-18.3.4/minusminus_trapping/OTP-15371: Optimize operator '--' and yield on large inputs
-rw-r--r--erts/emulator/beam/erl_alloc.types3
-rw-r--r--erts/emulator/beam/erl_bif_lists.c810
-rw-r--r--lib/stdlib/doc/src/lists.xml6
-rw-r--r--lib/stdlib/test/lists_SUITE.erl42
-rw-r--r--system/doc/efficiency_guide/commoncaveats.xml48
5 files changed, 768 insertions, 141 deletions
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 7738531142..b988533d46 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -308,8 +308,11 @@ type RUNQ_BLNS LONG_LIVED SYSTEM run_queue_balancing
type THR_PRGR_IDATA LONG_LIVED SYSTEM thr_prgr_internal_data
type THR_PRGR_DATA LONG_LIVED SYSTEM thr_prgr_data
type T_THR_PRGR_DATA SHORT_LIVED SYSTEM temp_thr_prgr_data
+
+endif
+type LIST_TRAP SHORT_LIVED PROCESSES list_bif_trap_state
+
#
# Types used for special emulators
#
diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c
index 5583dcb371..a64828c86f 100644
--- a/erts/emulator/beam/erl_bif_lists.c
+++ b/erts/emulator/beam/erl_bif_lists.c
@@ -29,12 +29,21 @@
#include "sys.h"
#include "erl_vm.h"
#include "global.h"
-#include "erl_process.h"
-#include "error.h"
#include "bif.h"
+#include "erl_binary.h"
+
+#ifdef MAX
+#undef MAX
+#endif
+#define MAX(x,y) (((x)>(y))?(x):(y))
+#ifdef MIN
+#undef MIN
+#endif
+#define MIN(x,y) (((x)<(y))?(x):(y))
static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List);
+
static BIF_RETTYPE append(Process* p, Eterm A, Eterm B)
{
Eterm list;
@@ -85,103 +94,730 @@ BIF_RETTYPE append_2(BIF_ALIST_2)
return append(BIF_P, BIF_ARG_1, BIF_ARG_2);
}
-/*
- * erlang:'--'/2
- */
+/* erlang:'--'/2
+ *
+ * Subtracts a list from another (LHS -- RHS), removing the first occurrence of
+ * each element in LHS from RHS. There is no type coercion so the elements must
+ * match exactly.
+ *
+ * The BIF is broken into several stages that can all trap individually, and it
+ * chooses its algorithm based on input size. If either input is small it will
+ * use a linear scan tuned to which side it's on, and if both inputs are large
+ * enough it will convert RHS into a multiset to provide good asymptotic
+ * behavior. */
-#define SMALL_VEC_SIZE 10
-static Eterm subtract(Process* p, Eterm A, Eterm B)
-{
- Eterm list;
- Eterm* hp;
- Uint need;
- Eterm res;
- Eterm small_vec[SMALL_VEC_SIZE]; /* Preallocated memory for small lists */
- Eterm* vec_p;
- Eterm* vp;
- int i;
- int n;
- int m;
-
- if ((n = erts_list_length(A)) < 0) {
- BIF_ERROR(p, BADARG);
+#define SUBTRACT_LHS_THRESHOLD 16
+#define SUBTRACT_RHS_THRESHOLD 16
+
+typedef enum {
+ SUBTRACT_STAGE_START,
+ SUBTRACT_STAGE_LEN_LHS,
+
+ /* Naive linear scan that's efficient when
+ * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */
+ SUBTRACT_STAGE_NAIVE_LHS,
+
+ SUBTRACT_STAGE_LEN_RHS,
+
+ /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */
+ SUBTRACT_STAGE_NAIVE_RHS,
+
+ /* Creates a multiset from RHS for faster lookups before sweeping through
+ * LHS. The set is implemented as a red-black tree and duplicate elements
+ * are handled by a counter on each node. */
+ SUBTRACT_STAGE_SET_BUILD,
+ SUBTRACT_STAGE_SET_FINISH
+} ErtsSubtractCtxStage;
+
+typedef struct subtract_node__ {
+ struct subtract_node__ *parent;
+ struct subtract_node__ *left;
+ struct subtract_node__ *right;
+ int is_red;
+
+ Eterm key;
+ Uint count;
+} subtract_tree_t;
+
+typedef struct {
+ ErtsSubtractCtxStage stage;
+
+ Eterm lhs_original;
+ Eterm rhs_original;
+
+ Uint lhs_remaining;
+ Uint rhs_remaining;
+
+ Eterm iterator;
+
+ Eterm *result_cdr;
+ Eterm result;
+
+ union {
+ Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD];
+ Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD];
+
+ struct {
+ subtract_tree_t *tree;
+
+ /* A memory area for the tree's nodes, saving us the need to have
+ * one allocation per node. */
+ subtract_tree_t *alloc_start;
+ subtract_tree_t *alloc;
+ } rhs_set;
+ } u;
+} ErtsSubtractContext;
+
+#define ERTS_RBT_PREFIX subtract
+#define ERTS_RBT_T subtract_tree_t
+#define ERTS_RBT_KEY_T Eterm
+#define ERTS_RBT_FLAGS_T int
+#define ERTS_RBT_INIT_EMPTY_TNODE(T) \
+ do { \
+ (T)->parent = NULL; \
+ (T)->left = NULL; \
+ (T)->right = NULL; \
+ } while(0)
+#define ERTS_RBT_IS_RED(T) ((T)->is_red)
+#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1)
+#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T))
+#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0)
+#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red)
+#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F)
+#define ERTS_RBT_GET_PARENT(T) ((T)->parent)
+#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P)
+#define ERTS_RBT_GET_RIGHT(T) ((T)->right)
+#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R))
+#define ERTS_RBT_GET_LEFT(T) ((T)->left)
+#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L))
+#define ERTS_RBT_GET_KEY(T) ((T)->key)
+#define ERTS_RBT_IS_LT(KX, KY) (CMP_TERM(KX, KY) < 0)
+#define ERTS_RBT_IS_EQ(KX, KY) EQ(KX, KY)
+#define ERTS_RBT_WANT_LOOKUP_INSERT
+#define ERTS_RBT_WANT_LOOKUP
+#define ERTS_RBT_WANT_DELETE
+#define ERTS_RBT_UNDEF
+
+#include "erl_rbtree.h"
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context);
+
+static void subtract_ctx_dtor(ErtsSubtractContext *context) {
+ switch (context->stage) {
+ case SUBTRACT_STAGE_SET_BUILD:
+ case SUBTRACT_STAGE_SET_FINISH:
+ erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start);
+ break;
+ default:
+ break;
}
- if ((m = erts_list_length(B)) < 0) {
- BIF_ERROR(p, BADARG);
+}
+
+static void subtract_ctx_bin_dtor(Binary *context_bin) {
+ ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin);
+ subtract_ctx_dtor(context);
+ return;
+}
+
+static void subtract_ctx_move(ErtsSubtractContext *from,
+ ErtsSubtractContext *to) {
+ int uses_result_cdr = 0;
+
+ to->stage = from->stage;
+
+ to->lhs_original = from->lhs_original;
+ to->rhs_original = from->rhs_original;
+
+ to->lhs_remaining = from->lhs_remaining;
+ to->rhs_remaining = from->rhs_remaining;
+
+ to->iterator = from->iterator;
+ to->result = from->result;
+
+ switch (to->stage) {
+ case SUBTRACT_STAGE_NAIVE_LHS:
+ sys_memcpy(to->u.lhs_elements,
+ from->u.lhs_elements,
+ sizeof(Eterm) * to->lhs_remaining);
+ break;
+ case SUBTRACT_STAGE_NAIVE_RHS:
+ sys_memcpy(to->u.rhs_elements,
+ from->u.rhs_elements,
+ sizeof(Eterm) * to->rhs_remaining);
+
+ uses_result_cdr = 1;
+ break;
+ case SUBTRACT_STAGE_SET_FINISH:
+ uses_result_cdr = 1;
+ /* FALL THROUGH */
+ case SUBTRACT_STAGE_SET_BUILD:
+ to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start;
+ to->u.rhs_set.alloc = from->u.rhs_set.alloc;
+ to->u.rhs_set.tree = from->u.rhs_set.tree;
+ break;
+ default:
+ break;
}
-
- if (n == 0)
- BIF_RET(NIL);
- if (m == 0)
- BIF_RET(A);
-
- /* allocate element vector */
- if (n <= SMALL_VEC_SIZE)
- vec_p = small_vec;
- else
- vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm));
-
- /* PUT ALL ELEMENTS IN VP */
- vp = vec_p;
- list = A;
- i = n;
- while(i--) {
- Eterm* listp = list_val(list);
- *vp++ = CAR(listp);
- list = CDR(listp);
+
+ if (uses_result_cdr) {
+ if (from->result_cdr == &from->result) {
+ to->result_cdr = &to->result;
+ } else {
+ to->result_cdr = from->result_cdr;
+ }
}
-
- /* UNMARK ALL DELETED CELLS */
- list = B;
- m = 0; /* number of deleted elements */
- while(is_list(list)) {
- Eterm* listp = list_val(list);
- Eterm elem = CAR(listp);
- i = n;
- vp = vec_p;
- while(i--) {
- if (is_value(*vp) && eq(*vp, elem)) {
- *vp = THE_NON_VALUE;
- m++;
- break;
- }
- vp++;
- }
- list = CDR(listp);
+}
+
+static Eterm subtract_create_trap_state(Process *p,
+ ErtsSubtractContext *context) {
+ Binary *state_bin;
+ Eterm *hp;
+
+ state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext),
+ subtract_ctx_bin_dtor);
+
+ subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin));
+
+ hp = HAlloc(p, PROC_BIN_SIZE);
+
+ return erts_mk_magic_binary_term(&hp, &MSO(p), state_bin);
+}
+
+static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_LEN_LHS;
+
+ context->iterator = context->lhs_original;
+ context->lhs_remaining = 0;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_LEN_RHS;
+
+ context->iterator = context->rhs_original;
+ context->rhs_remaining = 0;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) {
+ static const Sint ELEMENTS_PER_RED = 32;
+
+ Sint budget, count;
+ Eterm iterator;
+
+ budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ iterator = *iterator_p;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ for (count = 0; count < budget && is_list(iterator); count++) {
+ iterator = CDR(list_val(iterator));
}
-
- if (m == n) /* All deleted ? */
- res = NIL;
- else if (m == 0) /* None deleted ? */
- res = A;
- else { /* REBUILD LIST */
- res = NIL;
- need = 2*(n - m);
- hp = HAlloc(p, need);
- vp = vec_p + n - 1;
- while(vp >= vec_p) {
- if (is_value(*vp)) {
- res = CONS(hp, *vp, res);
- hp += 2;
- }
- vp--;
- }
+
+ if (!is_list(iterator) && !is_nil(iterator)) {
+ return -1;
+ }
+
+ BUMP_REDS(p, count / ELEMENTS_PER_RED);
+
+ *iterator_p = iterator;
+ *count_p += count;
+
+ if (is_nil(iterator)) {
+ return 1;
}
- if (vec_p != small_vec)
- erts_free(ERTS_ALC_T_TMP, (void *) vec_p);
- BIF_RET(res);
+
+ return 0;
}
-BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2)
-{
- return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) {
+ Eterm iterator;
+ int i = 0;
+
+ context->stage = SUBTRACT_STAGE_NAIVE_LHS;
+
+ context->iterator = context->rhs_original;
+ context->result = NIL;
+
+ iterator = context->lhs_original;
+
+ while (is_list(iterator)) {
+ const Eterm *cell = list_val(iterator);
+
+ ASSERT(i < SUBTRACT_LHS_THRESHOLD);
+
+ context->u.lhs_elements[i++] = CAR(cell);
+ iterator = CDR(cell);
+ }
+
+ ASSERT(i == context->lhs_remaining);
+
+ return subtract_continue(p, context);
}
-BIF_RETTYPE subtract_2(BIF_ALIST_2)
-{
- return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) {
+ const Sint CHECKS_PER_RED = 16;
+ Sint checks, budget;
+
+ budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ checks = 0;
+
+ while (checks < budget && is_list(context->iterator)) {
+ const Eterm *cell;
+ Eterm value, next;
+ int found_at;
+
+ cell = list_val(context->iterator);
+
+ value = CAR(cell);
+ next = CDR(cell);
+
+ for (found_at = 0; found_at < context->lhs_remaining; found_at++) {
+ if (EQ(value, context->u.lhs_elements[found_at])) {
+ /* We shift the array one step down as we have to preserve
+ * order.
+ *
+ * Note that we can't exit early as that would suppress errors
+ * in the right-hand side (this runs prior to determining the
+ * length of RHS). */
+
+ context->lhs_remaining--;
+ sys_memmove(&context->u.lhs_elements[found_at],
+ &context->u.lhs_elements[found_at + 1],
+ (context->lhs_remaining - found_at) * sizeof(Eterm));
+ break;
+ }
+ }
+
+ checks += MAX(1, context->lhs_remaining);
+ context->iterator = next;
+ }
+
+ BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED);
+
+ if (is_list(context->iterator)) {
+ return 0;
+ } else if (!is_nil(context->iterator)) {
+ return -1;
+ }
+
+ if (context->lhs_remaining > 0) {
+ Eterm *hp;
+ int i;
+
+ hp = HAlloc(p, context->lhs_remaining * 2);
+
+ for (i = context->lhs_remaining - 1; i >= 0; i--) {
+ Eterm value = context->u.lhs_elements[i];
+
+ context->result = CONS(hp, value, context->result);
+ hp += 2;
+ }
+ }
+
+ ASSERT(context->lhs_remaining > 0 || context->result == NIL);
+
+ return 1;
+}
+
+static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) {
+ Eterm iterator;
+ int i = 0;
+
+ context->stage = SUBTRACT_STAGE_NAIVE_RHS;
+
+ context->iterator = context->lhs_original;
+ context->result_cdr = &context->result;
+ context->result = NIL;
+
+ iterator = context->rhs_original;
+
+ while (is_list(iterator)) {
+ const Eterm *cell = list_val(iterator);
+
+ ASSERT(i < SUBTRACT_RHS_THRESHOLD);
+
+ context->u.rhs_elements[i++] = CAR(cell);
+ iterator = CDR(cell);
+ }
+
+ ASSERT(i == context->rhs_remaining);
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) {
+ const Sint CHECKS_PER_RED = 16;
+ Sint checks, budget;
+
+ budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ checks = 0;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ while (checks < budget && is_list(context->iterator)) {
+ const Eterm *cell;
+ Eterm value, next;
+ int found_at;
+
+ cell = list_val(context->iterator);
+ value = CAR(cell);
+ next = CDR(cell);
+
+ for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) {
+ if (EQ(value, context->u.rhs_elements[found_at])) {
+ break;
+ }
+ }
+
+ if (found_at < 0) {
+ /* Destructively add the value to the result. This is safe
+ * since the GC is disabled and the unfinished term is never
+ * leaked to the outside world. */
+ Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+
+ *context->result_cdr = make_list(hp);
+ context->result_cdr = &CDR(hp);
+
+ CAR(hp) = value;
+ } else if (found_at >= 0) {
+ Eterm swap;
+
+ if (context->rhs_remaining-- == 1) {
+ /* We've run out of items to remove, so the rest of the
+ * result will be equal to the remainder of the input. We know
+ * that LHS is well-formed as any errors would've been reported
+ * during length determination. */
+ *context->result_cdr = next;
+
+ BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+
+ return 1;
+ }
+
+ swap = context->u.rhs_elements[context->rhs_remaining];
+ context->u.rhs_elements[found_at] = swap;
+ }
+
+ checks += context->rhs_remaining;
+ context->iterator = next;
+ context->lhs_remaining--;
+ }
+
+ /* The result only has to be terminated when returning it to the user, but
+ * we're doing it when trapping as well to prevent headaches when
+ * debugging. */
+ *context->result_cdr = NIL;
+
+ BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+
+ if (is_list(context->iterator)) {
+ ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_SET_BUILD;
+
+ context->u.rhs_set.alloc_start =
+ erts_alloc(ERTS_ALC_T_LIST_TRAP,
+ context->rhs_remaining * sizeof(subtract_tree_t));
+
+ context->u.rhs_set.alloc = context->u.rhs_set.alloc_start;
+ context->u.rhs_set.tree = NULL;
+
+ context->iterator = context->rhs_original;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_set_build(Process *p, ErtsSubtractContext *context) {
+ const static Sint INSERTIONS_PER_RED = 16;
+ Sint budget, insertions;
+
+ budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ insertions = 0;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ while (insertions < budget && is_list(context->iterator)) {
+ subtract_tree_t *existing_node, *new_node;
+ const Eterm *cell;
+ Eterm value, next;
+
+ cell = list_val(context->iterator);
+ value = CAR(cell);
+ next = CDR(cell);
+
+ new_node = context->u.rhs_set.alloc;
+ new_node->key = value;
+ new_node->count = 1;
+
+ existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree,
+ new_node);
+
+ if (existing_node != NULL) {
+ existing_node->count++;
+ } else {
+ context->u.rhs_set.alloc++;
+ }
+
+ context->iterator = next;
+ insertions++;
+ }
+
+ BUMP_REDS(p, insertions / INSERTIONS_PER_RED);
+
+ ASSERT(is_list(context->iterator) || is_nil(context->iterator));
+ ASSERT(context->u.rhs_set.tree != NULL);
+
+ return is_nil(context->iterator);
+}
+
+static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_SET_FINISH;
+
+ context->result_cdr = &context->result;
+ context->result = NIL;
+
+ context->iterator = context->lhs_original;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_set_finish(Process *p, ErtsSubtractContext *context) {
+ const Sint CHECKS_PER_RED = 8;
+ Sint checks, budget;
+
+ budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ checks = 0;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ while (checks < budget && is_list(context->iterator)) {
+ subtract_tree_t *node;
+ const Eterm *cell;
+ Eterm value, next;
+
+ cell = list_val(context->iterator);
+ value = CAR(cell);
+ next = CDR(cell);
+
+ ASSERT(context->rhs_remaining > 0);
+
+ node = subtract_rbt_lookup(context->u.rhs_set.tree, value);
+
+ if (node == NULL) {
+ Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+
+ *context->result_cdr = make_list(hp);
+ context->result_cdr = &CDR(hp);
+
+ CAR(hp) = value;
+ } else {
+ if (context->rhs_remaining-- == 1) {
+ *context->result_cdr = next;
+
+ BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+ return 1;
+ }
+
+ if (node->count-- == 1) {
+ subtract_rbt_delete(&context->u.rhs_set.tree, node);
+ }
+ }
+
+ context->iterator = next;
+ context->lhs_remaining--;
+ checks++;
+ }
+
+ *context->result_cdr = NIL;
+
+ BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+ if (is_list(context->iterator)) {
+ ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context) {
+ switch (context->stage) {
+ case SUBTRACT_STAGE_START: {
+ return subtract_enter_len_lhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_LEN_LHS: {
+ int res = subtract_get_length(p,
+ &context->iterator,
+ &context->lhs_remaining);
+
+ if (res != 1) {
+ return res;
+ }
+
+ if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) {
+ return subtract_enter_naive_lhs(p, context);
+ }
+
+ return subtract_enter_len_rhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_NAIVE_LHS: {
+ return subtract_naive_lhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_LEN_RHS: {
+ int res = subtract_get_length(p,
+ &context->iterator,
+ &context->rhs_remaining);
+
+ if (res != 1) {
+ return res;
+ }
+
+ /* We've walked through both lists fully now so we no longer need
+ * to check for errors past this point. */
+
+ if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) {
+ return subtract_enter_naive_rhs(p, context);
+ }
+
+ return subtract_enter_set_build(p, context);
+ }
+
+ case SUBTRACT_STAGE_NAIVE_RHS: {
+ return subtract_naive_rhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_SET_BUILD: {
+ int res = subtract_set_build(p, context);
+
+ if (res != 1) {
+ return res;
+ }
+
+ return subtract_enter_set_finish(p, context);
+ }
+
+ case SUBTRACT_STAGE_SET_FINISH: {
+ return subtract_set_finish(p, context);
+ }
+
+ default:
+ erts_exit(ERTS_ABORT_EXIT, "unreachable");
+ }
+}
+
+static int subtract_start(Process *p, Eterm lhs, Eterm rhs,
+ ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_START;
+
+ context->lhs_original = lhs;
+ context->rhs_original = rhs;
+
+ return subtract_continue(p, context);
}
+/* erlang:'--'/2 */
+static Eterm subtract(Export *bif_entry, BIF_ALIST_2) {
+ Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2;
+
+ if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) {
+ /* We start with the context on the stack in the hopes that we won't
+ * have to trap. */
+ ErtsSubtractContext context;
+ int res;
+
+ res = subtract_start(BIF_P, lhs, rhs, &context);
+
+ if (res == 0) {
+ Eterm state_mref;
+
+ state_mref = subtract_create_trap_state(BIF_P, &context);
+ erts_set_gc_state(BIF_P, 0);
+
+ BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL);
+ }
+
+ subtract_ctx_dtor(&context);
+
+ if (res < 0) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ BIF_RET(context.result);
+ } else if (is_binary(lhs)) {
+ ErtsSubtractContext *context;
+ void (*dtor)(Binary*);
+ Binary *magic_bin;
+ Eterm *bin = (Eterm*)binary_val(BIF_ARG_1);
+
+ int res;
+
+ if (*bin != HEADER_PROC_BIN) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ magic_bin = ((ProcBin *) binary_val(BIF_ARG_1))->val;
+ dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin);
+
+ if (dtor != subtract_ctx_bin_dtor) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ ASSERT(BIF_P->flags & F_DISABLE_GC);
+ ASSERT(rhs == NIL);
+
+ context = ERTS_MAGIC_BIN_DATA(magic_bin);
+ res = subtract_continue(BIF_P, context);
+
+ if (res == 0) {
+ BIF_TRAP2(bif_entry, BIF_P, lhs, NIL);
+ }
+
+ erts_set_gc_state(BIF_P, 1);
+
+ if (res < 0) {
+ ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry,
+ context->lhs_original,
+ context->rhs_original);
+ }
+
+ BIF_RET(context->result);
+ }
+
+ ASSERT(!(BIF_P->flags & F_DISABLE_GC));
+
+ BIF_ERROR(BIF_P, BADARG);
+}
+
+BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) {
+ return subtract(bif_export[BIF_ebif_minusminus_2], BIF_P, BIF__ARGS);
+}
+
+BIF_RETTYPE subtract_2(BIF_ALIST_2) {
+ return subtract(bif_export[BIF_subtract_2], BIF_P, BIF__ARGS);
+}
+
+
BIF_RETTYPE lists_member_2(BIF_ALIST_2)
{
Eterm term;
diff --git a/lib/stdlib/doc/src/lists.xml b/lib/stdlib/doc/src/lists.xml
index 89ba5238b5..4a4c170bba 100644
--- a/lib/stdlib/doc/src/lists.xml
+++ b/lib/stdlib/doc/src/lists.xml
@@ -708,12 +708,6 @@ splitwith(Pred, List) ->
> <input>lists:subtract("123212", "212").</input>
"312".</pre>
<p><c>lists:subtract(A, B)</c> is equivalent to <c>A -- B</c>.</p>
- <warning><p>The complexity of <c>lists:subtract(A, B)</c> is proportional
- to <c>length(A)*length(B)</c>, meaning that it will be very slow if
- both <c>A</c> and <c>B</c> are long lists.
- (Using ordered lists and
- <seealso marker="ordsets#subtract/2">ordsets:subtract/2</seealso>
- is a much better choice if both lists are long.)</p></warning>
</desc>
</func>
<func>
diff --git a/lib/stdlib/test/lists_SUITE.erl b/lib/stdlib/test/lists_SUITE.erl
index a0f7fd2744..21b2dd1777 100644
--- a/lib/stdlib/test/lists_SUITE.erl
+++ b/lib/stdlib/test/lists_SUITE.erl
@@ -2692,6 +2692,13 @@ subtract(Config) when is_list(Config) ->
?line {'EXIT',_} = (catch sub([a|b], [])),
?line {'EXIT',_} = (catch sub([a|b], [a])),
+ %% Trapping, both crashing and otherwise.
+ [sub_trapping(N) || N <- lists:seq(0, 18)],
+
+ %% The current implementation chooses which algorithm to use based on
+ %% certain thresholds, and we need proper coverage for all corner cases.
+ [sub_thresholds(N) || N <- lists:seq(0, 32)],
+
ok.
sub_non_matching(A, B) ->
@@ -2701,6 +2708,41 @@ sub(A, B) ->
Res = A -- B,
Res = lists:subtract(A, B).
+sub_trapping(N) ->
+ List = lists:duplicate(N + (1 bsl N), gurka),
+ ImproperList = List ++ crash,
+
+ {'EXIT',_} = (catch sub_trapping_1(ImproperList, [])),
+ {'EXIT',_} = (catch sub_trapping_1(List, ImproperList)),
+
+ List = List -- lists:duplicate(N + (1 bsl N), gaffel),
+ ok = sub_trapping_1(List, []).
+
+sub_trapping_1([], _) -> ok;
+sub_trapping_1(L, R) -> sub_trapping_1(L -- R, [gurka | R]).
+
+sub_thresholds(N) ->
+ %% This needs to be long enough to cause trapping.
+ OtherLen = 1 bsl 18,
+ Other = lists:seq(0, OtherLen - 1),
+
+ Disjoint = lists:seq(-N, -1),
+ Subset = lists:seq(1, N),
+
+ %% LHS is disjoint from RHS, so all elements must be retained.
+ Disjoint = Disjoint -- Other,
+
+ %% LHS is covered by RHS, so all elements must be removed.
+ [] = Subset -- Other,
+
+ %% RHS is disjoint from LHS, so all elements must be retained.
+ Other = Other -- Disjoint,
+
+ %% RHS is covered by LHS, so N elements must be removed.
+ N = OtherLen - length(Other -- Subset),
+
+ ok.
+
%% Test lists:droplast/1
droplast(Config) when is_list(Config) ->
?line [] = lists:droplast([x]),
diff --git a/system/doc/efficiency_guide/commoncaveats.xml b/system/doc/efficiency_guide/commoncaveats.xml
index c1d71e745a..854dd54660 100644
--- a/system/doc/efficiency_guide/commoncaveats.xml
+++ b/system/doc/efficiency_guide/commoncaveats.xml
@@ -169,53 +169,5 @@ multiple_setelement(T0) ->
{Bin1,Bin2} = split_binary(Bin, Num)</code>
</section>
- <section>
- <title>Operator "--"</title>
- <p>The "<c>--</c>" operator has a complexity
- proportional to the product of the length of its operands.
- This means that the operator is very slow if both of its operands
- are long lists:</p>
-
- <p><em>DO NOT</em></p>
- <code type="none"><![CDATA[
- HugeList1 -- HugeList2]]></code>
-
- <p>Instead use the <seealso marker="stdlib:ordsets">ordsets</seealso>
- module in STDLIB:</p>
-
- <p><em>DO</em></p>
- <code type="none">
- HugeSet1 = ordsets:from_list(HugeList1),
- HugeSet2 = ordsets:from_list(HugeList2),
- ordsets:subtract(HugeSet1, HugeSet2)</code>
-
- <p>Obviously, that code does not work if the original order
- of the list is important. If the order of the list must be
- preserved, do as follows:</p>
-
- <p><em>DO</em></p>
- <code type="none"><![CDATA[
- Set = gb_sets:from_list(HugeList2),
- [E || E <- HugeList1, not gb_sets:is_element(E, Set)]]]></code>
-
- <note><p>This code behaves differently from "<c>--</c>"
- if the lists contain duplicate elements (one occurrence
- of an element in HugeList2 removes <em>all</em>
- occurrences in HugeList1.)</p>
- <p>Also, this code compares lists elements using the
- "<c>==</c>" operator, while "<c>--</c>" uses the "<c>=:=</c>" operator.
- If that difference is important, <c>sets</c> can be used instead of
- <c>gb_sets</c>, but <c>sets:from_list/1</c> is much
- slower than <c>gb_sets:from_list/1</c> for long lists.</p></note>
-
- <p>Using the "<c>--</c>" operator to delete an element
- from a list is not a performance problem:</p>
-
- <p><em>OK</em></p>
- <code type="none">
- HugeList1 -- [Element]</code>
-
- </section>
-
</chapter>