summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2019-06-05 10:44:58 +1000
committerLuke Chen <luke.chen@mongodb.com>2019-06-05 10:44:58 +1000
commitc939010fe98ba0a8affe7d0d30d4e8d57e68242b (patch)
treed26869393e4eb400fc7eb2b5bb3194014ba71956 /src
parented173f027ae8940bb93c57c580993192dc534fb0 (diff)
downloadmongo-c939010fe98ba0a8affe7d0d30d4e8d57e68242b.tar.gz
Import wiredtiger: 9b85ad89688bd72b8a649d844a7e458832955764 from branch mongodb-4.2
ref: 57bd75fee9..9b85ad8968
for: 4.3.1
WT-4190 Decrease shutdown time by doing multi-threaded cache flush
WT-4608 Cache stuck with clean pages for LSM data format testing
WT-4775 Make the "bad file descriptor" test resilient against crashing
WT-4793 Extend test/checkpoint to use timestamps and more
WT-4825 Add warning in operation tracking if the open file limit is too small
WT-4828 Fix type-related exception in operation tracking after conversion to Python3
Diffstat (limited to 'src')
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c7
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c24
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/checkpointer.c70
-rwxr-xr-xsrc/third_party/wiredtiger/test/checkpoint/smoke.sh9
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c56
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h5
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/workers.c135
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/suite_subprocess.py55
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/test/suite/test_bug018.py47
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/wttest.py3
-rwxr-xr-xsrc/third_party/wiredtiger/tools/optrack/find-latency-spikes.py21
12 files changed, 369 insertions, 65 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 43ede9082a0..22114815861 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "57bd75fee93b685c133281100719f886d0184589",
+ "commit": "9b85ad89688bd72b8a649d844a7e458832955764",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.2"
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 6d414edaa42..7a854ee596f 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -1064,6 +1064,13 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config)
F_SET(conn, WT_CONN_CLOSING_TIMESTAMP);
}
+ /*
+ * Ramp the eviction dirty target down to encourage eviction threads to
+ * clear dirty content out of cache.
+ */
+ conn->cache->eviction_dirty_trigger = 1.0;
+ conn->cache->eviction_dirty_target = 0.1;
+
err: /*
* Rollback all running transactions.
* We do this as a separate pass because an active transaction in one
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 836fcd8cb59..7d696a20831 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -2639,6 +2639,23 @@ __verbose_dump_cache_single(WT_SESSION_IMPL *session,
leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0;
leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0;
+ dhandle = session->dhandle;
+ btree = dhandle->handle;
+ WT_RET(__wt_msg(session, "%s(%s%s)%s%s:",
+ dhandle->name, dhandle->checkpoint != NULL ? "checkpoint=" : "",
+ dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>",
+ btree->evict_disabled != 0 ? " eviction disabled" : "",
+ btree->evict_disabled_open ? " at open" : ""));
+
+ /*
+ * We cannot walk the tree of a dhandle held exclusively because
+ * the owning thread could be manipulating it in a way that causes
+ * us to dump core. So print out that we visited and skipped it.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE))
+ return (__wt_msg(session,
+ " Opened exclusively. Cannot walk tree, skipping."));
+
next_walk = NULL;
while (__wt_tree_walk(session, &next_walk,
WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 &&
@@ -2669,13 +2686,6 @@ __verbose_dump_cache_single(WT_SESSION_IMPL *session,
}
}
- dhandle = session->dhandle;
- btree = dhandle->handle;
- WT_RET(__wt_msg(session, "%s(%s%s)%s%s:",
- dhandle->name, dhandle->checkpoint != NULL ? "checkpoint=" : "",
- dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>",
- btree->evict_disabled != 0 ? "eviction disabled" : "",
- btree->evict_disabled_open ? " at open" : ""));
if (intl_pages == 0)
WT_RET(__wt_msg(session, "internal: 0 pages"));
else
diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
index 493cdaf5114..311c21eff5e 100644
--- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
+++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
@@ -29,6 +29,7 @@
#include "test_checkpoint.h"
static WT_THREAD_RET checkpointer(void *);
+static WT_THREAD_RET clock_thread(void *);
static int compare_cursors(
WT_CURSOR *, const char *, WT_CURSOR *, const char *);
static int diagnose_key_error(WT_CURSOR *, int, WT_CURSOR *, int);
@@ -44,6 +45,11 @@ start_checkpoints(void)
{
testutil_check(__wt_thread_create(NULL,
&g.checkpoint_thread, checkpointer, NULL));
+ if (g.use_timestamps) {
+ testutil_check(__wt_rwlock_init(NULL, &g.clock_lock));
+ testutil_check(__wt_thread_create(NULL,
+ &g.clock_thread, clock_thread, NULL));
+ }
}
/*
@@ -54,6 +60,57 @@ void
end_checkpoints(void)
{
testutil_check(__wt_thread_join(NULL, &g.checkpoint_thread));
+ if (g.use_timestamps) {
+ testutil_check(__wt_thread_join(NULL, &g.clock_thread));
+ __wt_rwlock_destroy(NULL, &g.clock_lock);
+ }
+}
+
+/*
+ * clock_thread --
+ * Clock thread: ticks up timestamps.
+ */
+static WT_THREAD_RET
+clock_thread(void *arg)
+{
+ WT_RAND_STATE rnd;
+ WT_SESSION *wt_session;
+ WT_SESSION_IMPL *session;
+ uint64_t delay;
+ char buf[128];
+
+ WT_UNUSED(arg);
+
+ __wt_random_init(&rnd);
+ testutil_check(g.conn->open_session(g.conn, NULL, NULL, &wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ g.ts = 0;
+ while (g.running) {
+ __wt_writelock(session, &g.clock_lock);
+ ++g.ts;
+ testutil_check(__wt_snprintf(
+ buf, sizeof(buf),
+ "oldest_timestamp=%x,stable_timestamp=%x", g.ts, g.ts));
+ testutil_check(g.conn->set_timestamp(g.conn, buf));
+ if (g.ts % 997 == 0) {
+ /*
+ * Random value between 6 and 10 seconds.
+ */
+ delay = __wt_random(&rnd) % 5;
+ __wt_sleep(delay + 6, 0);
+ }
+ __wt_writeunlock(session, &g.clock_lock);
+ /*
+ * Random value between 5000 and 10000.
+ */
+ delay = __wt_random(&rnd) % 5001;
+ __wt_sleep(0, delay + 5000);
+ }
+
+ testutil_check(wt_session->close(wt_session, NULL));
+
+ return (WT_THREAD_RET_VALUE);
}
/*
@@ -82,7 +139,9 @@ checkpointer(void *arg)
static int
real_checkpointer(void)
{
+ WT_RAND_STATE rnd;
WT_SESSION *session;
+ uint64_t delay;
int ret;
char buf[128], *checkpoint_config;
@@ -90,6 +149,7 @@ real_checkpointer(void)
return (log_print_err(
"Checkpoint thread started stopped\n", EINVAL, 1));
+ __wt_random_init(&rnd);
while (g.ntables > g.ntables_created)
__wt_yield();
@@ -115,6 +175,7 @@ real_checkpointer(void)
session, checkpoint_config)) != 0)
return (log_print_err("session.checkpoint", ret, 1));
printf("Finished a checkpoint\n");
+ fflush(stdout);
if (!g.running)
goto done;
@@ -123,6 +184,14 @@ real_checkpointer(void)
if ((ret = verify_consistency(session, true)) != 0)
return (log_print_err(
"verify_consistency (offline)", ret, 1));
+
+ /*
+ * Random value between 4 and 8 seconds.
+ */
+ if (g.sweep_stress) {
+ delay = __wt_random(&rnd) % 5;
+ __wt_sleep(delay + 4, 0);
+ }
}
done: if ((ret = session->close(session, NULL)) != 0)
@@ -234,6 +303,7 @@ verify_consistency(WT_SESSION *session, bool use_checkpoint)
printf("Finished verifying a %s with %d tables and %" PRIu64
" keys\n", use_checkpoint ? "checkpoint" : "snapshot",
g.ntables, key_count);
+ fflush(stdout);
err: for (i = 0; i < g.ntables; i++) {
if (cursors[i] != NULL &&
diff --git a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
index 8db6fc1ebc4..dba60babb92 100755
--- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh
+++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
@@ -23,3 +23,12 @@ $TEST_WRAPPER ./t -T 6 -t r
echo "checkpoint: 6 row-store tables, named checkpoint"
$TEST_WRAPPER ./t -c 'TeSt' -T 6 -t r
+
+echo "checkpoint: row-store tables, stress LAS. Sweep and timestamps"
+$TEST_WRAPPER ./t -t r -W 3 -r 2 -s -x -n 100000 -k 100000 -C cache_size=100MB
+
+echo "checkpoint: 3 mixed tables, with sweep"
+$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -s -n 100000 -k 100000
+
+echo "checkpoint: 3 mixed tables, with timestamps"
+$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -x -n 100000 -k 100000
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
index f73ada611fe..461b6334b27 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
@@ -34,7 +34,7 @@ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
static void onint(int)
WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
-static void cleanup(void);
+static void cleanup(bool);
static int usage(void);
static int wt_connect(const char *);
static int wt_shutdown(void);
@@ -62,10 +62,11 @@ main(int argc, char *argv[])
g.nops = 100000;
g.ntables = 3;
g.nworkers = 1;
+ g.sweep_stress = g.use_timestamps = false;
runs = 1;
while ((ch = __wt_getopt(
- progname, argc, argv, "C:c:h:k:l:n:r:T:t:W:")) != EOF)
+ progname, argc, argv, "C:c:h:k:l:n:r:sT:t:W:x")) != EOF)
switch (ch) {
case 'c':
g.checkpoint_name = __wt_optarg;
@@ -92,6 +93,9 @@ main(int argc, char *argv[])
case 'r': /* runs */
runs = atoi(__wt_optarg);
break;
+ case 's':
+ g.sweep_stress = true;
+ break;
case 't':
switch (__wt_optarg[0]) {
case 'c':
@@ -116,6 +120,9 @@ main(int argc, char *argv[])
case 'W':
g.nworkers = atoi(__wt_optarg);
break;
+ case 'x':
+ g.use_timestamps = true;
+ break;
default:
return (usage());
}
@@ -131,11 +138,11 @@ main(int argc, char *argv[])
printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) {
+ cleanup(cnt == 1); /* Clean up previous runs */
+
printf(" %d: %d workers, %d tables\n",
cnt, g.nworkers, g.ntables);
- cleanup(); /* Clean up previous runs */
-
/* Setup a fresh set of cookies in the global array. */
if ((g.cookies = calloc(
(size_t)(g.ntables), sizeof(COOKIE))) == NULL) {
@@ -189,15 +196,30 @@ wt_connect(const char *config_open)
NULL /* Close handler. */
};
int ret;
- char config[128];
-
- testutil_make_work_dir(g.home);
-
- testutil_check(__wt_snprintf(config, sizeof(config),
- "create,statistics=(fast),error_prefix=\"%s\",cache_size=1GB%s%s",
- progname,
- config_open == NULL ? "" : ",",
- config_open == NULL ? "" : config_open));
+ char config[512];
+
+ /*
+ * If we want to stress sweep, we have a lot of additional
+ * configuration settings to set.
+ */
+ if (g.sweep_stress)
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "create,cache_cursors=false,statistics=(fast)," \
+ "statistics_log=(json,wait=1),error_prefix=\"%s\"," \
+ "file_manager=(close_handle_minimum=1,close_idle_time=1,"\
+ "close_scan_interval=1),log=(enabled),cache_size=1GB,"\
+ "timing_stress_for_test=(aggressive_sweep)%s%s",
+ progname,
+ config_open == NULL ? "" : ",",
+ config_open == NULL ? "" : config_open));
+ else
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "create,cache_cursors=false,statistics=(fast)," \
+ "statistics_log=(json,wait=1),error_prefix=\"%s\"" \
+ "%s%s",
+ progname,
+ config_open == NULL ? "" : ",",
+ config_open == NULL ? "" : config_open));
if ((ret = wiredtiger_open(
g.home, &event_handler, config, &g.conn)) != 0)
@@ -230,12 +252,14 @@ wt_shutdown(void)
* Clean up from previous runs.
*/
static void
-cleanup(void)
+cleanup(bool remove_dir)
{
g.running = 0;
g.ntables_created = 0;
+ g.ts = 0;
- testutil_clean_work_dir(g.home);
+ if (remove_dir)
+ testutil_make_work_dir(g.home);
}
static int
@@ -271,7 +295,7 @@ onint(int signo)
{
WT_UNUSED(signo);
- cleanup();
+ cleanup(false);
fprintf(stderr, "\n");
exit(EXIT_FAILURE);
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
index 805864344cb..b579f5cf9b9 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
@@ -63,8 +63,13 @@ typedef struct {
int ntables_created; /* Number tables opened */
int running; /* Whether to stop */
int status; /* Exit status */
+ bool sweep_stress; /* Sweep stress test */
+ u_int ts; /* Current timestamp */
+ bool use_timestamps; /* Use txn timestamps */
COOKIE *cookies; /* Per-thread info */
+ WT_RWLOCK clock_lock; /* Clock synchronization */
wt_thread_t checkpoint_thread; /* Checkpoint thread */
+ wt_thread_t clock_thread; /* Clock thread */
} GLOBAL;
extern GLOBAL g;
diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c
index 33836c67110..e9966cec145 100644
--- a/src/third_party/wiredtiger/test/checkpoint/workers.c
+++ b/src/third_party/wiredtiger/test/checkpoint/workers.c
@@ -39,12 +39,23 @@ static int
create_table(WT_SESSION *session, COOKIE *cookie)
{
int ret;
- char config[128];
+ char config[256];
- testutil_check(__wt_snprintf(config, sizeof(config),
- "key_format=%s,value_format=S,%s",
- cookie->type == COL ? "r" : "q",
- cookie->type == LSM ? ",type=lsm" : ""));
+ /*
+ * If we're using timestamps, turn off logging for the table.
+ */
+ if (g.use_timestamps)
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "key_format=%s,value_format=S,allocation_size=512," \
+ "leaf_page_max=1KB,internal_page_max=1KB," \
+ "memory_page_max=64KB,log=(enabled=false),%s",
+ cookie->type == COL ? "r" : "q",
+ cookie->type == LSM ? ",type=lsm" : ""));
+ else
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "key_format=%s,value_format=S,%s",
+ cookie->type == COL ? "r" : "q",
+ cookie->type == LSM ? ",type=lsm" : ""));
if ((ret = session->create(session, cookie->uri, config)) != 0)
if (ret != EEXIST)
@@ -94,6 +105,8 @@ start_workers(table_type type)
goto err;
}
+ testutil_check(session->close(session, NULL));
+
(void)gettimeofday(&start, NULL);
/* Create threads. */
@@ -122,20 +135,55 @@ err: free(tids);
static inline int
worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val)
{
- int ret;
+ int cmp, ret;
char valuebuf[64];
cursor->set_key(cursor, keyno);
- /* Roughly 5% removes. */
- if (new_val % 19 == 0) {
- if ((ret = cursor->remove(cursor)) != 0) {
+ /* Roughly half inserts, then balanced inserts / range removes. */
+ if (new_val > g.nops / 2 && new_val % 39 == 0) {
+ if ((ret = cursor->search_near(cursor, &cmp)) != 0) {
+ if (ret == WT_NOTFOUND)
+ return (0);
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.search_near", ret, 1));
+ }
+ if (cmp < 0) {
+ if ((ret = cursor->next(cursor)) != 0) {
+ if (ret == WT_NOTFOUND)
+ return (0);
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.next", ret, 1));
+ }
+ }
+ for (int i = 10; i > 0; i--) {
+ if ((ret = cursor->remove(cursor)) != 0) {
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.remove", ret, 1));
+ }
+ if ((ret = cursor->next(cursor)) != 0) {
+ if (ret == WT_NOTFOUND)
+ return (0);
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.next", ret, 1));
+ }
+ }
+ if (g.sweep_stress)
+ testutil_check(cursor->reset(cursor));
+ } else if (new_val % 39 < 10) {
+ if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) {
if (ret == WT_ROLLBACK)
return (WT_ROLLBACK);
- return (log_print_err("cursor.remove", ret, 1));
+ return (log_print_err("cursor.search", ret, 1));
}
+ if (g.sweep_stress)
+ testutil_check(cursor->reset(cursor));
} else {
testutil_check(__wt_snprintf(
- valuebuf, sizeof(valuebuf), "%037u", new_val));
+ valuebuf, sizeof(valuebuf), "%052u", new_val));
cursor->set_value(cursor, valuebuf);
if ((ret = cursor->insert(cursor)) != 0) {
if (ret == WT_ROLLBACK)
@@ -143,6 +191,7 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val)
return (log_print_err("cursor.insert", ret, 1));
}
}
+
return (0);
}
@@ -177,11 +226,12 @@ real_worker(void)
WT_SESSION *session;
u_int i, keyno;
int j, ret, t_ret;
+ const char *begin_cfg;
+ char buf[128];
+ bool has_cursors;
ret = t_ret = 0;
- __wt_random_init(&rnd);
-
if ((cursors = calloc(
(size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL)
return (log_print_err("malloc", ENOMEM, 1));
@@ -192,41 +242,80 @@ real_worker(void)
goto err;
}
+ __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
+
for (j = 0; j < g.ntables; j++)
if ((ret = session->open_cursor(session,
g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) {
(void)log_print_err("session.open_cursor", ret, 1);
goto err;
}
+ has_cursors = true;
+
+ if (g.use_timestamps)
+ begin_cfg = "read_timestamp=1,roundup_timestamps=(read=true)";
+ else
+ begin_cfg = NULL;
for (i = 0; i < g.nops && g.running; ++i, __wt_yield()) {
- if ((ret = session->begin_transaction(session, NULL)) != 0) {
+ if ((ret =
+ session->begin_transaction(session, begin_cfg)) != 0) {
(void)log_print_err(
"real_worker:begin_transaction", ret, 1);
goto err;
}
keyno = __wt_random(&rnd) % g.nkeys + 1;
- for (j = 0; j < g.ntables; j++) {
- if ((ret = worker_op(cursors[j], keyno, i)) != 0)
- break;
+ if (g.use_timestamps && i % 23 == 0) {
+ if (__wt_try_readlock(
+ (WT_SESSION_IMPL *)session, &g.clock_lock) != 0) {
+ testutil_check(
+ session->commit_transaction(session, NULL));
+ for (j = 0; j < g.ntables; j++)
+ testutil_check(
+ cursors[j]->close(cursors[j]));
+ has_cursors = false;
+ __wt_readlock(
+ (WT_SESSION_IMPL *)session, &g.clock_lock);
+ testutil_check(session->begin_transaction(
+ session, begin_cfg));
+ }
+ testutil_check(__wt_snprintf(
+ buf, sizeof(buf), "commit_timestamp=%x", g.ts + 1));
+ testutil_check(
+ session->timestamp_transaction(session, buf));
+ __wt_readunlock(
+ (WT_SESSION_IMPL *)session, &g.clock_lock);
+
+ for (j = 0; !has_cursors && j < g.ntables; j++)
+ if ((ret = session->open_cursor(
+ session, g.cookies[j].uri,
+ NULL, NULL, &cursors[j])) != 0) {
+ (void)log_print_err(
+ "session.open_cursor", ret, 1);
+ goto err;
+ }
+ has_cursors = true;
+ }
+ for (j = 0; ret == 0 && j < g.ntables; j++) {
+ ret = worker_op(cursors[j], keyno, i);
}
- if (ret == 0) {
+ if (ret != 0 && ret != WT_ROLLBACK) {
+ (void)log_print_err("worker op failed", ret, 1);
+ goto err;
+ } else if (ret == 0 && __wt_random(&rnd) % 7 != 0) {
if ((ret = session->commit_transaction(
session, NULL)) != 0) {
(void)log_print_err(
"real_worker:commit_transaction", ret, 1);
goto err;
}
- } else if (ret == WT_ROLLBACK) {
+ } else {
if ((ret = session->rollback_transaction(
session, NULL)) != 0) {
(void)log_print_err(
"real_worker:rollback_transaction", ret, 1);
goto err;
- }
- } else {
- (void)log_print_err("worker op failed", ret, 1);
- goto err;
+ }
}
}
diff --git a/src/third_party/wiredtiger/test/suite/suite_subprocess.py b/src/third_party/wiredtiger/test/suite/suite_subprocess.py
index d04a281807a..95a599090f5 100755
--- a/src/third_party/wiredtiger/test/suite/suite_subprocess.py
+++ b/src/third_party/wiredtiger/test/suite/suite_subprocess.py
@@ -151,7 +151,8 @@ class suite_subprocess:
return envvar + '=' + str(os.environ.get(envvar)) + '\n'
def show_outputs(self, procargs, message, filenames):
- out = 'ERROR: wt command ' + message + ': ' + str(procargs) + '\n' + \
+ out = message + ': ' + \
+ str(procargs) + '\n' + \
self.verbose_env('PATH') + \
self.verbose_env('LD_LIBRARY_PATH') + \
self.verbose_env('DYLD_LIBRARY_PATH') + \
@@ -169,6 +170,48 @@ class suite_subprocess:
out = sepline + filename + '\n' + sepline + contents
WiredTigerTestCase.prout(out)
+ # Run a method as a subprocess using the run.py machinery.
+ # Return the process exit status and the WiredTiger
+ # home directory used by the subprocess.
+ def run_subprocess_function(self, directory, funcname):
+ testparts = funcname.split('.')
+ if len(testparts) != 3:
+ raise ValueError('bad function name "' + funcname +
+ '", should be three part dotted name')
+ topdir = os.path.dirname(self.buildDirectory())
+ runscript = os.path.join(topdir, 'test', 'suite', 'run.py')
+ procargs = [ sys.executable, runscript, '-p', '--dir', directory,
+ funcname]
+
+ # scenario_number is only set if we are running in a scenario
+ try:
+ scennum = self.scenario_number
+ procargs.append('-s')
+ procargs.append(str(scennum))
+ except:
+ scennum = 0
+
+ returncode = -1
+ os.makedirs(directory)
+
+ # We cannot put the output/error files in the subdirectory, as
+ # that will be cleared by the run.py script.
+ with open("subprocess.err", "w") as wterr:
+ with open("subprocess.out", "w") as wtout:
+ returncode = subprocess.call(
+ procargs, stdout=wtout, stderr=wterr)
+ if returncode != 0:
+ # This is not necessarily an error, the primary reason to
+ # run in a subprocess is that it may crash.
+ self.show_outputs(procargs,
+ "Warning: run_subprocess_function " + funcname + \
+ " returned error code " + str(returncode),
+ [ "subprocess.out", "subprocess.err" ])
+
+ new_home_dir = os.path.join(directory,
+ testparts[1] + '.' + str(scennum))
+ return [ returncode, new_home_dir ]
+
# Run the wt utility.
def runWt(self, args, infilename=None,
outfilename=None, errfilename=None, closeconn=True,
@@ -230,15 +273,17 @@ class suite_subprocess:
procargs, stdout=wtout, stderr=wterr)
if failure:
if returncode == 0:
- self.show_outputs(procargs, "expected failure, got success",
- [wtoutname, wterrname])
+ self.show_outputs(procargs,
+ "ERROR: wt command expected failure, got success",
+ [wtoutname, wterrname])
self.assertNotEqual(returncode, 0,
'expected failure: "' + \
str(procargs) + '": exited ' + str(returncode))
else:
if returncode != 0:
- self.show_outputs(procargs, "expected success, got failure",
- [wtoutname, wterrname])
+ self.show_outputs(procargs,
+ "ERROR: wt command expected success, got failure",
+ [wtoutname, wterrname])
self.assertEqual(returncode, 0,
'expected success: "' + \
str(procargs) + '": exited ' + str(returncode))
diff --git a/src/third_party/wiredtiger/test/suite/test_bug018.py b/src/third_party/wiredtiger/test/suite/test_bug018.py
index f85de89c09f..5e5472f6eef 100644..100755
--- a/src/third_party/wiredtiger/test/suite/test_bug018.py
+++ b/src/third_party/wiredtiger/test/suite/test_bug018.py
@@ -27,6 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
from helper import copy_wiredtiger_home
+from suite_subprocess import suite_subprocess
import os
import wiredtiger, wttest
@@ -34,10 +35,14 @@ import wiredtiger, wttest
# JIRA WT-3590: if writing table data fails during close then tables
# that were updated within the same transaction could get out of sync with
# each other.
-class test_bug018(wttest.WiredTigerTestCase):
+class test_bug018(wttest.WiredTigerTestCase, suite_subprocess):
'''Test closing/reopening/recovering tables when writes fail'''
conn_config = 'log=(enabled)'
+ basename = 'bug018.'
+ baseuri = 'file:' + basename
+ uri1 = baseuri + '01.wt'
+ uri2 = baseuri + '02.wt'
def setUp(self):
# This test uses Linux-specific code so skip on any other system.
@@ -49,12 +54,10 @@ class test_bug018(wttest.WiredTigerTestCase):
self.session.create(uri, 'key_format=S,value_format=S')
return self.session.open_cursor(uri)
- def test_bug018(self):
+ def subprocess_bug018(self):
'''Test closing multiple tables'''
- basename = 'bug018.'
- baseuri = 'file:' + basename
- c1 = self.create_table(baseuri + '01.wt')
- c2 = self.create_table(baseuri + '02.wt')
+ c1 = self.create_table(self.uri1)
+ c2 = self.create_table(self.uri2)
self.session.begin_transaction()
c1['key'] = 'value'
@@ -70,7 +73,7 @@ class test_bug018(wttest.WiredTigerTestCase):
# This is Linux-specific code to figure out the file descriptor.
for f in os.listdir('/proc/self/fd'):
try:
- if os.readlink('/proc/self/fd/' + f).endswith(basename + '02.wt'):
+ if os.readlink('/proc/self/fd/' + f).endswith(self.basename + '02.wt'):
os.close(int(f))
except OSError:
pass
@@ -82,17 +85,37 @@ class test_bug018(wttest.WiredTigerTestCase):
except wiredtiger.WiredTigerError:
self.conn = None
+ def test_bug018(self):
+ '''Test closing multiple tables'''
+
+ self.close_conn()
+ subdir = 'SUBPROCESS'
+ [ignore_result, new_home_dir] = self.run_subprocess_function(subdir,
+ 'test_bug018.test_bug018.subprocess_bug018')
+
# Make a backup for forensics in case something goes wrong.
backup_dir = 'BACKUP'
- copy_wiredtiger_home('.', backup_dir, True)
+ copy_wiredtiger_home(new_home_dir, backup_dir, True)
# After reopening and running recovery both tables should be in
# sync even though table 1 was successfully written and table 2
# had an error on close.
- self.open_conn()
- c1 = self.session.open_cursor(baseuri + '01.wt')
- c2 = self.session.open_cursor(baseuri + '02.wt')
- self.assertEqual(list(c1), list(c2))
+ self.open_conn(new_home_dir)
+
+ results1 = list(self.session.open_cursor(self.uri1))
+
+ # It's possible the second table can't even be opened.
+ # That can happen only if the root page was not pushed out.
+ # So if we get an error, make sure we're getting the right
+ # error message.
+
+ self.captureerr.check(self) # check error messages until now
+ try:
+ results2 = list(self.session.open_cursor(self.uri2))
+ except:
+ self.captureerr.checkAdditionalPattern(self, 'unable to read root page')
+ results2 = []
+ self.assertEqual(results1, results2)
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py
index 260daef7d02..de9514b7651 100755
--- a/src/third_party/wiredtiger/test/suite/wttest.py
+++ b/src/third_party/wiredtiger/test/suite/wttest.py
@@ -247,6 +247,9 @@ class WiredTigerTestCase(unittest.TestCase):
return "%s.%s.%s" % (self.__module__,
self.className(), self._testMethodName)
+ def buildDirectory(self):
+ return self._builddir
+
# Return the wiredtiger_open extension argument for
# any needed shared library.
def extensionsConfig(self):
diff --git a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
index ca2b2d814a8..7409ab62243 100755
--- a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
+++ b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
@@ -39,6 +39,7 @@ import multiprocessing
import numpy as np
import os
import pandas as pd
+import subprocess
import sys
import traceback
import time
@@ -462,7 +463,7 @@ def createLegendFigure(legendDict):
p = figure(title="TRACKED FUNCTIONS",
plot_width=plotWidth,
- plot_height = max((max_ycoord + 2) * pixelsForLegendItem, 90),
+ plot_height = int(max((max_ycoord + 2) * pixelsForLegendItem, 90)),
tools = [], toolbar_location="above",
x_range = (0, (FUNCS_PER_ROW + 1)* HSPACE_BETWEEN_FUNCS),
y_range = (0, max_ycoord + 2),
@@ -1241,6 +1242,22 @@ def parseConfigFile(fname):
return True;
+# With Python3 this script fails if the number of open files
+# is limited to 256, because the multiprocessing package does
+# not appear to properly clean up processes that exited.
+#
+def checkOpenFileLimit():
+
+ targetLimit = 512;
+ openFileLimit = int(subprocess.check_output("ulimit -n",
+ shell=True).decode());
+
+ if (openFileLimit < targetLimit):
+ print(color.BOLD + color.RED + "Open file limit is " +
+ str(openFileLimit) + ". Please increase to " + str(targetLimit) +
+ " by running `ulimit -n " + str(targetLimit) + "`." +
+ color.END);
+ sys.exit(-1);
def main():
@@ -1275,6 +1292,8 @@ def main():
parser.print_help();
sys.exit(1);
+ checkOpenFileLimit();
+
# Determine the target job parallelism
if (args.jobParallelism > 0):
targetParallelism = args.jobParallelism;