Import wiredtiger: 9b85ad89688bd72b8a649d844a7e458832955764 from branch mongodb-4.2

ref: 57bd75fee9..9b85ad8968
for: 4.3.1

WT-4190 Decrease shutdown time by doing multi-threaded cache flush
WT-4608 Cache stuck with clean pages for LSM data format testing
WT-4775 Make the "bad file descriptor" test resilient against crashing
WT-4793 Extend test/checkpoint to use timestamps and more
WT-4825 Add warning in operation tracking if the open file limit is too small
WT-4828 Fix type-related exception in operation tracking after conversion to Python3
author: Luke Chen <luke.chen@mongodb.com> 2019-06-05 10:44:58 +1000
committer: Luke Chen <luke.chen@mongodb.com> 2019-06-05 10:44:58 +1000
commit: c939010fe98ba0a8affe7d0d30d4e8d57e68242b (patch)
tree: d26869393e4eb400fc7eb2b5bb3194014ba71956 /src
parent: ed173f027ae8940bb93c57c580993192dc534fb0 (diff)
download: mongo-c939010fe98ba0a8affe7d0d30d4e8d57e68242b.tar.gz
12 files changed, 369 insertions, 65 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 43ede9082a0..22114815861 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
 {
-    "commit": "57bd75fee93b685c133281100719f886d0184589", 
+    "commit": "9b85ad89688bd72b8a649d844a7e458832955764", 
     "github": "wiredtiger/wiredtiger.git", 
     "vendor": "wiredtiger", 
     "branch": "mongodb-4.2"
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 6d414edaa42..7a854ee596f 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -1064,6 +1064,13 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config)
 			F_SET(conn, WT_CONN_CLOSING_TIMESTAMP);
 	}
 
+	/*
+	 * Ramp the eviction dirty target down to encourage eviction threads to
+	 * clear dirty content out of cache.
+	 */
+	conn->cache->eviction_dirty_trigger = 1.0;
+	conn->cache->eviction_dirty_target = 0.1;
+
 err:	/*
 	 * Rollback all running transactions.
 	 * We do this as a separate pass because an active transaction in one
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 836fcd8cb59..7d696a20831 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -2639,6 +2639,23 @@ __verbose_dump_cache_single(WT_SESSION_IMPL *session,
 	leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0;
 	leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0;
 
+	dhandle = session->dhandle;
+	btree = dhandle->handle;
+	WT_RET(__wt_msg(session, "%s(%s%s)%s%s:",
+	    dhandle->name, dhandle->checkpoint != NULL ? "checkpoint=" : "",
+	    dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>",
+	    btree->evict_disabled != 0 ?  " eviction disabled" : "",
+	    btree->evict_disabled_open ? " at open" : ""));
+
+	/*
+	 * We cannot walk the tree of a dhandle held exclusively because
+	 * the owning thread could be manipulating it in a way that causes
+	 * us to dump core. So print out that we visited and skipped it.
+	 */
+	if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE))
+		return (__wt_msg(session,
+		    "  Opened exclusively. Cannot walk tree, skipping."));
+
 	next_walk = NULL;
 	while (__wt_tree_walk(session, &next_walk,
 	    WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 &&
@@ -2669,13 +2686,6 @@ __verbose_dump_cache_single(WT_SESSION_IMPL *session,
 		}
 	}
 
-	dhandle = session->dhandle;
-	btree = dhandle->handle;
-	WT_RET(__wt_msg(session, "%s(%s%s)%s%s:",
-	    dhandle->name, dhandle->checkpoint != NULL ? "checkpoint=" : "",
-	    dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>",
-	    btree->evict_disabled != 0 ?  "eviction disabled" : "",
-	    btree->evict_disabled_open ? " at open" : ""));
 	if (intl_pages == 0)
 		WT_RET(__wt_msg(session, "internal: 0 pages"));
 	else
diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
index 493cdaf5114..311c21eff5e 100644
--- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
+++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
@@ -29,6 +29,7 @@
 #include "test_checkpoint.h"
 
 static WT_THREAD_RET checkpointer(void *);
+static WT_THREAD_RET clock_thread(void *);
 static int compare_cursors(
     WT_CURSOR *, const char *, WT_CURSOR *, const char *);
 static int diagnose_key_error(WT_CURSOR *, int, WT_CURSOR *, int);
@@ -44,6 +45,11 @@ start_checkpoints(void)
 {
 	testutil_check(__wt_thread_create(NULL,
 	    &g.checkpoint_thread, checkpointer, NULL));
+	if (g.use_timestamps) {
+		testutil_check(__wt_rwlock_init(NULL, &g.clock_lock));
+		testutil_check(__wt_thread_create(NULL,
+		    &g.clock_thread, clock_thread, NULL));
+	}
 }
 
 /*
@@ -54,6 +60,57 @@ void
 end_checkpoints(void)
 {
 	testutil_check(__wt_thread_join(NULL, &g.checkpoint_thread));
+	if (g.use_timestamps) {
+		testutil_check(__wt_thread_join(NULL, &g.clock_thread));
+		__wt_rwlock_destroy(NULL, &g.clock_lock);
+	}
+}
+
+/*
+ * clock_thread --
+ *	Clock thread: ticks up timestamps.
+ */
+static WT_THREAD_RET
+clock_thread(void *arg)
+{
+	WT_RAND_STATE rnd;
+	WT_SESSION *wt_session;
+	WT_SESSION_IMPL *session;
+	uint64_t delay;
+	char buf[128];
+
+	WT_UNUSED(arg);
+
+	__wt_random_init(&rnd);
+	testutil_check(g.conn->open_session(g.conn, NULL, NULL, &wt_session));
+	session = (WT_SESSION_IMPL *)wt_session;
+
+	g.ts = 0;
+	while (g.running) {
+		__wt_writelock(session, &g.clock_lock);
+		++g.ts;
+		testutil_check(__wt_snprintf(
+		    buf, sizeof(buf),
+		    "oldest_timestamp=%x,stable_timestamp=%x", g.ts, g.ts));
+		testutil_check(g.conn->set_timestamp(g.conn, buf));
+		if (g.ts % 997 == 0) {
+			/*
+			 * Random value between 6 and 10 seconds.
+			 */
+			delay = __wt_random(&rnd) % 5;
+			__wt_sleep(delay + 6, 0);
+		}
+		__wt_writeunlock(session, &g.clock_lock);
+		/*
+		 * Random value between 5000 and 10000.
+		 */
+		delay = __wt_random(&rnd) % 5001;
+		__wt_sleep(0, delay + 5000);
+	}
+
+	testutil_check(wt_session->close(wt_session, NULL));
+
+	return (WT_THREAD_RET_VALUE);
 }
 
 /*
@@ -82,7 +139,9 @@ checkpointer(void *arg)
 static int
 real_checkpointer(void)
 {
+	WT_RAND_STATE rnd;
 	WT_SESSION *session;
+	uint64_t delay;
 	int ret;
 	char buf[128], *checkpoint_config;
 
@@ -90,6 +149,7 @@ real_checkpointer(void)
 		return (log_print_err(
 		    "Checkpoint thread started stopped\n", EINVAL, 1));
 
+	__wt_random_init(&rnd);
 	while (g.ntables > g.ntables_created)
 		__wt_yield();
 
@@ -115,6 +175,7 @@ real_checkpointer(void)
 		    session, checkpoint_config)) != 0)
 			return (log_print_err("session.checkpoint", ret, 1));
 		printf("Finished a checkpoint\n");
+		fflush(stdout);
 
 		if (!g.running)
 			goto done;
@@ -123,6 +184,14 @@ real_checkpointer(void)
 		if ((ret = verify_consistency(session, true)) != 0)
 			return (log_print_err(
 			    "verify_consistency (offline)", ret, 1));
+
+		/*
+		 * Random value between 4 and 8 seconds.
+		 */
+		if (g.sweep_stress) {
+			delay = __wt_random(&rnd) % 5;
+			__wt_sleep(delay + 4, 0);
+		}
 	}
 
 done:	if ((ret = session->close(session, NULL)) != 0)
@@ -234,6 +303,7 @@ verify_consistency(WT_SESSION *session, bool use_checkpoint)
 	printf("Finished verifying a %s with %d tables and %" PRIu64
 	    " keys\n", use_checkpoint ? "checkpoint" : "snapshot",
 	    g.ntables, key_count);
+	fflush(stdout);
 
 err:	for (i = 0; i < g.ntables; i++) {
 		if (cursors[i] != NULL &&
diff --git a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
index 8db6fc1ebc4..dba60babb92 100755
--- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh
+++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
@@ -23,3 +23,12 @@ $TEST_WRAPPER ./t -T 6 -t r
 
 echo "checkpoint: 6 row-store tables, named checkpoint"
 $TEST_WRAPPER ./t -c 'TeSt' -T 6 -t r
+
+echo "checkpoint: row-store tables, stress LAS. Sweep and timestamps"
+$TEST_WRAPPER ./t -t r -W 3 -r 2 -s -x -n 100000 -k 100000 -C cache_size=100MB
+
+echo "checkpoint: 3 mixed tables, with sweep"
+$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -s -n 100000 -k 100000
+
+echo "checkpoint: 3 mixed tables, with timestamps"
+$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -x -n 100000 -k 100000
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
index f73ada611fe..461b6334b27 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
@@ -34,7 +34,7 @@ static int  handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
 static int  handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
 static void onint(int)
     WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
-static void cleanup(void);
+static void cleanup(bool);
 static int  usage(void);
 static int  wt_connect(const char *);
 static int  wt_shutdown(void);
@@ -62,10 +62,11 @@ main(int argc, char *argv[])
 	g.nops = 100000;
 	g.ntables = 3;
 	g.nworkers = 1;
+	g.sweep_stress = g.use_timestamps = false;
 	runs = 1;
 
 	while ((ch = __wt_getopt(
-	    progname, argc, argv, "C:c:h:k:l:n:r:T:t:W:")) != EOF)
+	    progname, argc, argv, "C:c:h:k:l:n:r:sT:t:W:x")) != EOF)
 		switch (ch) {
 		case 'c':
 			g.checkpoint_name = __wt_optarg;
@@ -92,6 +93,9 @@ main(int argc, char *argv[])
 		case 'r':			/* runs */
 			runs = atoi(__wt_optarg);
 			break;
+		case 's':
+			g.sweep_stress = true;
+			break;
 		case 't':
 			switch (__wt_optarg[0]) {
 			case 'c':
@@ -116,6 +120,9 @@ main(int argc, char *argv[])
 		case 'W':
 			g.nworkers = atoi(__wt_optarg);
 			break;
+		case 'x':
+			g.use_timestamps = true;
+			break;
 		default:
 			return (usage());
 		}
@@ -131,11 +138,11 @@ main(int argc, char *argv[])
 
 	printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
 	for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) {
+		cleanup(cnt == 1);		/* Clean up previous runs */
+
 		printf("    %d: %d workers, %d tables\n",
 		    cnt, g.nworkers, g.ntables);
 
-		cleanup();			/* Clean up previous runs */
-
 		/* Setup a fresh set of cookies in the global array. */
 		if ((g.cookies = calloc(
 		    (size_t)(g.ntables), sizeof(COOKIE))) == NULL) {
@@ -189,15 +196,30 @@ wt_connect(const char *config_open)
 		NULL	/* Close handler. */
 	};
 	int ret;
-	char config[128];
-
-	testutil_make_work_dir(g.home);
-
-	testutil_check(__wt_snprintf(config, sizeof(config),
-	    "create,statistics=(fast),error_prefix=\"%s\",cache_size=1GB%s%s",
-	    progname,
-	    config_open == NULL ? "" : ",",
-	    config_open == NULL ? "" : config_open));
+	char config[512];
+
+	/*
+	 * If we want to stress sweep, we have a lot of additional
+	 * configuration settings to set.
+	 */
+	if (g.sweep_stress)
+		testutil_check(__wt_snprintf(config, sizeof(config),
+		    "create,cache_cursors=false,statistics=(fast),"	\
+		    "statistics_log=(json,wait=1),error_prefix=\"%s\","	\
+		    "file_manager=(close_handle_minimum=1,close_idle_time=1,"\
+		    "close_scan_interval=1),log=(enabled),cache_size=1GB,"\
+		    "timing_stress_for_test=(aggressive_sweep)%s%s",
+		    progname,
+		    config_open == NULL ? "" : ",",
+		    config_open == NULL ? "" : config_open));
+	else
+		testutil_check(__wt_snprintf(config, sizeof(config),
+		    "create,cache_cursors=false,statistics=(fast),"	\
+		    "statistics_log=(json,wait=1),error_prefix=\"%s\""	\
+		    "%s%s",
+		    progname,
+		    config_open == NULL ? "" : ",",
+		    config_open == NULL ? "" : config_open));
 
 	if ((ret = wiredtiger_open(
 	    g.home, &event_handler, config, &g.conn)) != 0)
@@ -230,12 +252,14 @@ wt_shutdown(void)
  *	Clean up from previous runs.
  */
 static void
-cleanup(void)
+cleanup(bool remove_dir)
 {
 	g.running = 0;
 	g.ntables_created = 0;
+	g.ts = 0;
 
-	testutil_clean_work_dir(g.home);
+	if (remove_dir)
+		testutil_make_work_dir(g.home);
 }
 
 static int
@@ -271,7 +295,7 @@ onint(int signo)
 {
 	WT_UNUSED(signo);
 
-	cleanup();
+	cleanup(false);
 
 	fprintf(stderr, "\n");
 	exit(EXIT_FAILURE);
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
index 805864344cb..b579f5cf9b9 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
@@ -63,8 +63,13 @@ typedef struct {
 	int ntables_created;			/* Number tables opened */
 	int running;				/* Whether to stop */
 	int status;				/* Exit status */
+	bool sweep_stress;			/* Sweep stress test */
+	u_int ts;				/* Current timestamp */
+	bool use_timestamps;			/* Use txn timestamps */
 	COOKIE *cookies;			/* Per-thread info */
+	WT_RWLOCK clock_lock;			/* Clock synchronization */
 	wt_thread_t checkpoint_thread;		/* Checkpoint thread */
+	wt_thread_t clock_thread;		/* Clock thread */
 } GLOBAL;
 extern GLOBAL g;
 
diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c
index 33836c67110..e9966cec145 100644
--- a/src/third_party/wiredtiger/test/checkpoint/workers.c
+++ b/src/third_party/wiredtiger/test/checkpoint/workers.c
@@ -39,12 +39,23 @@ static int
 create_table(WT_SESSION *session, COOKIE *cookie)
 {
 	int ret;
-	char config[128];
+	char config[256];
 
-	testutil_check(__wt_snprintf(config, sizeof(config),
-	    "key_format=%s,value_format=S,%s",
-	    cookie->type == COL ? "r" : "q",
-	    cookie->type == LSM ? ",type=lsm" : ""));
+	/*
+	 * If we're using timestamps, turn off logging for the table.
+	 */
+	if (g.use_timestamps)
+		testutil_check(__wt_snprintf(config, sizeof(config),
+		    "key_format=%s,value_format=S,allocation_size=512,"	\
+		    "leaf_page_max=1KB,internal_page_max=1KB,"		\
+		    "memory_page_max=64KB,log=(enabled=false),%s",
+		    cookie->type == COL ? "r" : "q",
+		    cookie->type == LSM ? ",type=lsm" : ""));
+	else
+		testutil_check(__wt_snprintf(config, sizeof(config),
+		    "key_format=%s,value_format=S,%s",
+		    cookie->type == COL ? "r" : "q",
+		    cookie->type == LSM ? ",type=lsm" : ""));
 
 	if ((ret = session->create(session, cookie->uri, config)) != 0)
 		if (ret != EEXIST)
@@ -94,6 +105,8 @@ start_workers(table_type type)
 			goto err;
 	}
 
+	testutil_check(session->close(session, NULL));
+
 	(void)gettimeofday(&start, NULL);
 
 	/* Create threads. */
@@ -122,20 +135,55 @@ err:	free(tids);
 static inline int
 worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val)
 {
-	int ret;
+	int cmp, ret;
 	char valuebuf[64];
 
 	cursor->set_key(cursor, keyno);
-	/* Roughly 5% removes. */
-	if (new_val % 19 == 0) {
-		if ((ret = cursor->remove(cursor)) != 0) {
+	/* Roughly half inserts, then balanced inserts / range removes. */
+	if (new_val > g.nops / 2 && new_val % 39 == 0) {
+		if ((ret = cursor->search_near(cursor, &cmp)) != 0) {
+			if (ret == WT_NOTFOUND)
+				return (0);
+			if (ret == WT_ROLLBACK)
+				return (WT_ROLLBACK);
+			return (log_print_err("cursor.search_near", ret, 1));
+		}
+		if (cmp < 0) {
+			if ((ret = cursor->next(cursor)) != 0) {
+				if (ret == WT_NOTFOUND)
+					return (0);
+				if (ret == WT_ROLLBACK)
+					return (WT_ROLLBACK);
+				return (log_print_err("cursor.next", ret, 1));
+			}
+		}
+		for (int i = 10; i > 0; i--) {
+			if ((ret = cursor->remove(cursor)) != 0) {
+				if (ret == WT_ROLLBACK)
+					return (WT_ROLLBACK);
+				return (log_print_err("cursor.remove", ret, 1));
+			}
+			if ((ret = cursor->next(cursor)) != 0) {
+				if (ret == WT_NOTFOUND)
+					return (0);
+				if (ret == WT_ROLLBACK)
+					return (WT_ROLLBACK);
+				return (log_print_err("cursor.next", ret, 1));
+			}
+		}
+		if (g.sweep_stress)
+			testutil_check(cursor->reset(cursor));
+	} else if (new_val % 39 < 10) {
+		if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) {
 			if (ret == WT_ROLLBACK)
 				return (WT_ROLLBACK);
-			return (log_print_err("cursor.remove", ret, 1));
+			return (log_print_err("cursor.search", ret, 1));
 		}
+		if (g.sweep_stress)
+			testutil_check(cursor->reset(cursor));
 	} else {
 		testutil_check(__wt_snprintf(
-		    valuebuf, sizeof(valuebuf), "%037u", new_val));
+		    valuebuf, sizeof(valuebuf), "%052u", new_val));
 		cursor->set_value(cursor, valuebuf);
 		if ((ret = cursor->insert(cursor)) != 0) {
 			if (ret == WT_ROLLBACK)
@@ -143,6 +191,7 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val)
 			return (log_print_err("cursor.insert", ret, 1));
 		}
 	}
+
 	return (0);
 }
 
@@ -177,11 +226,12 @@ real_worker(void)
 	WT_SESSION *session;
 	u_int i, keyno;
 	int j, ret, t_ret;
+	const char *begin_cfg;
+	char buf[128];
+	bool has_cursors;
 
 	ret = t_ret = 0;
 
-	__wt_random_init(&rnd);
-
 	if ((cursors = calloc(
 	    (size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL)
 		return (log_print_err("malloc", ENOMEM, 1));
@@ -192,41 +242,80 @@ real_worker(void)
 		goto err;
 	}
 
+	__wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
+
 	for (j = 0; j < g.ntables; j++)
 		if ((ret = session->open_cursor(session,
 		    g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) {
 			(void)log_print_err("session.open_cursor", ret, 1);
 			goto err;
 		}
+	has_cursors = true;
+
+	if (g.use_timestamps)
+		begin_cfg = "read_timestamp=1,roundup_timestamps=(read=true)";
+	else
+		begin_cfg = NULL;
 
 	for (i = 0; i < g.nops && g.running; ++i, __wt_yield()) {
-		if ((ret = session->begin_transaction(session, NULL)) != 0) {
+		if ((ret =
+		    session->begin_transaction(session, begin_cfg)) != 0) {
 			(void)log_print_err(
 			    "real_worker:begin_transaction", ret, 1);
 			goto err;
 		}
 		keyno = __wt_random(&rnd) % g.nkeys + 1;
-		for (j = 0; j < g.ntables; j++) {
-			if ((ret = worker_op(cursors[j], keyno, i)) != 0)
-				break;
+		if (g.use_timestamps && i % 23 == 0) {
+			if (__wt_try_readlock(
+			    (WT_SESSION_IMPL *)session, &g.clock_lock) != 0) {
+				testutil_check(
+				    session->commit_transaction(session, NULL));
+				for (j = 0; j < g.ntables; j++)
+					testutil_check(
+					    cursors[j]->close(cursors[j]));
+				has_cursors = false;
+				__wt_readlock(
+				    (WT_SESSION_IMPL *)session, &g.clock_lock);
+				testutil_check(session->begin_transaction(
+				    session, begin_cfg));
+			}
+			testutil_check(__wt_snprintf(
+			    buf, sizeof(buf), "commit_timestamp=%x", g.ts + 1));
+			testutil_check(
+			    session->timestamp_transaction(session, buf));
+			__wt_readunlock(
+			    (WT_SESSION_IMPL *)session, &g.clock_lock);
+
+			for (j = 0; !has_cursors && j < g.ntables; j++)
+				if ((ret = session->open_cursor(
+				    session, g.cookies[j].uri,
+				    NULL, NULL, &cursors[j])) != 0) {
+					(void)log_print_err(
+					    "session.open_cursor", ret, 1);
+					goto err;
+				}
+			has_cursors = true;
+		}
+		for (j = 0; ret == 0 && j < g.ntables; j++) {
+			ret = worker_op(cursors[j], keyno, i);
 		}
-		if (ret == 0) {
+		if (ret != 0 && ret != WT_ROLLBACK) {
+			(void)log_print_err("worker op failed", ret, 1);
+			goto err;
+		} else if (ret == 0 && __wt_random(&rnd) % 7 != 0) {
 			if ((ret = session->commit_transaction(
 			    session, NULL)) != 0) {
 				(void)log_print_err(
 				    "real_worker:commit_transaction", ret, 1);
 				goto err;
 			}
-		} else if (ret == WT_ROLLBACK) {
+		} else {
 			if ((ret = session->rollback_transaction(
 			    session, NULL)) != 0) {
 				(void)log_print_err(
 				    "real_worker:rollback_transaction", ret, 1);
 				goto err;
-			    }
-		} else {
-			(void)log_print_err("worker op failed", ret, 1);
-			goto err;
+			}
 		}
 	}
 
diff --git a/src/third_party/wiredtiger/test/suite/suite_subprocess.py b/src/third_party/wiredtiger/test/suite/suite_subprocess.py
index d04a281807a..95a599090f5 100755
--- a/src/third_party/wiredtiger/test/suite/suite_subprocess.py
+++ b/src/third_party/wiredtiger/test/suite/suite_subprocess.py
@@ -151,7 +151,8 @@ class suite_subprocess:
         return envvar + '=' + str(os.environ.get(envvar)) + '\n'
 
     def show_outputs(self, procargs, message, filenames):
-        out = 'ERROR: wt command ' + message + ': ' + str(procargs) + '\n' + \
+        out = message + ': ' + \
+              str(procargs) + '\n' + \
               self.verbose_env('PATH') + \
               self.verbose_env('LD_LIBRARY_PATH') + \
               self.verbose_env('DYLD_LIBRARY_PATH') + \
@@ -169,6 +170,48 @@ class suite_subprocess:
                     out = sepline + filename + '\n' + sepline + contents
                     WiredTigerTestCase.prout(out)
 
+    # Run a method as a subprocess using the run.py machinery.
+    # Return the process exit status and the WiredTiger
+    # home directory used by the subprocess.
+    def run_subprocess_function(self, directory, funcname):
+        testparts = funcname.split('.')
+        if len(testparts) != 3:
+            raise ValueError('bad function name "' + funcname +
+                '", should be three part dotted name')
+        topdir = os.path.dirname(self.buildDirectory())
+        runscript = os.path.join(topdir, 'test', 'suite', 'run.py')
+        procargs = [ sys.executable, runscript, '-p', '--dir', directory,
+            funcname]
+
+        # scenario_number is only set if we are running in a scenario
+        try:
+            scennum = self.scenario_number
+            procargs.append('-s')
+            procargs.append(str(scennum))
+        except:
+            scennum = 0
+
+        returncode = -1
+        os.makedirs(directory)
+
+        # We cannot put the output/error files in the subdirectory, as
+        # that will be cleared by the run.py script.
+        with open("subprocess.err", "w") as wterr:
+            with open("subprocess.out", "w") as wtout:
+                returncode = subprocess.call(
+                    procargs, stdout=wtout, stderr=wterr)
+                if returncode != 0:
+                    # This is not necessarily an error, the primary reason to
+                    # run in a subprocess is that it may crash.
+                    self.show_outputs(procargs,
+                        "Warning: run_subprocess_function " + funcname + \
+                        " returned error code " + str(returncode),
+                        [ "subprocess.out", "subprocess.err" ])
+
+        new_home_dir = os.path.join(directory,
+            testparts[1] + '.' + str(scennum))
+        return [ returncode, new_home_dir ]
+
     # Run the wt utility.
     def runWt(self, args, infilename=None,
         outfilename=None, errfilename=None, closeconn=True,
@@ -230,15 +273,17 @@ class suite_subprocess:
                         procargs, stdout=wtout, stderr=wterr)
         if failure:
             if returncode == 0:
-                self.show_outputs(procargs, "expected failure, got success",
-                                  [wtoutname, wterrname])
+                self.show_outputs(procargs,
+                    "ERROR: wt command expected failure, got success",
+                    [wtoutname, wterrname])
             self.assertNotEqual(returncode, 0,
                 'expected failure: "' + \
                 str(procargs) + '": exited ' + str(returncode))
         else:
             if returncode != 0:
-                self.show_outputs(procargs, "expected success, got failure",
-                                  [wtoutname, wterrname])
+                self.show_outputs(procargs,
+                    "ERROR: wt command expected success, got failure",
+                    [wtoutname, wterrname])
             self.assertEqual(returncode, 0,
                 'expected success: "' + \
                 str(procargs) + '": exited ' + str(returncode))
diff --git a/src/third_party/wiredtiger/test/suite/test_bug018.py b/src/third_party/wiredtiger/test/suite/test_bug018.py
index f85de89c09f..5e5472f6eef 100644..100755
--- a/src/third_party/wiredtiger/test/suite/test_bug018.py
+++ b/src/third_party/wiredtiger/test/suite/test_bug018.py
@@ -27,6 +27,7 @@
 # OTHER DEALINGS IN THE SOFTWARE.
 
 from helper import copy_wiredtiger_home
+from suite_subprocess import suite_subprocess
 import os
 import wiredtiger, wttest
 
@@ -34,10 +35,14 @@ import wiredtiger, wttest
 #   JIRA WT-3590: if writing table data fails during close then tables
 # that were updated within the same transaction could get out of sync with
 # each other.
-class test_bug018(wttest.WiredTigerTestCase):
+class test_bug018(wttest.WiredTigerTestCase, suite_subprocess):
     '''Test closing/reopening/recovering tables when writes fail'''
 
     conn_config = 'log=(enabled)'
+    basename = 'bug018.'
+    baseuri = 'file:' + basename
+    uri1 = baseuri + '01.wt'
+    uri2 = baseuri + '02.wt'
 
     def setUp(self):
         # This test uses Linux-specific code so skip on any other system.
@@ -49,12 +54,10 @@ class test_bug018(wttest.WiredTigerTestCase):
         self.session.create(uri, 'key_format=S,value_format=S')
         return self.session.open_cursor(uri)
 
-    def test_bug018(self):
+    def subprocess_bug018(self):
         '''Test closing multiple tables'''
-        basename = 'bug018.'
-        baseuri = 'file:' + basename
-        c1 = self.create_table(baseuri + '01.wt')
-        c2 = self.create_table(baseuri + '02.wt')
+        c1 = self.create_table(self.uri1)
+        c2 = self.create_table(self.uri2)
 
         self.session.begin_transaction()
         c1['key'] = 'value'
@@ -70,7 +73,7 @@ class test_bug018(wttest.WiredTigerTestCase):
         # This is Linux-specific code to figure out the file descriptor.
         for f in os.listdir('/proc/self/fd'):
             try:
-                if os.readlink('/proc/self/fd/' + f).endswith(basename + '02.wt'):
+                if os.readlink('/proc/self/fd/' + f).endswith(self.basename + '02.wt'):
                     os.close(int(f))
             except OSError:
                 pass
@@ -82,17 +85,37 @@ class test_bug018(wttest.WiredTigerTestCase):
             except wiredtiger.WiredTigerError:
                 self.conn = None
 
+    def test_bug018(self):
+        '''Test closing multiple tables'''
+
+        self.close_conn()
+        subdir = 'SUBPROCESS'
+        [ignore_result, new_home_dir] = self.run_subprocess_function(subdir,
+            'test_bug018.test_bug018.subprocess_bug018')
+
         # Make a backup for forensics in case something goes wrong.
         backup_dir = 'BACKUP'
-        copy_wiredtiger_home('.', backup_dir, True)
+        copy_wiredtiger_home(new_home_dir, backup_dir, True)
 
         # After reopening and running recovery both tables should be in
         # sync even though table 1 was successfully written and table 2
         # had an error on close.
-        self.open_conn()
-        c1 = self.session.open_cursor(baseuri + '01.wt')
-        c2 = self.session.open_cursor(baseuri + '02.wt')
-        self.assertEqual(list(c1), list(c2))
+        self.open_conn(new_home_dir)
+
+        results1 = list(self.session.open_cursor(self.uri1))
+
+        # It's possible the second table can't even be opened.
+        # That can happen only if the root page was not pushed out.
+        # So if we get an error, make sure we're getting the right
+        # error message.
+
+        self.captureerr.check(self)     # check error messages until now
+        try:
+            results2 = list(self.session.open_cursor(self.uri2))
+        except:
+            self.captureerr.checkAdditionalPattern(self, 'unable to read root page')
+            results2 = []
+        self.assertEqual(results1, results2)
 
 if __name__ == '__main__':
     wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py
index 260daef7d02..de9514b7651 100755
--- a/src/third_party/wiredtiger/test/suite/wttest.py
+++ b/src/third_party/wiredtiger/test/suite/wttest.py
@@ -247,6 +247,9 @@ class WiredTigerTestCase(unittest.TestCase):
         return "%s.%s.%s" %  (self.__module__,
                               self.className(), self._testMethodName)
 
+    def buildDirectory(self):
+        return self._builddir
+
     # Return the wiredtiger_open extension argument for
     # any needed shared library.
     def extensionsConfig(self):
diff --git a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
index ca2b2d814a8..7409ab62243 100755
--- a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
+++ b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
@@ -39,6 +39,7 @@ import multiprocessing
 import numpy as np
 import os
 import pandas as pd
+import subprocess
 import sys
 import traceback
 import time
@@ -462,7 +463,7 @@ def createLegendFigure(legendDict):
 
     p = figure(title="TRACKED FUNCTIONS",
                plot_width=plotWidth,
-               plot_height = max((max_ycoord + 2) * pixelsForLegendItem, 90),
+               plot_height = int(max((max_ycoord + 2) * pixelsForLegendItem, 90)),
                tools = [], toolbar_location="above",
                x_range = (0, (FUNCS_PER_ROW + 1)* HSPACE_BETWEEN_FUNCS),
                y_range = (0, max_ycoord + 2),
@@ -1241,6 +1242,22 @@ def parseConfigFile(fname):
 
     return True;
 
+# With Python3 this script fails if the number of open files
+# is limited to 256, because the multiprocessing package does
+# not appear to properly clean up processes that exited.
+#
+def checkOpenFileLimit():
+
+    targetLimit = 512;
+    openFileLimit = int(subprocess.check_output("ulimit -n",
+                                            shell=True).decode());
+
+    if (openFileLimit < targetLimit):
+        print(color.BOLD + color.RED + "Open file limit is " +
+              str(openFileLimit) + ". Please increase to " + str(targetLimit) +
+              " by running `ulimit -n " + str(targetLimit) + "`." +
+              color.END);
+        sys.exit(-1);
 
 def main():
 
@@ -1275,6 +1292,8 @@ def main():
         parser.print_help();
         sys.exit(1);
 
+    checkOpenFileLimit();
+
     # Determine the target job parallelism
     if (args.jobParallelism > 0):
         targetParallelism = args.jobParallelism;
author	Luke Chen <luke.chen@mongodb.com>	2019-06-05 10:44:58 +1000
committer	Luke Chen <luke.chen@mongodb.com>	2019-06-05 10:44:58 +1000
commit	c939010fe98ba0a8affe7d0d30d4e8d57e68242b (patch)
tree	d26869393e4eb400fc7eb2b5bb3194014ba71956 /src
parent	ed173f027ae8940bb93c57c580993192dc534fb0 (diff)
download	mongo-c939010fe98ba0a8affe7d0d30d4e8d57e68242b.tar.gz