From 1afb95fee0342b8d9e05b0433e8e44a6dfd7c4a3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 19 Dec 2020 07:34:35 -0800 Subject: torture: Maintain torture-specific set of CPUs-online books The TREE01 rcutorture scenario intentionally creates confusion as to the number of available CPUs by specifying the "maxcpus=8 nr_cpus=43" kernel boot parameters. This can disable rcutorture's load shedding, which currently uses num_online_cpus(), which would count the extra 35 CPUs. However, the rcutorture guest OS will be provisioned with only 8 CPUs, which means that rcutorture will present full load even when all but one of the original 8 CPUs are offline. This can result in spurious errors due to extreme overloading of that single remaining CPU. This commit therefore keeps a separate set of books on the number of usable online CPUs, so that torture_num_online_cpus() is used for load shedding instead of num_online_cpus(). Note that initial sizing must use num_online_cpus() because torture_num_online_cpus() will return NR_CPUS until shortly after torture_onoff_init() is invoked. Reported-by: Frederic Weisbecker [ paulmck: Apply feedback from kernel test robot. ] Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 5 +++++ kernel/rcu/rcutorture.c | 4 ++-- kernel/torture.c | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/include/linux/torture.h b/include/linux/torture.h index d62d13c8c69a..0910c5803f35 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -48,6 +48,11 @@ do { \ void verbose_torout_sleep(void); /* Definitions for online/offline exerciser. */ +#ifdef CONFIG_HOTPLUG_CPU +int torture_num_online_cpus(void); +#else /* #ifdef CONFIG_HOTPLUG_CPU */ +static inline int torture_num_online_cpus(void) { return 1; } +#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ typedef void torture_ofl_func(void); bool torture_offline(int cpu, long *n_onl_attempts, long *n_onl_successes, unsigned long *sum_offl, int *min_onl, int *max_onl); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 76c838696366..a816df4e86e0 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1338,7 +1338,7 @@ static void rcu_torture_reader_do_mbchk(long myid, struct rcu_torture *rtp, struct torture_random_state *trsp) { unsigned long loops; - int noc = num_online_cpus(); + int noc = torture_num_online_cpus(); int rdrchked; int rdrchker; struct rcu_torture_reader_check *rtrcp; // Me. @@ -1658,7 +1658,7 @@ rcu_torture_reader(void *arg) torture_hrtimeout_us(500, 1000, &rand); lastsleep = jiffies + 10; } - while (num_online_cpus() < mynumonline && !torture_must_stop()) + while (torture_num_online_cpus() < mynumonline && !torture_must_stop()) schedule_timeout_interruptible(HZ / 5); stutter_wait("rcu_torture_reader"); } while (!torture_must_stop()); diff --git a/kernel/torture.c b/kernel/torture.c index 507a20be6950..01e336f1e5b2 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -175,6 +175,19 @@ static unsigned long sum_online; static int min_online = -1; static int max_online; +static int torture_online_cpus = NR_CPUS; + +/* + * Some torture testing leverages confusion as to the number of online + * CPUs. This function returns the torture-testing view of this number, + * which allows torture tests to load-balance appropriately. + */ +int torture_num_online_cpus(void) +{ + return READ_ONCE(torture_online_cpus); +} +EXPORT_SYMBOL_GPL(torture_num_online_cpus); + /* * Attempt to take a CPU offline. Return false if the CPU is already * offline or if it is not subject to CPU-hotplug operations. The @@ -229,6 +242,8 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes, *min_offl = delta; if (*max_offl < delta) *max_offl = delta; + WRITE_ONCE(torture_online_cpus, torture_online_cpus - 1); + WARN_ON_ONCE(torture_online_cpus <= 0); } return true; @@ -285,6 +300,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes, *min_onl = delta; if (*max_onl < delta) *max_onl = delta; + WRITE_ONCE(torture_online_cpus, torture_online_cpus + 1); } return true; -- cgit v1.2.1