/****************************************************************************** * cpupool.c * * Generic cpupool-handling functions. * * Cpupools are a feature to have configurable scheduling domains. Each * cpupool runs an own scheduler on a dedicated set of physical cpus. * A domain is bound to one cpupool at any time, but it can be moved to * another cpupool. * * (C) 2009, Juergen Gross, Fujitsu Technology Solutions */ #include #include #include #include #include #include #include #include #include #include #include #include #include "private.h" struct cpupool *cpupool0; /* Initial cpupool with Dom0 */ cpumask_t cpupool_free_cpus; /* cpus not in any cpupool */ static LIST_HEAD(cpupool_list); /* linked list, sorted by poolid */ static unsigned int n_cpupools; static int cpupool_moving_cpu = -1; static struct cpupool *cpupool_cpu_moving = NULL; static cpumask_t cpupool_locked_cpus; /* This lock nests inside sysctl or hypfs lock. */ static DEFINE_SPINLOCK(cpupool_lock); static enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu; static unsigned int __read_mostly sched_granularity = 1; #define SCHED_GRAN_NAME_LEN 8 struct sched_gran_name { enum sched_gran mode; char name[SCHED_GRAN_NAME_LEN]; }; static const struct sched_gran_name sg_name[] = { {SCHED_GRAN_cpu, "cpu"}, {SCHED_GRAN_core, "core"}, {SCHED_GRAN_socket, "socket"}, }; static const char *sched_gran_get_name(enum sched_gran mode) { const char *name = ""; unsigned int i; for ( i = 0; i < ARRAY_SIZE(sg_name); i++ ) { if ( mode == sg_name[i].mode ) { name = sg_name[i].name; break; } } return name; } static void sched_gran_print(enum sched_gran mode, unsigned int gran) { printk("Scheduling granularity: %s, %u CPU%s per sched-resource\n", sched_gran_get_name(mode), gran, gran == 1 ? "" : "s"); } #ifdef CONFIG_HAS_SCHED_GRANULARITY static int sched_gran_get(const char *str, enum sched_gran *mode) { unsigned int i; for ( i = 0; i < ARRAY_SIZE(sg_name); i++ ) { if ( strcmp(sg_name[i].name, str) == 0 ) { *mode = sg_name[i].mode; return 0; } } return -EINVAL; } static int __init cf_check sched_select_granularity(const char *str) { return sched_gran_get(str, &opt_sched_granularity); } custom_param("sched-gran", sched_select_granularity); #elif defined(CONFIG_HYPFS) static int sched_gran_get(const char *str, enum sched_gran *mode) { return -EINVAL; } #endif static unsigned int cpupool_check_granularity(enum sched_gran mode) { unsigned int cpu; unsigned int siblings, gran = 0; if ( mode == SCHED_GRAN_cpu ) return 1; for_each_online_cpu ( cpu ) { siblings = cpumask_weight(sched_get_opt_cpumask(mode, cpu)); if ( gran == 0 ) gran = siblings; else if ( gran != siblings ) return 0; } return gran; } /* Setup data for selected scheduler granularity. */ static void __init cpupool_gran_init(void) { unsigned int gran = 0; const char *fallback = NULL; while ( gran == 0 ) { gran = cpupool_check_granularity(opt_sched_granularity); if ( gran == 0 ) { switch ( opt_sched_granularity ) { case SCHED_GRAN_core: opt_sched_granularity = SCHED_GRAN_cpu; fallback = "Asymmetric cpu configuration.\n" "Falling back to sched-gran=cpu.\n"; break; case SCHED_GRAN_socket: opt_sched_granularity = SCHED_GRAN_core; fallback = "Asymmetric cpu configuration.\n" "Falling back to sched-gran=core.\n"; break; default: ASSERT_UNREACHABLE(); break; } } } if ( fallback ) warning_add(fallback); if ( opt_sched_granularity != SCHED_GRAN_cpu ) sched_disable_smt_switching = true; sched_granularity = gran; sched_gran_print(opt_sched_granularity, sched_granularity); } unsigned int cpupool_get_granularity(const struct cpupool *c) { return c ? c->sched_gran : 1; } static void free_cpupool_struct(struct cpupool *c) { if ( c ) { free_cpumask_var(c->res_valid); free_cpumask_var(c->cpu_valid); } xfree(c); } static struct cpupool *alloc_cpupool_struct(void) { struct cpupool *c = xzalloc(struct cpupool); if ( !c ) return NULL; if ( !zalloc_cpumask_var(&c->cpu_valid) || !zalloc_cpumask_var(&c->res_valid) ) { free_cpupool_struct(c); c = NULL; } return c; } /* * find a cpupool by it's id. to be called with cpupool lock held * if exact is not specified, the first cpupool with an id larger or equal to * the searched id is returned * returns NULL if not found. */ static struct cpupool *__cpupool_find_by_id(unsigned int id, bool exact) { struct cpupool *q; ASSERT(spin_is_locked(&cpupool_lock)); list_for_each_entry(q, &cpupool_list, list) if ( q->cpupool_id == id || (!exact && q->cpupool_id > id) ) return q; return NULL; } static struct cpupool *cpupool_find_by_id(unsigned int poolid) { return __cpupool_find_by_id(poolid, true); } static struct cpupool *__cpupool_get_by_id(unsigned int poolid, bool exact) { struct cpupool *c; spin_lock(&cpupool_lock); c = __cpupool_find_by_id(poolid, exact); if ( c != NULL ) atomic_inc(&c->refcnt); spin_unlock(&cpupool_lock); return c; } struct cpupool *cpupool_get_by_id(unsigned int poolid) { return __cpupool_get_by_id(poolid, true); } static struct cpupool *cpupool_get_next_by_id(unsigned int poolid) { return __cpupool_get_by_id(poolid, false); } void cpupool_put(struct cpupool *pool) { if ( !atomic_dec_and_test(&pool->refcnt) ) return; scheduler_free(pool->sched); free_cpupool_struct(pool); } /* * create a new cpupool with specified poolid and scheduler * returns pointer to new cpupool structure if okay, NULL else * possible failures: * - no memory * - poolid already used * - unknown scheduler */ static struct cpupool *cpupool_create(unsigned int poolid, unsigned int sched_id) { struct cpupool *c; struct cpupool *q; int ret; if ( (c = alloc_cpupool_struct()) == NULL ) return ERR_PTR(-ENOMEM); /* One reference for caller, one reference for cpupool_destroy(). */ atomic_set(&c->refcnt, 2); debugtrace_printk("cpupool_create(pool=%u,sched=%u)\n", poolid, sched_id); spin_lock(&cpupool_lock); /* Don't allow too many cpupools. */ if ( n_cpupools >= 2 * nr_cpu_ids ) { ret = -ENOSPC; goto unlock; } n_cpupools++; if ( poolid != CPUPOOLID_NONE ) { q = __cpupool_find_by_id(poolid, false); if ( !q ) list_add_tail(&c->list, &cpupool_list); else { list_add_tail(&c->list, &q->list); if ( q->cpupool_id == poolid ) { ret = -EEXIST; goto err; } } c->cpupool_id = poolid; } else { /* Cpupool 0 is created with specified id at boot and never removed. */ ASSERT(!list_empty(&cpupool_list)); q = list_last_entry(&cpupool_list, struct cpupool, list); /* In case of wrap search for first free id. */ if ( q->cpupool_id == CPUPOOLID_NONE - 1 ) { list_for_each_entry(q, &cpupool_list, list) if ( q->cpupool_id + 1 != list_next_entry(q, list)->cpupool_id ) break; } list_add(&c->list, &q->list); c->cpupool_id = q->cpupool_id + 1; } c->sched = scheduler_alloc(sched_id); if ( IS_ERR(c->sched) ) { ret = PTR_ERR(c->sched); goto err; } c->sched->cpupool = c; c->gran = opt_sched_granularity; c->sched_gran = sched_granularity; spin_unlock(&cpupool_lock); debugtrace_printk("Created cpupool %u with scheduler %s (%s)\n", c->cpupool_id, c->sched->name, c->sched->opt_name); return c; err: list_del(&c->list); n_cpupools--; unlock: spin_unlock(&cpupool_lock); free_cpupool_struct(c); return ERR_PTR(ret); } /* * destroys the given cpupool * returns 0 on success, 1 else * possible failures: * - pool still in use * - cpus still assigned to pool */ static int cpupool_destroy(struct cpupool *c) { spin_lock(&cpupool_lock); if ( (c->n_dom != 0) || cpumask_weight(c->cpu_valid) ) { spin_unlock(&cpupool_lock); return -EBUSY; } n_cpupools--; list_del(&c->list); spin_unlock(&cpupool_lock); cpupool_put(c); debugtrace_printk("cpupool_destroy(pool=%u)\n", c->cpupool_id); return 0; } /* * Move domain to another cpupool */ static int cpupool_move_domain_locked(struct domain *d, struct cpupool *c) { int ret; if ( unlikely(d->cpupool == c) ) return 0; d->cpupool->n_dom--; ret = sched_move_domain(d, c); if ( ret ) d->cpupool->n_dom++; else c->n_dom++; return ret; } int cpupool_move_domain(struct domain *d, struct cpupool *c) { int ret; spin_lock(&cpupool_lock); ret = cpupool_move_domain_locked(d, c); spin_unlock(&cpupool_lock); return ret; } /* Update affinities of all domains in a cpupool. */ static void cpupool_update_node_affinity(const struct cpupool *c, struct affinity_masks *masks) { struct affinity_masks local_masks; struct domain *d; if ( !masks ) { if ( !alloc_affinity_masks(&local_masks) ) return; masks = &local_masks; } rcu_read_lock(&domlist_read_lock); for_each_domain_in_cpupool(d, c) domain_update_node_aff(d, masks); rcu_read_unlock(&domlist_read_lock); if ( masks == &local_masks ) free_affinity_masks(masks); } /* * assign a specific cpu to a cpupool * cpupool_lock must be held */ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu) { int ret; const cpumask_t *cpus; cpus = sched_get_opt_cpumask(c->gran, cpu); if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) ) return -EADDRNOTAVAIL; ret = schedule_cpu_add(cpumask_first(cpus), c); if ( ret ) return ret; rcu_read_lock(&sched_res_rculock); cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus); if (cpupool_moving_cpu == cpu) { cpupool_moving_cpu = -1; cpupool_put(cpupool_cpu_moving); cpupool_cpu_moving = NULL; } cpumask_or(c->cpu_valid, c->cpu_valid, cpus); cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask); rcu_read_unlock(&sched_res_rculock); cpupool_update_node_affinity(c, NULL); return 0; } static int cpupool_unassign_cpu_finish(struct cpupool *c, struct cpu_rm_data *mem) { int cpu = cpupool_moving_cpu; const cpumask_t *cpus; struct affinity_masks *masks = mem ? &mem->affinity : NULL; int ret; if ( c != cpupool_cpu_moving ) return -EADDRNOTAVAIL; rcu_read_lock(&domlist_read_lock); ret = cpu_disable_scheduler(cpu); rcu_read_unlock(&domlist_read_lock); rcu_read_lock(&sched_res_rculock); cpus = get_sched_res(cpu)->cpus; cpumask_or(&cpupool_free_cpus, &cpupool_free_cpus, cpus); /* * cpu_disable_scheduler() returning an error doesn't require resetting * cpupool_free_cpus' cpu bit. All error cases should be of temporary * nature and tools will retry the operation. Even if the number of * retries may be limited, the in-between state can easily be repaired * by adding the cpu to the cpupool again. */ if ( !ret ) { ret = schedule_cpu_rm(cpu, mem); if ( ret ) cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus); else { cpupool_moving_cpu = -1; cpupool_put(cpupool_cpu_moving); cpupool_cpu_moving = NULL; } } rcu_read_unlock(&sched_res_rculock); cpupool_update_node_affinity(c, masks); return ret; } static int cpupool_unassign_cpu_start(struct cpupool *c, unsigned int cpu) { int ret; struct domain *d; const cpumask_t *cpus; spin_lock(&cpupool_lock); ret = -EADDRNOTAVAIL; if ( ((cpupool_moving_cpu != -1) || !cpumask_test_cpu(cpu, c->cpu_valid)) && (cpu != cpupool_moving_cpu) ) goto out; ret = 0; rcu_read_lock(&sched_res_rculock); cpus = get_sched_res(cpu)->cpus; if ( (c->n_dom > 0) && (cpumask_weight(c->cpu_valid) == cpumask_weight(cpus)) && (cpu != cpupool_moving_cpu) ) { rcu_read_lock(&domlist_read_lock); for_each_domain_in_cpupool(d, c) { if ( !d->is_dying && system_state == SYS_STATE_active ) { ret = -EBUSY; break; } ret = cpupool_move_domain_locked(d, cpupool0); if ( ret ) break; } rcu_read_unlock(&domlist_read_lock); if ( ret ) goto out_rcu; } cpupool_moving_cpu = cpu; atomic_inc(&c->refcnt); cpupool_cpu_moving = c; cpumask_andnot(c->cpu_valid, c->cpu_valid, cpus); cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask); out_rcu: rcu_read_unlock(&sched_res_rculock); out: spin_unlock(&cpupool_lock); return ret; } static long cf_check cpupool_unassign_cpu_helper(void *info) { struct cpupool *c = info; long ret; debugtrace_printk("cpupool_unassign_cpu(pool=%u,cpu=%d)\n", cpupool_cpu_moving->cpupool_id, cpupool_moving_cpu); spin_lock(&cpupool_lock); ret = cpupool_unassign_cpu_finish(c, NULL); spin_unlock(&cpupool_lock); debugtrace_printk("cpupool_unassign_cpu ret=%ld\n", ret); return ret; } /* * unassign a specific cpu from a cpupool * we must be sure not to run on the cpu to be unassigned! to achieve this * the main functionality is performed via continue_hypercall_on_cpu on a * specific cpu. * if the cpu to be removed is the last one of the cpupool no active domain * must be bound to the cpupool. dying domains are moved to cpupool0 as they * might be zombies. * possible failures: * - last cpu and still active domains in cpupool * - cpu just being unplugged * - Attempt to remove boot cpu from cpupool0 */ static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu) { int work_cpu; int ret; unsigned int master_cpu; debugtrace_printk("cpupool_unassign_cpu(pool=%u,cpu=%d)\n", c->cpupool_id, cpu); /* * Cpu0 must remain in cpupool0, otherwise some operations like moving cpus * between cpupools, cpu hotplug, destroying cpupools, shutdown of the host, * might not work in a sane way. */ if ( (!c->cpupool_id && !cpu) || !cpu_online(cpu) ) return -EINVAL; master_cpu = sched_get_resource_cpu(cpu); ret = cpupool_unassign_cpu_start(c, master_cpu); if ( ret ) { debugtrace_printk("cpupool_unassign_cpu(pool=%u,cpu=%d) ret %d\n", c->cpupool_id, cpu, ret); return ret; } work_cpu = sched_get_resource_cpu(smp_processor_id()); if ( work_cpu == master_cpu ) { work_cpu = cpumask_first(cpupool0->cpu_valid); if ( work_cpu == master_cpu ) work_cpu = cpumask_last(cpupool0->cpu_valid); } return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c); } /* * add a new domain to a cpupool * possible failures: * - pool does not exist * - no cpu assigned to pool */ int cpupool_add_domain(struct domain *d, unsigned int poolid) { struct cpupool *c; int rc; int n_dom = 0; spin_lock(&cpupool_lock); c = cpupool_find_by_id(poolid); if ( c == NULL ) rc = -ESRCH; else if ( !cpumask_weight(c->cpu_valid) ) rc = -ENODEV; else { c->n_dom++; n_dom = c->n_dom; d->cpupool = c; rc = 0; } spin_unlock(&cpupool_lock); debugtrace_printk("cpupool_add_domain(dom=%d,pool=%u) n_dom %d rc %d\n", d->domain_id, poolid, n_dom, rc); return rc; } /* * remove a domain from a cpupool */ void cpupool_rm_domain(struct domain *d) { unsigned int cpupool_id; int n_dom; if ( d->cpupool == NULL ) return; spin_lock(&cpupool_lock); cpupool_id = d->cpupool->cpupool_id; d->cpupool->n_dom--; n_dom = d->cpupool->n_dom; d->cpupool = NULL; spin_unlock(&cpupool_lock); debugtrace_printk("cpupool_rm_domain(dom=%d,pool=%u) n_dom %d\n", d->domain_id, cpupool_id, n_dom); return; } /* * Called to add a cpu to a pool. CPUs being hot-plugged are added to pool0, * as they must have been in there when unplugged. */ static int cpupool_cpu_add(unsigned int cpu) { int ret = 0; const cpumask_t *cpus; spin_lock(&cpupool_lock); cpumask_clear_cpu(cpu, &cpupool_locked_cpus); cpumask_set_cpu(cpu, &cpupool_free_cpus); /* * If we are not resuming, we are hot-plugging cpu, and in which case * we add it to pool0, as it certainly was there when hot-unplagged * (or unplugging would have failed) and that is the default behavior * anyway. */ rcu_read_lock(&sched_res_rculock); get_sched_res(cpu)->cpupool = NULL; cpus = sched_get_opt_cpumask(cpupool0->gran, cpu); if ( cpumask_subset(cpus, &cpupool_free_cpus) && cpumask_weight(cpus) == cpupool_get_granularity(cpupool0) ) ret = cpupool_assign_cpu_locked(cpupool0, cpu); rcu_read_unlock(&sched_res_rculock); spin_unlock(&cpupool_lock); return ret; } /* * This function is called in stop_machine context, so we can be sure no * non-idle vcpu is active on the system. */ static void cpupool_cpu_remove(unsigned int cpu, struct cpu_rm_data *mem) { int ret; ASSERT(is_idle_vcpu(current)); if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) ) { ret = cpupool_unassign_cpu_finish(cpupool0, mem); BUG_ON(ret); } cpumask_clear_cpu(cpu, &cpupool_free_cpus); } /* * Called before a CPU is being removed from the system. * Removing a CPU is allowed for free CPUs or CPUs in Pool-0 (those are moved * to free cpus actually before removing them). * The CPU is locked, to forbid adding it again to another cpupool. */ static int cpupool_cpu_remove_prologue(unsigned int cpu) { int ret = 0; cpumask_t *cpus; unsigned int master_cpu; spin_lock(&cpupool_lock); rcu_read_lock(&sched_res_rculock); cpus = get_sched_res(cpu)->cpus; master_cpu = sched_get_resource_cpu(cpu); if ( cpumask_intersects(cpus, &cpupool_locked_cpus) ) ret = -EBUSY; else cpumask_set_cpu(cpu, &cpupool_locked_cpus); rcu_read_unlock(&sched_res_rculock); spin_unlock(&cpupool_lock); if ( ret ) return ret; if ( cpumask_test_cpu(master_cpu, cpupool0->cpu_valid) ) { /* Cpupool0 is populated only after all cpus are up. */ ASSERT(system_state == SYS_STATE_active); ret = cpupool_unassign_cpu_start(cpupool0, master_cpu); } else if ( !cpumask_test_cpu(master_cpu, &cpupool_free_cpus) ) ret = -ENODEV; return ret; } /* * Called during resume for all cpus which didn't come up again. The cpu must * be removed from the cpupool it is assigned to. In case a cpupool will be * left without cpu we move all domains of that cpupool to cpupool0. * As we are called with all domains still frozen there is no need to take the * cpupool lock here. */ static void cpupool_cpu_remove_forced(unsigned int cpu) { struct cpupool *c; int ret; unsigned int master_cpu = sched_get_resource_cpu(cpu); list_for_each_entry(c, &cpupool_list, list) { if ( cpumask_test_cpu(master_cpu, c->cpu_valid) ) { ret = cpupool_unassign_cpu_start(c, master_cpu); BUG_ON(ret); ret = cpupool_unassign_cpu_finish(c, NULL); BUG_ON(ret); } } cpumask_clear_cpu(cpu, &cpupool_free_cpus); rcu_read_lock(&sched_res_rculock); sched_rm_cpu(cpu); rcu_read_unlock(&sched_res_rculock); } /* * do cpupool related sysctl operations */ int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op) { int ret = 0; struct cpupool *c; switch ( op->op ) { case XEN_SYSCTL_CPUPOOL_OP_CREATE: { unsigned int poolid; poolid = (op->cpupool_id == XEN_SYSCTL_CPUPOOL_PAR_ANY) ? CPUPOOLID_NONE: op->cpupool_id; c = cpupool_create(poolid, op->sched_id); if ( IS_ERR(c) ) ret = PTR_ERR(c); else { op->cpupool_id = c->cpupool_id; cpupool_put(c); } } break; case XEN_SYSCTL_CPUPOOL_OP_DESTROY: { c = cpupool_get_by_id(op->cpupool_id); ret = -ENOENT; if ( c == NULL ) break; ret = cpupool_destroy(c); cpupool_put(c); } break; case XEN_SYSCTL_CPUPOOL_OP_INFO: { c = cpupool_get_next_by_id(op->cpupool_id); ret = -ENOENT; if ( c == NULL ) break; op->cpupool_id = c->cpupool_id; op->sched_id = c->sched->sched_id; op->n_dom = c->n_dom; ret = cpumask_to_xenctl_bitmap(&op->cpumap, c->cpu_valid); cpupool_put(c); } break; case XEN_SYSCTL_CPUPOOL_OP_ADDCPU: { unsigned int cpu; const cpumask_t *cpus; cpu = op->cpu; debugtrace_printk("cpupool_assign_cpu(pool=%u,cpu=%u)\n", op->cpupool_id, cpu); spin_lock(&cpupool_lock); c = cpupool_find_by_id(op->cpupool_id); ret = -ENOENT; if ( c == NULL ) goto addcpu_out; if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY ) { for_each_cpu ( cpu, &cpupool_free_cpus ) { cpus = sched_get_opt_cpumask(c->gran, cpu); if ( cpumask_subset(cpus, &cpupool_free_cpus) ) break; } ret = -ENODEV; if ( cpu >= nr_cpu_ids ) goto addcpu_out; } ret = -EINVAL; if ( cpu >= nr_cpu_ids ) goto addcpu_out; ret = -ENODEV; cpus = sched_get_opt_cpumask(c->gran, cpu); if ( !cpumask_subset(cpus, &cpupool_free_cpus) || cpumask_intersects(cpus, &cpupool_locked_cpus) ) goto addcpu_out; ret = cpupool_assign_cpu_locked(c, cpu); addcpu_out: spin_unlock(&cpupool_lock); debugtrace_printk("cpupool_assign_cpu(pool=%u,cpu=%u) ret %d\n", op->cpupool_id, cpu, ret); } break; case XEN_SYSCTL_CPUPOOL_OP_RMCPU: { unsigned int cpu; c = cpupool_get_by_id(op->cpupool_id); ret = -ENOENT; if ( c == NULL ) break; cpu = op->cpu; if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY ) cpu = cpumask_last(c->cpu_valid); ret = (cpu < nr_cpu_ids) ? cpupool_unassign_cpu(c, cpu) : -EINVAL; cpupool_put(c); } break; case XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN: { struct domain *d; ret = rcu_lock_remote_domain_by_id(op->domid, &d); if ( ret ) break; if ( d->cpupool == NULL ) { ret = -EINVAL; rcu_unlock_domain(d); break; } if ( op->cpupool_id == d->cpupool->cpupool_id ) { ret = 0; rcu_unlock_domain(d); break; } debugtrace_printk("cpupool move_domain(dom=%d)->pool=%u\n", d->domain_id, op->cpupool_id); ret = -ENOENT; spin_lock(&cpupool_lock); c = cpupool_find_by_id(op->cpupool_id); if ( (c != NULL) && cpumask_weight(c->cpu_valid) ) ret = cpupool_move_domain_locked(d, c); spin_unlock(&cpupool_lock); debugtrace_printk("cpupool move_domain(dom=%d)->pool=%u ret %d\n", d->domain_id, op->cpupool_id, ret); rcu_unlock_domain(d); } break; case XEN_SYSCTL_CPUPOOL_OP_FREEINFO: { ret = cpumask_to_xenctl_bitmap( &op->cpumap, &cpupool_free_cpus); } break; default: ret = -ENOSYS; break; } return ret; } unsigned int cpupool_get_id(const struct domain *d) { return d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE; } const cpumask_t *cpupool_valid_cpus(const struct cpupool *pool) { return pool->cpu_valid; } void cf_check dump_runq(unsigned char key) { s_time_t now = NOW(); struct cpupool *c; spin_lock(&cpupool_lock); printk("sched_smt_power_savings: %s\n", sched_smt_power_savings? "enabled":"disabled"); printk("NOW=%"PRI_stime"\n", now); printk("Online Cpus: %*pbl\n", CPUMASK_PR(&cpu_online_map)); if ( !cpumask_empty(&cpupool_free_cpus) ) { printk("Free Cpus: %*pbl\n", CPUMASK_PR(&cpupool_free_cpus)); schedule_dump(NULL); } list_for_each_entry(c, &cpupool_list, list) { printk("Cpupool %u:\n", c->cpupool_id); printk("Cpus: %*pbl\n", CPUMASK_PR(c->cpu_valid)); sched_gran_print(c->gran, cpupool_get_granularity(c)); schedule_dump(c); } spin_unlock(&cpupool_lock); } static int cf_check cpu_callback( struct notifier_block *nfb, unsigned long action, void *hcpu) { static struct cpu_rm_data *mem; unsigned int cpu = (unsigned long)hcpu; int rc = 0; switch ( action ) { case CPU_DOWN_FAILED: if ( system_state <= SYS_STATE_active ) { if ( mem ) { free_cpu_rm_data(mem, cpu); mem = NULL; } rc = cpupool_cpu_add(cpu); } break; case CPU_ONLINE: if ( system_state <= SYS_STATE_active ) rc = cpupool_cpu_add(cpu); else sched_migrate_timers(cpu); break; case CPU_DOWN_PREPARE: /* Suspend/Resume don't change assignments of cpus to cpupools. */ if ( system_state <= SYS_STATE_active ) { rc = cpupool_cpu_remove_prologue(cpu); if ( !rc ) { ASSERT(!mem); mem = alloc_cpu_rm_data(cpu, true); rc = mem ? 0 : -ENOMEM; } } break; case CPU_DYING: /* Suspend/Resume don't change assignments of cpus to cpupools. */ if ( system_state <= SYS_STATE_active ) { ASSERT(mem); cpupool_cpu_remove(cpu, mem); } break; case CPU_DEAD: if ( system_state <= SYS_STATE_active ) { ASSERT(mem); free_cpu_rm_data(mem, cpu); mem = NULL; } break; case CPU_RESUME_FAILED: cpupool_cpu_remove_forced(cpu); break; default: break; } return notifier_from_errno(rc); } static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; #ifdef CONFIG_HYPFS static HYPFS_DIR_INIT(cpupool_pooldir, "%u"); static int cf_check cpupool_dir_read( const struct hypfs_entry *entry, XEN_GUEST_HANDLE_PARAM(void) uaddr) { int ret = 0; struct cpupool *c; struct hypfs_dyndir_id *data; data = hypfs_get_dyndata(); list_for_each_entry(c, &cpupool_list, list) { data->id = c->cpupool_id; data->data = c; ret = hypfs_read_dyndir_id_entry(&cpupool_pooldir, c->cpupool_id, list_is_last(&c->list, &cpupool_list), &uaddr); if ( ret ) break; } return ret; } static unsigned int cf_check cpupool_dir_getsize( const struct hypfs_entry *entry) { const struct cpupool *c; unsigned int size = 0; list_for_each_entry(c, &cpupool_list, list) size += hypfs_dynid_entry_size(entry, c->cpupool_id); return size; } static const struct hypfs_entry *cf_check cpupool_dir_enter( const struct hypfs_entry *entry) { struct hypfs_dyndir_id *data; data = hypfs_alloc_dyndata(struct hypfs_dyndir_id); if ( !data ) return ERR_PTR(-ENOMEM); data->id = CPUPOOLID_NONE; spin_lock(&cpupool_lock); return entry; } static void cf_check cpupool_dir_exit(const struct hypfs_entry *entry) { spin_unlock(&cpupool_lock); hypfs_free_dyndata(); } static struct hypfs_entry *cf_check cpupool_dir_findentry( const struct hypfs_entry_dir *dir, const char *name, unsigned int name_len) { unsigned long id; const char *end; struct cpupool *cpupool; id = simple_strtoul(name, &end, 10); if ( end != name + name_len || id > UINT_MAX ) return ERR_PTR(-ENOENT); cpupool = __cpupool_find_by_id(id, true); if ( !cpupool ) return ERR_PTR(-ENOENT); return hypfs_gen_dyndir_id_entry(&cpupool_pooldir, id, cpupool); } static int cf_check cpupool_gran_read( const struct hypfs_entry *entry, XEN_GUEST_HANDLE_PARAM(void) uaddr) { const struct hypfs_dyndir_id *data; const struct cpupool *cpupool; const char *gran; data = hypfs_get_dyndata(); cpupool = data->data; ASSERT(cpupool); gran = sched_gran_get_name(cpupool->gran); if ( !*gran ) return -ENOENT; return copy_to_guest(uaddr, gran, strlen(gran) + 1) ? -EFAULT : 0; } static unsigned int cf_check hypfs_gran_getsize(const struct hypfs_entry *entry) { const struct hypfs_dyndir_id *data; const struct cpupool *cpupool; const char *gran; data = hypfs_get_dyndata(); cpupool = data->data; ASSERT(cpupool); gran = sched_gran_get_name(cpupool->gran); return strlen(gran) + 1; } static int cf_check cpupool_gran_write( struct hypfs_entry_leaf *leaf, XEN_GUEST_HANDLE_PARAM(const_void) uaddr, unsigned int ulen) { const struct hypfs_dyndir_id *data; struct cpupool *cpupool; enum sched_gran gran; unsigned int sched_gran = 0; char name[SCHED_GRAN_NAME_LEN]; int ret = 0; if ( ulen > SCHED_GRAN_NAME_LEN ) return -ENOSPC; if ( copy_from_guest(name, uaddr, ulen) ) return -EFAULT; if ( memchr(name, 0, ulen) == (name + ulen - 1) ) sched_gran = sched_gran_get(name, &gran) ? 0 : cpupool_check_granularity(gran); if ( sched_gran == 0 ) return -EINVAL; data = hypfs_get_dyndata(); cpupool = data->data; ASSERT(cpupool); /* Guarded by the cpupool_lock taken in cpupool_dir_enter(). */ if ( !cpumask_empty(cpupool->cpu_valid) ) ret = -EBUSY; else { cpupool->gran = gran; cpupool->sched_gran = sched_gran; } return ret; } static const struct hypfs_funcs cpupool_gran_funcs = { .enter = hypfs_node_enter, .exit = hypfs_node_exit, .read = cpupool_gran_read, .write = cpupool_gran_write, .getsize = hypfs_gran_getsize, .findentry = hypfs_leaf_findentry, }; static HYPFS_VARSIZE_INIT(cpupool_gran, XEN_HYPFS_TYPE_STRING, "sched-gran", SCHED_GRAN_NAME_LEN, &cpupool_gran_funcs); static char granstr[SCHED_GRAN_NAME_LEN] = { [0 ... SCHED_GRAN_NAME_LEN - 2] = '?', [SCHED_GRAN_NAME_LEN - 1] = 0 }; static const struct hypfs_funcs cpupool_dir_funcs = { .enter = cpupool_dir_enter, .exit = cpupool_dir_exit, .read = cpupool_dir_read, .write = hypfs_write_deny, .getsize = cpupool_dir_getsize, .findentry = cpupool_dir_findentry, }; static HYPFS_DIR_INIT_FUNC(cpupool_dir, "cpupool", &cpupool_dir_funcs); static void cpupool_hypfs_init(void) { hypfs_add_dir(&hypfs_root, &cpupool_dir, true); hypfs_add_dyndir(&cpupool_dir, &cpupool_pooldir); hypfs_string_set_reference(&cpupool_gran, granstr); hypfs_add_leaf(&cpupool_pooldir, &cpupool_gran, true); } #else /* CONFIG_HYPFS */ static void cpupool_hypfs_init(void) { } #endif /* CONFIG_HYPFS */ struct cpupool *__init cpupool_create_pool(unsigned int pool_id, int sched_id) { struct cpupool *pool; if ( sched_id < 0 ) sched_id = scheduler_get_default()->sched_id; pool = cpupool_create(pool_id, sched_id); BUG_ON(IS_ERR(pool)); cpupool_put(pool); return pool; } static int __init cf_check cpupool_init(void) { unsigned int cpu; cpupool_gran_init(); cpupool_hypfs_init(); register_cpu_notifier(&cpu_nfb); btcpupools_dtb_parse(); btcpupools_allocate_pools(); spin_lock(&cpupool_lock); cpumask_copy(&cpupool_free_cpus, &cpu_online_map); for_each_cpu ( cpu, &cpupool_free_cpus ) { unsigned int pool_id = btcpupools_get_cpupool_id(cpu); struct cpupool *pool = cpupool_find_by_id(pool_id); ASSERT(pool); cpupool_assign_cpu_locked(pool, cpu); } spin_unlock(&cpupool_lock); return 0; } __initcall(cpupool_init); /* * Local variables: * mode: C * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */