/*
 * xen/common/sched_null.c
 *
 * Copyright (c) 2017, Dario Faggioli, Citrix Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * The 'null' scheduler always chooses to run, on each pCPU, either nothing
 * (i.e., the pCPU stays idle) or always the same unit.
 *
 * It is aimed at supporting static scenarios, where there always are
 * fewer units than pCPUs (and the units don't need to move among pCPUs
 * for any reason) with the least possible overhead.
 *
 * Typical use cases are embedded applications, but also HPC, especially
 * if the scheduler is used inside a cpupool.
 */

#include <xen/sched.h>
#include <xen/softirq.h>
#include <xen/trace.h>

#include "private.h"

/*
 * null tracing events. Check include/public/trace.h for more details.
 */
#define TRC_SNULL_PICKED_CPU    TRC_SCHED_CLASS_EVT(SNULL, 1)
#define TRC_SNULL_UNIT_ASSIGN   TRC_SCHED_CLASS_EVT(SNULL, 2)
#define TRC_SNULL_UNIT_DEASSIGN TRC_SCHED_CLASS_EVT(SNULL, 3)
#define TRC_SNULL_MIGRATE       TRC_SCHED_CLASS_EVT(SNULL, 4)
#define TRC_SNULL_SCHEDULE      TRC_SCHED_CLASS_EVT(SNULL, 5)
#define TRC_SNULL_TASKLET       TRC_SCHED_CLASS_EVT(SNULL, 6)

/*
 * Locking:
 * - Scheduler-lock (a.k.a. runqueue lock):
 *  + is per-pCPU;
 *  + serializes assignment and deassignment of units to a pCPU.
 * - Private data lock (a.k.a. private scheduler lock):
 *  + is scheduler-wide;
 *  + serializes accesses to the list of domains in this scheduler.
 * - Waitqueue lock:
 *  + is scheduler-wide;
 *  + serializes accesses to the list of units waiting to be assigned
 *    to pCPUs.
 *
 * Ordering is: private lock, runqueue lock, waitqueue lock. Put the other
 * way around, the waitqueue lock nests inside the runqueue lock, which
 * nests inside the private lock. More specifically:
 *  + if we need both runqueue and private locks, we must acquire the
 *    private lock first;
 *  + if we need both runqueue and waitqueue locks, we must acquire
 *    the runqueue lock first;
 *  + if we need both private and waitqueue locks, we must acquire
 *    the private lock first;
 *  + if we already own a runqueue lock, we must never acquire
 *    the private lock;
 *  + if we already own the waitqueue lock, we must never acquire
 *    the runqueue lock or the private lock.
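 *
 * For example, a path that needs both the private lock and the waitqueue
 * lock (e.g., for walking both the domain list and the waitqueue, as the
 * dump handler does) must take them like this (illustrative sketch only):
 *
 *     spin_lock_irqsave(&prv->lock, flags);
 *     spin_lock(&prv->waitq_lock);
 *     ... access prv->ndom and/or prv->waitq ...
 *     spin_unlock(&prv->waitq_lock);
 *     spin_unlock_irqrestore(&prv->lock, flags);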
 */

/*
 * System-wide private data
 */
struct null_private {
    spinlock_t lock;        /* scheduler lock; nests inside cpupool_lock */
    struct list_head ndom;  /* Domains of this scheduler */
    struct list_head waitq; /* units not assigned to any pCPU */
    spinlock_t waitq_lock;  /* serializes waitq; nests inside runq locks */
    cpumask_t cpus_free;    /* CPUs without a unit associated to them */
};

/*
 * Physical CPU
 */
struct null_pcpu {
    struct sched_unit *unit;
};

/*
 * Schedule unit
 */
struct null_unit {
    struct list_head waitq_elem;
    struct sched_unit *unit;
};

/*
 * Domain
 */
struct null_dom {
    struct list_head ndom_elem;
    struct domain *dom;
};

/*
 * Accessor helper functions
 */
static inline struct null_private *null_priv(const struct scheduler *ops)
{
    return ops->sched_data;
}

static inline struct null_unit *null_unit(const struct sched_unit *unit)
{
    return unit->priv;
}

static inline bool unit_check_affinity(struct sched_unit *unit,
                                       unsigned int cpu,
                                       unsigned int balance_step)
{
    affinity_balance_cpumask(unit, balance_step, cpumask_scratch_cpu(cpu));
    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
                cpupool_domain_master_cpumask(unit->domain));

    return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
}

static int cf_check null_init(struct scheduler *ops)
{
    struct null_private *prv;

    printk("Initializing null scheduler\n"
           "WARNING: This is experimental software in development.\n"
           "Use at your own risk.\n");

    prv = xzalloc(struct null_private);
    if ( prv == NULL )
        return -ENOMEM;

    spin_lock_init(&prv->lock);
    spin_lock_init(&prv->waitq_lock);
    INIT_LIST_HEAD(&prv->ndom);
    INIT_LIST_HEAD(&prv->waitq);

    ops->sched_data = prv;

    return 0;
}

static void cf_check null_deinit(struct scheduler *ops)
{
    xfree(ops->sched_data);
    ops->sched_data = NULL;
}

static void init_pdata(struct null_private *prv, struct null_pcpu *npc,
                       unsigned int cpu)
{
    /* Mark the pCPU as free, and with no unit assigned */
    cpumask_set_cpu(cpu, &prv->cpus_free);
    npc->unit = NULL;
}

static void cf_check null_deinit_pdata(
    const struct scheduler *ops, void *pcpu, int cpu)
{
    struct null_private *prv = null_priv(ops);
    struct null_pcpu *npc = pcpu;

    ASSERT(npc);

    cpumask_clear_cpu(cpu, &prv->cpus_free);
    npc->unit = NULL;
}

static void *cf_check null_alloc_pdata(const struct scheduler *ops, int cpu)
{
    struct null_pcpu *npc;

    npc = xzalloc(struct null_pcpu);
    if ( npc == NULL )
        return ERR_PTR(-ENOMEM);

    return npc;
}

static void cf_check null_free_pdata(
    const struct scheduler *ops, void *pcpu, int cpu)
{
    xfree(pcpu);
}

static void *cf_check null_alloc_udata(
    const struct scheduler *ops, struct sched_unit *unit, void *dd)
{
    struct null_unit *nvc;

    nvc = xzalloc(struct null_unit);
    if ( nvc == NULL )
        return NULL;

    INIT_LIST_HEAD(&nvc->waitq_elem);
    nvc->unit = unit;

    SCHED_STAT_CRANK(unit_alloc);

    return nvc;
}

static void cf_check null_free_udata(const struct scheduler *ops, void *priv)
{
    struct null_unit *nvc = priv;

    xfree(nvc);
}

static void *cf_check null_alloc_domdata(
    const struct scheduler *ops, struct domain *d)
{
    struct null_private *prv = null_priv(ops);
    struct null_dom *ndom;
    unsigned long flags;

    ndom = xzalloc(struct null_dom);
    if ( ndom == NULL )
        return ERR_PTR(-ENOMEM);

    ndom->dom = d;

    spin_lock_irqsave(&prv->lock, flags);
    list_add_tail(&ndom->ndom_elem, &null_priv(ops)->ndom);
    spin_unlock_irqrestore(&prv->lock, flags);

    return ndom;
}

static void cf_check null_free_domdata(const struct scheduler *ops, void *data)
{
    struct null_dom *ndom = data;
    struct null_private *prv = null_priv(ops);

    if ( ndom )
    {
        unsigned long flags;

        spin_lock_irqsave(&prv->lock, flags);
        list_del_init(&ndom->ndom_elem);
        spin_unlock_irqrestore(&prv->lock, flags);

        xfree(ndom);
    }
}

/*
 * unit to pCPU assignment and placement. This _only_ happens:
 * - on insert,
 * - on migrate.
 *
 * Insert occurs when a unit joins this scheduler for the first time
 * (e.g., when the domain it's part of is moved to the scheduler's
 * cpupool).
 *
 * Migration may be necessary if a pCPU (with a unit assigned to it)
 * is removed from the scheduler's cpupool.
 *
 * So this is not part of any hot path.
 */
static struct sched_resource *
pick_res(const struct null_private *prv, const struct sched_unit *unit)
{
    unsigned int bs;
    unsigned int cpu = sched_unit_master(unit), new_cpu;
    const cpumask_t *cpus = cpupool_domain_master_cpumask(unit->domain);
    const struct null_pcpu *npc = get_sched_res(cpu)->sched_priv;

    ASSERT(spin_is_locked(get_sched_res(cpu)->schedule_lock));

    for_each_affinity_balance_step( bs )
    {
        if ( bs == BALANCE_SOFT_AFFINITY && !has_soft_affinity(unit) )
            continue;

        affinity_balance_cpumask(unit, bs, cpumask_scratch_cpu(cpu));
        cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu), cpus);

        /*
         * If our processor is free, or we are assigned to it, and it is also
         * still valid and part of our affinity, just go for it.
         * (Note that we may call unit_check_affinity(), but we deliberately
         * don't, so we get to keep in the scratch cpumask what we have just
         * put in it.)
         */
        if ( likely((npc->unit == NULL || npc->unit == unit)
                    && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
        {
            new_cpu = cpu;
            goto out;
        }

        /* If not, just go for a free pCPU, within our affinity, if any */
        cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
                    &prv->cpus_free);
        new_cpu = cpumask_first(cpumask_scratch_cpu(cpu));

        if ( likely(new_cpu != nr_cpu_ids) )
            goto out;
    }

    /*
     * If we didn't find any free pCPU, just pick any valid pcpu, even if
     * it has another unit assigned. This will happen during shutdown and
     * suspend/resume, but it may also happen during "normal operation", if
     * all the pCPUs are busy.
     *
     * In fact, there must always be something sane in v->processor, or
     * unit_schedule_lock() and friends won't work. This is not a problem,
     * as we will actually assign the unit to the pCPU we return from here
     * only if the pCPU is free.
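     *
     * To sum up, the placement tried above is, in order:
     *  1. the unit's current pCPU, if it is free (or already ours) and
     *     within the affinity being considered;
     *  2. any free pCPU within that affinity (soft affinity first, then
     *     hard affinity);
     *  3. as a last resort, any valid pCPU in the unit's hard affinity,
     *     even if it is busy.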
     */
    cpumask_and(cpumask_scratch_cpu(cpu), cpus, unit->cpu_hard_affinity);
    new_cpu = cpumask_any(cpumask_scratch_cpu(cpu));

 out:
    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t unit, dom;
            uint32_t new_cpu;
        } d;
        d.dom = unit->domain->domain_id;
        d.unit = unit->unit_id;
        d.new_cpu = new_cpu;
        __trace_var(TRC_SNULL_PICKED_CPU, 1, sizeof(d), &d);
    }

    return get_sched_res(new_cpu);
}

static void unit_assign(struct null_private *prv, struct sched_unit *unit,
                        unsigned int cpu)
{
    struct null_pcpu *npc = get_sched_res(cpu)->sched_priv;

    ASSERT(is_unit_online(unit));

    npc->unit = unit;
    sched_set_res(unit, get_sched_res(cpu));
    cpumask_clear_cpu(cpu, &prv->cpus_free);

    dprintk(XENLOG_G_INFO, "%d <-- %pdv%d\n", cpu, unit->domain,
            unit->unit_id);

    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t unit, dom;
            uint32_t cpu;
        } d;
        d.dom = unit->domain->domain_id;
        d.unit = unit->unit_id;
        d.cpu = cpu;
        __trace_var(TRC_SNULL_UNIT_ASSIGN, 1, sizeof(d), &d);
    }
}

/* Returns true if a cpu was tickled */
static bool unit_deassign(struct null_private *prv,
                          const struct sched_unit *unit)
{
    unsigned int bs;
    unsigned int cpu = sched_unit_master(unit);
    struct null_unit *wvc;
    struct null_pcpu *npc = get_sched_res(cpu)->sched_priv;

    ASSERT(list_empty(&null_unit(unit)->waitq_elem));
    ASSERT(npc->unit == unit);
    ASSERT(!cpumask_test_cpu(cpu, &prv->cpus_free));

    npc->unit = NULL;
    cpumask_set_cpu(cpu, &prv->cpus_free);

    dprintk(XENLOG_G_INFO, "%d <-- NULL (%pdv%d)\n", cpu, unit->domain,
            unit->unit_id);

    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t unit, dom;
            uint32_t cpu;
        } d;
        d.dom = unit->domain->domain_id;
        d.unit = unit->unit_id;
        d.cpu = cpu;
        __trace_var(TRC_SNULL_UNIT_DEASSIGN, 1, sizeof(d), &d);
    }

    spin_lock(&prv->waitq_lock);

    /*
     * The pCPU is now free: let's see if there is someone waiting that is
     * suitable to be assigned to it (prioritizing units that have
     * soft-affinity with cpu).
     */
    for_each_affinity_balance_step( bs )
    {
        list_for_each_entry( wvc, &prv->waitq, waitq_elem )
        {
            if ( bs == BALANCE_SOFT_AFFINITY && !has_soft_affinity(wvc->unit) )
                continue;

            if ( unit_check_affinity(wvc->unit, cpu, bs) )
            {
                list_del_init(&wvc->waitq_elem);
                unit_assign(prv, wvc->unit, cpu);
                cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
                spin_unlock(&prv->waitq_lock);
                return true;
            }
        }
    }
    spin_unlock(&prv->waitq_lock);

    return false;
}

/* Change the scheduler of cpu to us (null). */
static spinlock_t *cf_check null_switch_sched(
    struct scheduler *new_ops, unsigned int cpu, void *pdata, void *vdata)
{
    struct sched_resource *sr = get_sched_res(cpu);
    struct null_private *prv = null_priv(new_ops);
    const struct null_unit *nvc = vdata;

    ASSERT(nvc && is_idle_unit(nvc->unit));

    sched_idle_unit(cpu)->priv = vdata;

    /*
     * We are holding the runqueue lock already (it's been taken in
     * schedule_cpu_switch()). It actually may or may not be the 'right'
     * one for this cpu, but that is ok for preventing races.
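     *
     * Note that we return a pointer to this resource's own lock
     * (&sr->_lock): once we are done, the core scheduling code re-points
     * the per-CPU schedule lock at it, so from then on that is the lock
     * which serializes scheduling decisions for this cpu.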
     */
    ASSERT(!local_irq_is_enabled());

    init_pdata(prv, pdata, cpu);

    return &sr->_lock;
}

static void cf_check null_unit_insert(
    const struct scheduler *ops, struct sched_unit *unit)
{
    struct null_private *prv = null_priv(ops);
    struct null_unit *nvc = null_unit(unit);
    struct null_pcpu *npc;
    unsigned int cpu;
    spinlock_t *lock;

    ASSERT(!is_idle_unit(unit));

    lock = unit_schedule_lock_irq(unit);

    if ( unlikely(!is_unit_online(unit)) )
    {
        unit_schedule_unlock_irq(lock, unit);
        return;
    }

 retry:
    sched_set_res(unit, pick_res(prv, unit));
    cpu = sched_unit_master(unit);
    npc = get_sched_res(cpu)->sched_priv;

    spin_unlock(lock);

    lock = unit_schedule_lock(unit);

    cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
                cpupool_domain_master_cpumask(unit->domain));

    /* If the pCPU is free, we assign unit to it */
    if ( likely(npc->unit == NULL) )
    {
        /*
         * Insert is followed by vcpu_wake(), so there's no need to poke
         * the pcpu with the SCHEDULE_SOFTIRQ, as wake will do that.
         */
        unit_assign(prv, unit, cpu);
    }
    else if ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
    {
        /*
         * If the pCPU is not free (e.g., because we raced with another
         * insert or a migrate), but there are other free pCPUs, we can
         * try to pick again.
         */
        goto retry;
    }
    else
    {
        /*
         * If the pCPU is not free, and there aren't any (valid) others,
         * we have no alternative but to go into the waitqueue.
         */
        spin_lock(&prv->waitq_lock);
        list_add_tail(&nvc->waitq_elem, &prv->waitq);
        dprintk(XENLOG_G_WARNING, "WARNING: %pdv%d not assigned to any CPU!\n",
                unit->domain, unit->unit_id);
        spin_unlock(&prv->waitq_lock);
    }
    spin_unlock_irq(lock);

    SCHED_STAT_CRANK(unit_insert);
}

static void cf_check null_unit_remove(
    const struct scheduler *ops, struct sched_unit *unit)
{
    struct null_private *prv = null_priv(ops);
    struct null_unit *nvc = null_unit(unit);
    spinlock_t *lock;

    ASSERT(!is_idle_unit(unit));

    lock = unit_schedule_lock_irq(unit);

    /* If offline, the unit shouldn't be assigned, nor in the waitqueue */
    if ( unlikely(!is_unit_online(unit)) )
    {
        struct null_pcpu *npc;

        npc = unit->res->sched_priv;
        ASSERT(npc->unit != unit);
        ASSERT(list_empty(&nvc->waitq_elem));
        goto out;
    }

    /* If unit is in waitqueue, just get it out of there and bail */
    if ( unlikely(!list_empty(&nvc->waitq_elem)) )
    {
        spin_lock(&prv->waitq_lock);
        list_del_init(&nvc->waitq_elem);
        spin_unlock(&prv->waitq_lock);

        goto out;
    }

    unit_deassign(prv, unit);

 out:
    unit_schedule_unlock_irq(lock, unit);

    SCHED_STAT_CRANK(unit_remove);
}

static void cf_check null_unit_wake(
    const struct scheduler *ops, struct sched_unit *unit)
{
    struct null_private *prv = null_priv(ops);
    struct null_unit *nvc = null_unit(unit);
    unsigned int cpu = sched_unit_master(unit);
    struct null_pcpu *npc = get_sched_res(cpu)->sched_priv;

    ASSERT(!is_idle_unit(unit));

    if ( unlikely(curr_on_cpu(sched_unit_master(unit)) == unit) )
    {
        SCHED_STAT_CRANK(unit_wake_running);
        return;
    }

    if ( unlikely(!list_empty(&nvc->waitq_elem)) )
    {
        /* Not exactly "on runq", but close enough for reusing the counter */
        SCHED_STAT_CRANK(unit_wake_onrunq);
        return;
    }

    if ( likely(unit_runnable(unit)) )
        SCHED_STAT_CRANK(unit_wake_runnable);
    else
        SCHED_STAT_CRANK(unit_wake_not_runnable);

    if ( likely(npc->unit == unit) )
    {
        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
        return;
    }

    /*
     * If a unit is neither on a pCPU nor in the waitqueue, it means it was
     * offline, and that it is now coming back online. If we're lucky, and
     * its previous resource is free (and affinities match), we can just
     * assign the unit to it (we own the proper lock already) and be done.
     */
    if ( npc->unit == NULL &&
         unit_check_affinity(unit, cpu, BALANCE_HARD_AFFINITY) )
    {
        if ( !has_soft_affinity(unit) ||
             unit_check_affinity(unit, cpu, BALANCE_SOFT_AFFINITY) )
        {
            unit_assign(prv, unit, cpu);
            cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
            return;
        }
    }

    /*
     * If the resource is not free (or affinities do not match) we need
     * to assign unit to some other one, but we can't do it here, as:
     * - we don't own the proper lock,
     * - we can't change v->processor under vcpu_wake()'s feet.
     * So we add it to the waitqueue, and tickle all the free CPUs (if any)
     * on which unit can run. The first one that schedules will pick it up.
     */
    spin_lock(&prv->waitq_lock);
    list_add_tail(&nvc->waitq_elem, &prv->waitq);
    spin_unlock(&prv->waitq_lock);

    cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
                cpupool_domain_master_cpumask(unit->domain));
    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
                &prv->cpus_free);

    if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
        dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
                unit->domain->domain_id, unit->unit_id);
    else
        cpumask_raise_softirq(cpumask_scratch_cpu(cpu), SCHEDULE_SOFTIRQ);
}

static void cf_check null_unit_sleep(
    const struct scheduler *ops, struct sched_unit *unit)
{
    struct null_private *prv = null_priv(ops);
    unsigned int cpu = sched_unit_master(unit);
    struct null_pcpu *npc = get_sched_res(cpu)->sched_priv;
    bool tickled = false;

    ASSERT(!is_idle_unit(unit));

    /*
     * Check if the unit is in the process of being offlined. If yes,
     * we need to remove it from either its pCPU or the waitqueue.
     */
    if ( unlikely(!is_unit_online(unit)) )
    {
        struct null_unit *nvc = null_unit(unit);

        if ( unlikely(!list_empty(&nvc->waitq_elem)) )
        {
            spin_lock(&prv->waitq_lock);
            list_del_init(&nvc->waitq_elem);
            spin_unlock(&prv->waitq_lock);
        }
        else if ( npc->unit == unit )
            tickled = unit_deassign(prv, unit);
    }

    /* If unit is not assigned to a pCPU, or is not running, no need to bother */
    if ( likely(!tickled && curr_on_cpu(cpu) == unit) )
        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);

    SCHED_STAT_CRANK(unit_sleep);
}

static struct sched_resource *cf_check
null_res_pick(const struct scheduler *ops, const struct sched_unit *unit)
{
    ASSERT(!is_idle_unit(unit));
    return pick_res(null_priv(ops), unit);
}

static void cf_check null_unit_migrate(
    const struct scheduler *ops, struct sched_unit *unit, unsigned int new_cpu)
{
    struct null_private *prv = null_priv(ops);
    struct null_unit *nvc = null_unit(unit);
    struct null_pcpu *npc;

    ASSERT(!is_idle_unit(unit));

    if ( sched_unit_master(unit) == new_cpu )
        return;

    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t unit, dom;
            uint16_t cpu, new_cpu;
        } d;
        d.dom = unit->domain->domain_id;
        d.unit = unit->unit_id;
        d.cpu = sched_unit_master(unit);
        d.new_cpu = new_cpu;
        __trace_var(TRC_SNULL_MIGRATE, 1, sizeof(d), &d);
    }

    /*
     * If unit is assigned to a pCPU, then such pCPU becomes free, and we
     * should look in the waitqueue if anyone else can be assigned to it.
     */
    npc = unit->res->sched_priv;
    if ( likely(npc->unit == unit) )
    {
        unit_deassign(prv, unit);
        SCHED_STAT_CRANK(migrate_running);
    }
    else if ( !list_empty(&nvc->waitq_elem) )
        SCHED_STAT_CRANK(migrate_on_runq);

    SCHED_STAT_CRANK(migrated);

    /*
     * If a unit is (going) offline, we want it to be neither assigned
     * to a pCPU, nor in the waitqueue.
     *
     * If it was on a cpu, we've removed it from there above. If it is
     * in the waitqueue, we remove it from there now. And then we bail.
     */
    if ( unlikely(!is_unit_online(unit)) )
    {
        spin_lock(&prv->waitq_lock);
        list_del_init(&nvc->waitq_elem);
        spin_unlock(&prv->waitq_lock);
        goto out;
    }

    /*
     * Let's now consider new_cpu, which is where unit is being sent. It can
     * be either free, or have a unit already assigned to it.
     *
     * In the former case we should assign unit to it, and try to get it to
     * run, if possible, according to affinity.
     *
     * In the latter, all we can do is park unit in the waitqueue.
     */
    npc = get_sched_res(new_cpu)->sched_priv;
    if ( npc->unit == NULL &&
         unit_check_affinity(unit, new_cpu, BALANCE_HARD_AFFINITY) )
    {
        /* unit might have been in the waitqueue, so remove it */
        spin_lock(&prv->waitq_lock);
        list_del_init(&nvc->waitq_elem);
        spin_unlock(&prv->waitq_lock);

        unit_assign(prv, unit, new_cpu);
    }
    else
    {
        /* Put unit in the waitqueue, if it wasn't there already */
        spin_lock(&prv->waitq_lock);
        if ( list_empty(&nvc->waitq_elem) )
        {
            list_add_tail(&nvc->waitq_elem, &prv->waitq);
            dprintk(XENLOG_G_WARNING,
                    "WARNING: %pdv%d not assigned to any CPU!\n",
                    unit->domain, unit->unit_id);
        }
        spin_unlock(&prv->waitq_lock);
    }

    /*
     * Whatever happened above, we always at least override v->processor.
     * This is especially important for shutdown or suspend/resume paths,
     * when it is important to let our caller (cpu_disable_scheduler())
     * know that the migration did happen, to the best of our ability, at
     * least. In case of suspend, any temporary inconsistency caused by
     * this will be fixed up during resume.
     */
 out:
    sched_set_res(unit, get_sched_res(new_cpu));
}

#ifndef NDEBUG
static inline void null_unit_check(struct sched_unit *unit)
{
    struct null_unit * const nvc = null_unit(unit);
    struct null_dom * const ndom = unit->domain->sched_priv;

    BUG_ON(nvc->unit != unit);

    if ( ndom )
        BUG_ON(is_idle_unit(unit));
    else
        BUG_ON(!is_idle_unit(unit));

    SCHED_STAT_CRANK(unit_check);
}
#define NULL_UNIT_CHECK(unit)  (null_unit_check(unit))
#else
#define NULL_UNIT_CHECK(unit)
#endif

/*
 * The simplest scheduling function of all time! We either return:
 *  - the unit assigned to the pCPU, if there's one and it can run;
 *  - the idle unit, otherwise.
 */
static void cf_check null_schedule(
    const struct scheduler *ops, struct sched_unit *prev, s_time_t now,
    bool tasklet_work_scheduled)
{
    unsigned int bs;
    const unsigned int cur_cpu = smp_processor_id();
    const unsigned int sched_cpu = sched_get_resource_cpu(cur_cpu);
    struct null_pcpu *npc = get_sched_res(sched_cpu)->sched_priv;
    struct null_private *prv = null_priv(ops);
    struct null_unit *wvc;

    SCHED_STAT_CRANK(schedule);
    NULL_UNIT_CHECK(current->sched_unit);

    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t tasklet, cpu;
            int16_t unit, dom;
        } d;
        d.cpu = cur_cpu;
        d.tasklet = tasklet_work_scheduled;
        if ( npc->unit == NULL )
        {
            d.unit = d.dom = -1;
        }
        else
        {
            d.unit = npc->unit->unit_id;
            d.dom = npc->unit->domain->domain_id;
        }
        __trace_var(TRC_SNULL_SCHEDULE, 1, sizeof(d), &d);
    }

    if ( tasklet_work_scheduled )
    {
        trace_var(TRC_SNULL_TASKLET, 1, 0, NULL);
        prev->next_task = sched_idle_unit(sched_cpu);
    }
    else
        prev->next_task = npc->unit;
    prev->next_time = -1;

    /*
     * We may be new in the cpupool, or just coming back online, in which
     * case there may be units in the waitqueue that we can assign to us
     * and run.
     */
    if ( unlikely(prev->next_task == NULL) )
    {
        bool unit_found;

        spin_lock(&prv->waitq_lock);

        if ( list_empty(&prv->waitq) )
            goto unlock;

        /*
         * We scan the waitqueue twice, to prioritize units that have
         * soft-affinity with cpu.
         * This may look like something expensive to do here in
         * null_schedule(), but it's actually fine, because we do it only
         * in cases where a pcpu has no unit associated (e.g., as said
         * above, the cpu has just joined a cpupool).
         */
        unit_found = false;
        for_each_affinity_balance_step( bs )
        {
            list_for_each_entry( wvc, &prv->waitq, waitq_elem )
            {
                if ( bs == BALANCE_SOFT_AFFINITY &&
                     !has_soft_affinity(wvc->unit) )
                    continue;

                if ( unit_check_affinity(wvc->unit, sched_cpu, bs) )
                {
                    spinlock_t *lock;

                    unit_found = true;

                    /*
                     * If the unit in the waitqueue has just come up online,
                     * we risk racing with vcpu_wake(). To avoid this, sync
                     * on the spinlock that vcpu_wake() holds, but only with
                     * trylock, to avoid deadlock.
                     */
                    lock = pcpu_schedule_trylock(sched_unit_master(wvc->unit));

                    /*
                     * We know the vcpu's lock is not this resource's lock. In
                     * fact, if it were, since this cpu is free, vcpu_wake()
                     * would have assigned the unit here directly.
                     */
                    ASSERT(lock != get_sched_res(sched_cpu)->schedule_lock);

                    if ( lock )
                    {
                        unit_assign(prv, wvc->unit, sched_cpu);
                        list_del_init(&wvc->waitq_elem);
                        prev->next_task = wvc->unit;
                        spin_unlock(lock);
                        goto unlock;
                    }
                }
            }
        }

        /*
         * If we did find a unit with suitable affinity in the waitqueue, but
         * we could not pick it up (due to lock contention), and hence we are
         * still free, plan for another try. In fact, we don't want such a
         * unit to be stuck in the waitqueue when there are free cpus where
         * it could run.
         */
        if ( unlikely( unit_found && prev->next_task == NULL &&
                       !list_empty(&prv->waitq)) )
            cpu_raise_softirq(cur_cpu, SCHEDULE_SOFTIRQ);

 unlock:
        spin_unlock(&prv->waitq_lock);

        if ( prev->next_task == NULL &&
             !cpumask_test_cpu(sched_cpu, &prv->cpus_free) )
            cpumask_set_cpu(sched_cpu, &prv->cpus_free);
    }

    if ( unlikely(prev->next_task == NULL ||
                  !unit_runnable_state(prev->next_task)) )
        prev->next_task = sched_idle_unit(sched_cpu);

    NULL_UNIT_CHECK(prev->next_task);
    prev->next_task->migrated = false;
}

static inline void dump_unit(const struct null_private *prv,
                             const struct null_unit *nvc)
{
    printk("[%i.%i] pcpu=%d", nvc->unit->domain->domain_id,
           nvc->unit->unit_id, list_empty(&nvc->waitq_elem) ?
                               sched_unit_master(nvc->unit) : -1);
}

static void cf_check null_dump_pcpu(const struct scheduler *ops, int cpu)
{
    struct null_private *prv = null_priv(ops);
    const struct null_pcpu *npc = get_sched_res(cpu)->sched_priv;
    const struct null_unit *nvc;
    spinlock_t *lock;
    unsigned long flags;

    lock = pcpu_schedule_lock_irqsave(cpu, &flags);

    printk("CPU[%02d] sibling={%*pbl}, core={%*pbl}",
           cpu, CPUMASK_PR(per_cpu(cpu_sibling_mask, cpu)),
           CPUMASK_PR(per_cpu(cpu_core_mask, cpu)));
    if ( npc->unit != NULL )
        printk(", unit=%pdv%d", npc->unit->domain, npc->unit->unit_id);
    printk("\n");

    /* current unit (nothing to say if that's the idle unit) */
    nvc = null_unit(curr_on_cpu(cpu));
    if ( nvc && !is_idle_unit(nvc->unit) )
    {
        printk("\trun: ");
        dump_unit(prv, nvc);
        printk("\n");
    }

    pcpu_schedule_unlock_irqrestore(lock, flags, cpu);
}

static void cf_check null_dump(const struct scheduler *ops)
{
    struct null_private *prv = null_priv(ops);
    struct list_head *iter;
    unsigned long flags;
    unsigned int loop;

    spin_lock_irqsave(&prv->lock, flags);

    printk("\tcpus_free = %*pbl\n", CPUMASK_PR(&prv->cpus_free));

    printk("Domain info:\n");
    loop = 0;
    list_for_each( iter, &prv->ndom )
    {
        struct null_dom *ndom;
        struct sched_unit *unit;

        ndom = list_entry(iter, struct null_dom, ndom_elem);

        printk("\tDomain: %d\n", ndom->dom->domain_id);
        for_each_sched_unit( ndom->dom, unit )
        {
            struct null_unit * const nvc = null_unit(unit);
            spinlock_t *lock;

            lock = unit_schedule_lock(unit);

            printk("\t%3d: ", ++loop);
            dump_unit(prv, nvc);
            printk("\n");

            unit_schedule_unlock(lock, unit);
        }
    }

    printk("Waitqueue: ");
    loop = 0;
    spin_lock(&prv->waitq_lock);
    list_for_each( iter, &prv->waitq )
    {
        struct null_unit *nvc = list_entry(iter, struct null_unit, waitq_elem);

        if ( loop++ != 0 )
            printk(", ");
        if ( loop % 24 == 0 )
            printk("\n\t");
        printk("%pdv%d", nvc->unit->domain, nvc->unit->unit_id);
    }
    printk("\n");
    spin_unlock(&prv->waitq_lock);

    spin_unlock_irqrestore(&prv->lock, flags);
}

static const struct scheduler sched_null_def = {
    .name           = "null Scheduler",
    .opt_name       = "null",
    .sched_id       = XEN_SCHEDULER_NULL,
    .sched_data     = NULL,

    .init           = null_init,
    .deinit         = null_deinit,
    .alloc_pdata    = null_alloc_pdata,
    .free_pdata     = null_free_pdata,
    .switch_sched   = null_switch_sched,
    .deinit_pdata   = null_deinit_pdata,

    .alloc_udata    = null_alloc_udata,
    .free_udata     = null_free_udata,
    .alloc_domdata  = null_alloc_domdata,
    .free_domdata   = null_free_domdata,

    .insert_unit    = null_unit_insert,
    .remove_unit    = null_unit_remove,

    .wake           = null_unit_wake,
    .sleep          = null_unit_sleep,
    .pick_resource  = null_res_pick,
    .migrate        = null_unit_migrate,
    .do_schedule    = null_schedule,

    .dump_cpu_state = null_dump_pcpu,
    .dump_settings  = null_dump,
};

REGISTER_SCHEDULER(sched_null_def);
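
/*
 * Usage sketch (illustrative only, not part of this file's logic): the null
 * scheduler can be selected system-wide with the "sched=null" Xen boot
 * parameter, or, more typically, for a dedicated cpupool, e.g.:
 *
 *     xl cpupool-create name=\"pool-null\" sched=\"null\"
 *     xl cpupool-cpu-remove Pool-0 2
 *     xl cpupool-cpu-add pool-null 2
 *     xl cpupool-migrate mydomain pool-null
 *
 * The pool name, CPU number and domain name above are just examples.
 */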