/******************************************************************************
* event_channel.c
*
* Event notifications from VIRQs, PIRQs, and other domains.
*
* Copyright (c) 2003-2006, K A Fraser.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
*/
#include "event_channel.h"
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/irq.h>
#include <xen/iocap.h>
#include <xen/compat.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <xen/event_fifo.h>
#include <asm/current.h>
#include <public/xen.h>
#include <public/event_channel.h>
#include <xsm/xsm.h>
#ifdef CONFIG_PV_SHIM
#include <asm/guest.h>
#endif
#define ERROR_EXIT(_errno) \
do { \
gdprintk(XENLOG_WARNING, \
"EVTCHNOP failure: error %d\n", \
(_errno)); \
rc = (_errno); \
goto out; \
} while ( 0 )
#define ERROR_EXIT_DOM(_errno, _dom) \
do { \
gdprintk(XENLOG_WARNING, \
"EVTCHNOP failure: domain %d, error %d\n", \
(_dom)->domain_id, (_errno)); \
rc = (_errno); \
goto out; \
} while ( 0 )
#define consumer_is_xen(e) (!!(e)->xen_consumer)
/*
* Lock an event channel exclusively. This is allowed only when the channel is
* free or unbound either when taking or when releasing the lock, as any
* concurrent operation on the event channel using evtchn_read_trylock() will
* just assume the event channel is free or unbound at the moment when the
* evtchn_read_trylock() returns false.
*/
static inline void evtchn_write_lock(struct evtchn *evtchn)
{
write_lock(&evtchn->lock);
#ifndef NDEBUG
evtchn->old_state = evtchn->state;
#endif
}
static inline unsigned int old_state(const struct evtchn *evtchn)
{
#ifndef NDEBUG
return evtchn->old_state;
#else
return ECS_RESERVED; /* Just to allow things to build. */
#endif
}
static inline void evtchn_write_unlock(struct evtchn *evtchn)
{
/* Enforce lock discipline. */
ASSERT(old_state(evtchn) == ECS_FREE || old_state(evtchn) == ECS_UNBOUND ||
evtchn->state == ECS_FREE || evtchn->state == ECS_UNBOUND);
write_unlock(&evtchn->lock);
}
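/*
 * Illustrative sketch (not part of the original code): the reader side this
 * discipline pairs with. A would-be sender takes the per-channel lock only
 * opportunistically and treats failure as "channel is free/unbound or being
 * torn down":
 *
 *     struct evtchn *chn = evtchn_from_port(d, port);
 *
 *     if ( evtchn_read_trylock(chn) )
 *     {
 *         evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
 *         evtchn_read_unlock(chn);
 *     }
 *
 * This mirrors what send_guest_vcpu_virq() and send_guest_pirq() below do.
 */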
/*
* The function alloc_unbound_xen_event_channel() allows an arbitrary
* notifier function to be specified. However, very few unique functions
* are specified in practice, so to prevent bloating the evtchn structure
* with a pointer, we stash them dynamically in a small lookup array which
* can be indexed by a small integer.
*/
static xen_event_channel_notification_t __read_mostly
xen_consumers[NR_XEN_CONSUMERS];
/* Default notification action: wake up from wait_on_xen_event_channel(). */
static void cf_check default_xen_notification_fn(
struct vcpu *v, unsigned int port)
{
/* Consumer needs notification only if blocked. */
if ( test_and_clear_bit(_VPF_blocked_in_xen, &v->pause_flags) )
vcpu_wake(v);
}
/*
* Given a notification function, return the value to stash in
* the evtchn->xen_consumer field.
*/
static uint8_t get_xen_consumer(xen_event_channel_notification_t fn)
{
unsigned int i;
if ( fn == NULL )
fn = default_xen_notification_fn;
for ( i = 0; i < ARRAY_SIZE(xen_consumers); i++ )
{
/* Use cmpxchgptr() in lieu of a global lock. */
if ( xen_consumers[i] == NULL )
cmpxchgptr(&xen_consumers[i], NULL, fn);
if ( xen_consumers[i] == fn )
break;
}
BUG_ON(i >= ARRAY_SIZE(xen_consumers));
return i+1;
}
/* Get the notification function for a given Xen-bound event channel. */
#define xen_notification_fn(e) (xen_consumers[(e)->xen_consumer-1])
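/*
 * Illustrative sketch only (my_notify is a hypothetical consumer, not part
 * of this file): an in-hypervisor consumer registers its notification
 * function when allocating the channel, e.g.
 *
 *     static void cf_check my_notify(struct vcpu *v, unsigned int port)
 *     {
 *         ... react to the guest signalling its end of the channel ...
 *     }
 *
 *     port = alloc_unbound_xen_event_channel(d, 0, remote_domid, my_notify);
 *
 * get_xen_consumer() stores a 1-based index into xen_consumers[] in
 * chn->xen_consumer, and xen_notification_fn() maps it back to the function
 * pointer when evtchn_send() finds a Xen consumer on the remote end.
 */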
static bool virq_is_global(unsigned int virq)
{
switch ( virq )
{
case VIRQ_TIMER:
case VIRQ_DEBUG:
case VIRQ_XENOPROF:
case VIRQ_XENPMU:
return false;
case VIRQ_ARCH_0 ... VIRQ_ARCH_7:
return arch_virq_is_global(virq);
}
ASSERT(virq < NR_VIRQS);
return true;
}
static struct evtchn *_evtchn_from_port(const struct domain *d,
evtchn_port_t port)
{
return port_is_valid(d, port) ? evtchn_from_port(d, port) : NULL;
}
static void free_evtchn_bucket(struct domain *d, struct evtchn *bucket)
{
if ( !bucket )
return;
xsm_free_security_evtchns(bucket, EVTCHNS_PER_BUCKET);
xfree(bucket);
}
static struct evtchn *alloc_evtchn_bucket(struct domain *d, unsigned int port)
{
struct evtchn *chn;
unsigned int i;
chn = xzalloc_array(struct evtchn, EVTCHNS_PER_BUCKET);
if ( !chn )
goto err;
if ( xsm_alloc_security_evtchns(chn, EVTCHNS_PER_BUCKET) )
goto err;
for ( i = 0; i < EVTCHNS_PER_BUCKET; i++ )
{
chn[i].port = port + i;
rwlock_init(&chn[i].lock);
}
return chn;
err:
free_evtchn_bucket(d, chn);
return NULL;
}
/*
 * Allocate a given port and ensure all the buckets up to that port have
 * been allocated.
 *
 * The last part is important because the rest of the event channel code
 * relies on all the buckets up to d->valid_evtchns being valid. However,
 * ports may be allocated sparsely, e.g. when setting up static event
 * channels whose port numbers are scattered across the port space.
 */
*/
int evtchn_allocate_port(struct domain *d, evtchn_port_t port)
{
if ( port > d->max_evtchn_port || port >= max_evtchns(d) )
return -ENOSPC;
if ( port_is_valid(d, port) )
{
const struct evtchn *chn = evtchn_from_port(d, port);
if ( chn->state != ECS_FREE || evtchn_is_busy(d, chn) )
return -EBUSY;
}
else
{
unsigned int alloc_port = read_atomic(&d->valid_evtchns);
do
{
struct evtchn *chn;
struct evtchn **grp;
if ( !group_from_port(d, alloc_port) )
{
grp = xzalloc_array(struct evtchn *, BUCKETS_PER_GROUP);
if ( !grp )
return -ENOMEM;
group_from_port(d, alloc_port) = grp;
}
chn = alloc_evtchn_bucket(d, alloc_port);
if ( !chn )
return -ENOMEM;
bucket_from_port(d, alloc_port) = chn;
/*
* d->valid_evtchns is used to check whether the bucket can be
* accessed without the per-domain lock. Therefore,
* d->valid_evtchns should be seen *after* the new bucket has
 * been set up.
*/
smp_wmb();
alloc_port += EVTCHNS_PER_BUCKET;
write_atomic(&d->valid_evtchns, alloc_port);
} while ( port >= alloc_port );
}
write_atomic(&d->active_evtchns, d->active_evtchns + 1);
return 0;
}
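/*
 * Worked example for evtchn_allocate_port() (the numbers are illustrative;
 * EVTCHNS_PER_BUCKET is set in the headers): assume EVTCHNS_PER_BUCKET is
 * 128 and d->valid_evtchns is 128, i.e. only the first bucket exists.
 * Requesting port 300 runs the loop twice, allocating the buckets covering
 * ports 128-255 and 256-383 and publishing d->valid_evtchns = 256 and then
 * 384, at which point 300 < alloc_port terminates the loop. The smp_wmb()
 * ensures anyone observing the new d->valid_evtchns also sees the fully
 * initialised buckets.
 */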
static int get_free_port(struct domain *d)
{
int port;
if ( d->is_dying )
return -EINVAL;
for ( port = 0; port <= d->max_evtchn_port; port++ )
{
int rc = evtchn_allocate_port(d, port);
if ( rc == 0 )
return port;
else if ( rc != -EBUSY )
return rc;
}
return -ENOSPC;
}
/*
* Check whether a port is still marked free, and if so update the domain
* counter accordingly. To be used on function exit paths.
*/
static void check_free_port(struct domain *d, evtchn_port_t port)
{
if ( port_is_valid(d, port) &&
evtchn_from_port(d, port)->state == ECS_FREE )
write_atomic(&d->active_evtchns, d->active_evtchns - 1);
}
void evtchn_free(struct domain *d, struct evtchn *chn)
{
/* Clear pending event to avoid unexpected behavior on re-bind. */
evtchn_port_clear_pending(d, chn);
if ( consumer_is_xen(chn) )
{
write_atomic(&d->xen_evtchns, d->xen_evtchns - 1);
/* Decrement ->xen_evtchns /before/ ->active_evtchns. */
smp_wmb();
}
write_atomic(&d->active_evtchns, d->active_evtchns - 1);
/* Reset binding to vcpu0 when the channel is freed. */
chn->state = ECS_FREE;
chn->notify_vcpu_id = 0;
chn->xen_consumer = 0;
xsm_evtchn_close_post(chn);
}
static int evtchn_get_port(struct domain *d, evtchn_port_t port)
{
int rc;
if ( port != 0 )
rc = evtchn_allocate_port(d, port);
else
rc = get_free_port(d);
return rc ?: port;
}
/*
 * If port is zero, get the next free port and allocate it. If port is
 * non-zero, allocate the specified port.
 */
int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port)
{
struct evtchn *chn;
struct domain *d;
int rc;
domid_t dom = alloc->dom;
d = rcu_lock_domain_by_any_id(dom);
if ( d == NULL )
return -ESRCH;
write_lock(&d->event_lock);
port = rc = evtchn_get_port(d, port);
if ( rc < 0 )
ERROR_EXIT(rc);
rc = 0;
chn = evtchn_from_port(d, port);
rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, alloc->remote_dom);
if ( rc )
goto out;
evtchn_write_lock(chn);
chn->state = ECS_UNBOUND;
if ( (chn->u.unbound.remote_domid = alloc->remote_dom) == DOMID_SELF )
chn->u.unbound.remote_domid = current->domain->domain_id;
evtchn_port_init(d, chn);
evtchn_write_unlock(chn);
alloc->port = port;
out:
check_free_port(d, port);
write_unlock(&d->event_lock);
rcu_unlock_domain(d);
return rc;
}
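/*
 * Illustrative guest-side usage (not hypervisor code; peer_domid is a
 * placeholder): a guest obtains an unbound port for a peer via the public
 * interface declared in public/event_channel.h, roughly:
 *
 *     struct evtchn_alloc_unbound alloc = {
 *         .dom        = DOMID_SELF,
 *         .remote_dom = peer_domid,
 *     };
 *
 *     if ( HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc) == 0 )
 *         local_port = alloc.port;
 *
 * The peer then connects with EVTCHNOP_bind_interdomain, which is handled
 * by evtchn_bind_interdomain() below.
 */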
static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
{
ASSERT(lchn != rchn);
if ( lchn > rchn )
SWAP(lchn, rchn);
evtchn_write_lock(lchn);
evtchn_write_lock(rchn);
}
static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn)
{
evtchn_write_unlock(lchn);
evtchn_write_unlock(rchn);
}
/*
 * If lport is zero, get the next free local port and allocate it. If lport
 * is non-zero, allocate the specified local port.
 */
int evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind, struct domain *ld,
evtchn_port_t lport)
{
struct evtchn *lchn, *rchn;
struct domain *rd;
int rc;
evtchn_port_t rport = bind->remote_port;
domid_t rdom = bind->remote_dom;
if ( (rd = rcu_lock_domain_by_any_id(rdom)) == NULL )
return -ESRCH;
    /* Avoid deadlock by acquiring the domains' locks in address order. */
if ( ld < rd )
{
write_lock(&ld->event_lock);
write_lock(&rd->event_lock);
}
else
{
if ( ld != rd )
write_lock(&rd->event_lock);
write_lock(&ld->event_lock);
}
lport = rc = evtchn_get_port(ld, lport);
if ( rc < 0 )
ERROR_EXIT(rc);
rc = 0;
lchn = evtchn_from_port(ld, lport);
rchn = _evtchn_from_port(rd, rport);
if ( !rchn )
ERROR_EXIT_DOM(-EINVAL, rd);
if ( (rchn->state != ECS_UNBOUND) ||
(rchn->u.unbound.remote_domid != ld->domain_id) )
ERROR_EXIT_DOM(-EINVAL, rd);
rc = xsm_evtchn_interdomain(XSM_HOOK, ld, lchn, rd, rchn);
if ( rc )
goto out;
double_evtchn_lock(lchn, rchn);
lchn->u.interdomain.remote_dom = rd;
lchn->u.interdomain.remote_port = rport;
lchn->state = ECS_INTERDOMAIN;
evtchn_port_init(ld, lchn);
rchn->u.interdomain.remote_dom = ld;
rchn->u.interdomain.remote_port = lport;
rchn->state = ECS_INTERDOMAIN;
/*
* We may have lost notifications on the remote unbound port. Fix that up
* here by conservatively always setting a notification on the local port.
*/
evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn);
double_evtchn_unlock(lchn, rchn);
bind->local_port = lport;
out:
check_free_port(ld, lport);
write_unlock(&ld->event_lock);
if ( ld != rd )
write_unlock(&rd->event_lock);
rcu_unlock_domain(rd);
return rc;
}
int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port)
{
struct evtchn *chn;
struct vcpu *v;
struct domain *d = current->domain;
int virq = bind->virq, vcpu = bind->vcpu;
int rc = 0;
if ( (virq < 0) || (virq >= ARRAY_SIZE(v->virq_to_evtchn)) )
return -EINVAL;
    /*
     * Make sure the guest-controlled value virq is bounded even during
     * speculative execution.
     */
virq = array_index_nospec(virq, ARRAY_SIZE(v->virq_to_evtchn));
if ( virq_is_global(virq) && (vcpu != 0) )
return -EINVAL;
if ( (v = domain_vcpu(d, vcpu)) == NULL )
return -ENOENT;
write_lock(&d->event_lock);
if ( read_atomic(&v->virq_to_evtchn[virq]) )
ERROR_EXIT(-EEXIST);
port = rc = evtchn_get_port(d, port);
if ( rc < 0 )
ERROR_EXIT(rc);
rc = 0;
chn = evtchn_from_port(d, port);
evtchn_write_lock(chn);
chn->state = ECS_VIRQ;
chn->notify_vcpu_id = vcpu;
chn->u.virq = virq;
evtchn_port_init(d, chn);
evtchn_write_unlock(chn);
bind->port = port;
    /*
     * If anything, the update of virq_to_evtchn[] would need guarding by
     * virq_lock. But since this is the last action here, there's no strict
     * need to acquire that lock, and holding event_lock isn't helpful
     * anymore at this point either; we merely utilize that its unlocking
     * acts as the otherwise necessary smp_wmb().
     */
write_atomic(&v->virq_to_evtchn[virq], port);
out:
write_unlock(&d->event_lock);
return rc;
}
static int evtchn_bind_ipi(evtchn_bind_ipi_t *bind)
{
struct evtchn *chn;
struct domain *d = current->domain;
int port, rc = 0;
unsigned int vcpu = bind->vcpu;
if ( domain_vcpu(d, vcpu) == NULL )
return -ENOENT;
write_lock(&d->event_lock);
if ( (port = get_free_port(d)) < 0 )
ERROR_EXIT(port);
chn = evtchn_from_port(d, port);
evtchn_write_lock(chn);
chn->state = ECS_IPI;
chn->notify_vcpu_id = vcpu;
evtchn_port_init(d, chn);
evtchn_write_unlock(chn);
bind->port = port;
out:
write_unlock(&d->event_lock);
return rc;
}
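/*
 * Each vCPU keeps a doubly-linked list of its PIRQ-bound event channels,
 * threaded through u.pirq.prev_port/next_port and anchored at
 * v->pirq_evtchn_head (port 0 terminates the list). evtchn_move_pirqs()
 * walks it to update IRQ affinity when a vCPU migrates to another pCPU.
 */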
static void link_pirq_port(int port, struct evtchn *chn, struct vcpu *v)
{
chn->u.pirq.prev_port = 0;
chn->u.pirq.next_port = v->pirq_evtchn_head;
if ( v->pirq_evtchn_head )
evtchn_from_port(v->domain, v->pirq_evtchn_head)
->u.pirq.prev_port = port;
v->pirq_evtchn_head = port;
}
static void unlink_pirq_port(struct evtchn *chn, struct vcpu *v)
{
struct domain *d = v->domain;
if ( chn->u.pirq.prev_port )
evtchn_from_port(d, chn->u.pirq.prev_port)->u.pirq.next_port =
chn->u.pirq.next_port;
else
v->pirq_evtchn_head = chn->u.pirq.next_port;
if ( chn->u.pirq.next_port )
evtchn_from_port(d, chn->u.pirq.next_port)->u.pirq.prev_port =
chn->u.pirq.prev_port;
}
static int evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
{
struct evtchn *chn;
struct domain *d = current->domain;
struct vcpu *v = d->vcpu[0];
struct pirq *info;
int port = 0, rc;
unsigned int pirq = bind->pirq;
if ( pirq >= d->nr_pirqs )
return -EINVAL;
if ( !is_hvm_domain(d) && !pirq_access_permitted(d, pirq) )
return -EPERM;
write_lock(&d->event_lock);
if ( pirq_to_evtchn(d, pirq) != 0 )
ERROR_EXIT(-EEXIST);
if ( (port = get_free_port(d)) < 0 )
ERROR_EXIT(port);
chn = evtchn_from_port(d, port);
info = pirq_get_info(d, pirq);
if ( !info )
ERROR_EXIT(-ENOMEM);
info->evtchn = port;
rc = (!is_hvm_domain(d)
? pirq_guest_bind(v, info,
!!(bind->flags & BIND_PIRQ__WILL_SHARE))
: 0);
if ( rc != 0 )
{
info->evtchn = 0;
pirq_cleanup_check(info, d);
goto out;
}
evtchn_write_lock(chn);
chn->state = ECS_PIRQ;
chn->u.pirq.irq = pirq;
link_pirq_port(port, chn, v);
evtchn_port_init(d, chn);
evtchn_write_unlock(chn);
bind->port = port;
arch_evtchn_bind_pirq(d, pirq);
out:
check_free_port(d, port);
write_unlock(&d->event_lock);
return rc;
}
int evtchn_close(struct domain *d1, int port1, bool guest)
{
struct domain *d2 = NULL;
struct evtchn *chn1 = _evtchn_from_port(d1, port1), *chn2;
int rc = 0;
if ( !chn1 )
return -EINVAL;
again:
write_lock(&d1->event_lock);
/* Guest cannot close a Xen-attached event channel. */
if ( unlikely(consumer_is_xen(chn1)) && guest )
{
rc = -EINVAL;
goto out;
}
switch ( chn1->state )
{
case ECS_FREE:
case ECS_RESERVED:
rc = -EINVAL;
goto out;
case ECS_UNBOUND:
break;
case ECS_PIRQ: {
struct pirq *pirq = pirq_info(d1, chn1->u.pirq.irq);
if ( pirq )
{
if ( !is_hvm_domain(d1) )
pirq_guest_unbind(d1, pirq);
pirq->evtchn = 0;
pirq_cleanup_check(pirq, d1);
#ifdef CONFIG_X86
if ( is_hvm_domain(d1) && domain_pirq_to_irq(d1, pirq->pirq) > 0 )
unmap_domain_pirq_emuirq(d1, pirq->pirq);
#endif
}
unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
break;
}
case ECS_VIRQ: {
struct vcpu *v;
unsigned long flags;
v = d1->vcpu[virq_is_global(chn1->u.virq) ? 0 : chn1->notify_vcpu_id];
write_lock_irqsave(&v->virq_lock, flags);
ASSERT(read_atomic(&v->virq_to_evtchn[chn1->u.virq]) == port1);
write_atomic(&v->virq_to_evtchn[chn1->u.virq], 0);
write_unlock_irqrestore(&v->virq_lock, flags);
break;
}
case ECS_IPI:
break;
case ECS_INTERDOMAIN:
if ( d2 == NULL )
{
d2 = chn1->u.interdomain.remote_dom;
/* If we unlock d1 then we could lose d2. */
rcu_lock_domain(d2);
if ( d1 < d2 )
write_lock(&d2->event_lock);
else if ( d1 != d2 )
{
write_unlock(&d1->event_lock);
write_lock(&d2->event_lock);
goto again;
}
}
else if ( d2 != chn1->u.interdomain.remote_dom )
{
/*
* We can only get here if the port was closed and re-bound after
* unlocking d1 but before locking d2 above. We could retry but
* it is easier to return the same error as if we had seen the
* port in ECS_FREE. It must have passed through that state for
* us to end up here, so it's a valid error to return.
*/
rc = -EINVAL;
goto out;
}
chn2 = _evtchn_from_port(d2, chn1->u.interdomain.remote_port);
BUG_ON(!chn2);
BUG_ON(chn2->state != ECS_INTERDOMAIN);
BUG_ON(chn2->u.interdomain.remote_dom != d1);
double_evtchn_lock(chn1, chn2);
evtchn_free(d1, chn1);
chn2->state = ECS_UNBOUND;
chn2->u.unbound.remote_domid = d1->domain_id;
double_evtchn_unlock(chn1, chn2);
goto out;
default:
BUG();
}
evtchn_write_lock(chn1);
evtchn_free(d1, chn1);
evtchn_write_unlock(chn1);
out:
if ( d2 != NULL )
{
if ( d1 != d2 )
write_unlock(&d2->event_lock);
rcu_unlock_domain(d2);
}
write_unlock(&d1->event_lock);
return rc;
}
int evtchn_send(struct domain *ld, unsigned int lport)
{
struct evtchn *lchn = _evtchn_from_port(ld, lport), *rchn;
struct domain *rd;
int rport, ret = 0;
if ( !lchn )
return -EINVAL;
evtchn_read_lock(lchn);
/* Guest cannot send via a Xen-attached event channel. */
if ( unlikely(consumer_is_xen(lchn)) )
{
ret = -EINVAL;
goto out;
}
ret = xsm_evtchn_send(XSM_HOOK, ld, lchn);
if ( ret )
goto out;
switch ( lchn->state )
{
case ECS_INTERDOMAIN:
rd = lchn->u.interdomain.remote_dom;
rport = lchn->u.interdomain.remote_port;
rchn = evtchn_from_port(rd, rport);
if ( consumer_is_xen(rchn) )
{
/* Don't keep holding the lock for the call below. */
xen_event_channel_notification_t fn = xen_notification_fn(rchn);
struct vcpu *rv = rd->vcpu[rchn->notify_vcpu_id];
rcu_lock_domain(rd);
evtchn_read_unlock(lchn);
fn(rv, rport);
rcu_unlock_domain(rd);
return 0;
}
evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn);
break;
case ECS_IPI:
evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn);
break;
case ECS_UNBOUND:
/* silently drop the notification */
break;
default:
ret = -EINVAL;
}
out:
evtchn_read_unlock(lchn);
return ret;
}
bool evtchn_virq_enabled(const struct vcpu *v, unsigned int virq)
{
if ( !v )
return false;
if ( virq_is_global(virq) && v->vcpu_id )
v = domain_vcpu(v->domain, 0);
return read_atomic(&v->virq_to_evtchn[virq]);
}
void send_guest_vcpu_virq(struct vcpu *v, uint32_t virq)
{
unsigned long flags;
int port;
struct domain *d;
struct evtchn *chn;
ASSERT(!virq_is_global(virq));
read_lock_irqsave(&v->virq_lock, flags);
port = read_atomic(&v->virq_to_evtchn[virq]);
if ( unlikely(port == 0) )
goto out;
d = v->domain;
chn = evtchn_from_port(d, port);
if ( evtchn_read_trylock(chn) )
{
evtchn_port_set_pending(d, v->vcpu_id, chn);
evtchn_read_unlock(chn);
}
out:
read_unlock_irqrestore(&v->virq_lock, flags);
}
void send_guest_global_virq(struct domain *d, uint32_t virq)
{
unsigned long flags;
int port;
struct vcpu *v;
struct evtchn *chn;
ASSERT(virq_is_global(virq));
if ( unlikely(d == NULL) || unlikely(d->vcpu == NULL) )
return;
v = d->vcpu[0];
if ( unlikely(v == NULL) )
return;
read_lock_irqsave(&v->virq_lock, flags);
port = read_atomic(&v->virq_to_evtchn[virq]);
if ( unlikely(port == 0) )
goto out;
chn = evtchn_from_port(d, port);
if ( evtchn_read_trylock(chn) )
{
evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
evtchn_read_unlock(chn);
}
out:
read_unlock_irqrestore(&v->virq_lock, flags);
}
void send_guest_pirq(struct domain *d, const struct pirq *pirq)
{
int port;
struct evtchn *chn;
/*
* PV guests: It should not be possible to race with __evtchn_close(). The
* caller of this function must synchronise with pirq_guest_unbind().
* HVM guests: Port is legitimately zero when the guest disables the
* emulated interrupt/evtchn.
*/
if ( pirq == NULL || (port = pirq->evtchn) == 0 )
{
BUG_ON(!is_hvm_domain(d));
return;
}
chn = evtchn_from_port(d, port);
if ( evtchn_read_trylock(chn) )
{
evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
evtchn_read_unlock(chn);
}
}
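/*
 * Domain to which a given global VIRQ is delivered. A NULL entry means the
 * VIRQ goes to the hardware domain (see send_global_virq()). Entries are
 * installed by set_global_virq_handler() and dropped again by
 * clear_global_virq_handlers() when the handling domain goes away.
 */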
static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly;
static DEFINE_SPINLOCK(global_virq_handlers_lock);
void send_global_virq(uint32_t virq)
{
ASSERT(virq_is_global(virq));
send_guest_global_virq(global_virq_handlers[virq] ?: hardware_domain, virq);
}
int set_global_virq_handler(struct domain *d, uint32_t virq)
{
struct domain *old;
    if ( virq >= NR_VIRQS )
        return -EINVAL;
    if ( !virq_is_global(virq) )
        return -EINVAL;
    if ( global_virq_handlers[virq] == d )
        return 0;
    if ( unlikely(!get_domain(d)) )
        return -EINVAL;
spin_lock(&global_virq_handlers_lock);
old = global_virq_handlers[virq];
global_virq_handlers[virq] = d;
spin_unlock(&global_virq_handlers_lock);
    if ( old != NULL )
put_domain(old);
return 0;
}
static void clear_global_virq_handlers(struct domain *d)
{
uint32_t virq;
int put_count = 0;
spin_lock(&global_virq_handlers_lock);
    for ( virq = 0; virq < NR_VIRQS; virq++ )
    {
        if ( global_virq_handlers[virq] == d )
{
global_virq_handlers[virq] = NULL;
put_count++;
}
}
spin_unlock(&global_virq_handlers_lock);
    while ( put_count )
{
put_domain(d);
put_count--;
}
}
int evtchn_status(evtchn_status_t *status)
{
struct domain *d;
domid_t dom = status->dom;
int port = status->port;
struct evtchn *chn;
int rc = 0;
d = rcu_lock_domain_by_any_id(dom);
if ( d == NULL )
return -ESRCH;
chn = _evtchn_from_port(d, port);
if ( !chn )
{
rcu_unlock_domain(d);
return -EINVAL;
}
read_lock(&d->event_lock);
if ( consumer_is_xen(chn) )
{
rc = -EACCES;
goto out;
}
rc = xsm_evtchn_status(XSM_TARGET, d, chn);
if ( rc )
goto out;
switch ( chn->state )
{
case ECS_FREE:
case ECS_RESERVED:
status->status = EVTCHNSTAT_closed;
break;
case ECS_UNBOUND:
status->status = EVTCHNSTAT_unbound;
status->u.unbound.dom = chn->u.unbound.remote_domid;
break;
case ECS_INTERDOMAIN:
status->status = EVTCHNSTAT_interdomain;
status->u.interdomain.dom =
chn->u.interdomain.remote_dom->domain_id;
status->u.interdomain.port = chn->u.interdomain.remote_port;
break;
case ECS_PIRQ:
status->status = EVTCHNSTAT_pirq;
status->u.pirq = chn->u.pirq.irq;
break;
case ECS_VIRQ:
status->status = EVTCHNSTAT_virq;
status->u.virq = chn->u.virq;
break;
case ECS_IPI:
status->status = EVTCHNSTAT_ipi;
break;
default:
BUG();
}
status->vcpu = chn->notify_vcpu_id;
out:
read_unlock(&d->event_lock);
rcu_unlock_domain(d);
return rc;
}
int evtchn_bind_vcpu(evtchn_port_t port, unsigned int vcpu_id)
{
struct domain *d = current->domain;
struct evtchn *chn;
int rc = 0;
struct vcpu *v;
    /* Use the vcpu info to prevent speculative out-of-bounds accesses. */
if ( (v = domain_vcpu(d, vcpu_id)) == NULL )
return -ENOENT;
chn = _evtchn_from_port(d, port);
if ( !chn )
return -EINVAL;
write_lock(&d->event_lock);
/* Guest cannot re-bind a Xen-attached event channel. */
if ( unlikely(consumer_is_xen(chn)) )
{
rc = -EINVAL;
goto out;
}
switch ( chn->state )
{
case ECS_VIRQ:
if ( virq_is_global(chn->u.virq) )
chn->notify_vcpu_id = v->vcpu_id;
else
rc = -EINVAL;
break;
case ECS_UNBOUND:
case ECS_INTERDOMAIN:
chn->notify_vcpu_id = v->vcpu_id;
break;
case ECS_PIRQ:
if ( chn->notify_vcpu_id == v->vcpu_id )
break;
unlink_pirq_port(chn, d->vcpu[chn->notify_vcpu_id]);
chn->notify_vcpu_id = v->vcpu_id;
pirq_set_affinity(d, chn->u.pirq.irq,
cpumask_of(v->processor));
link_pirq_port(port, chn, v);
break;
default:
rc = -EINVAL;
break;
}
out:
write_unlock(&d->event_lock);
return rc;
}
int evtchn_unmask(unsigned int port)
{
struct domain *d = current->domain;
struct evtchn *evtchn = _evtchn_from_port(d, port);
if ( unlikely(!evtchn) )
return -EINVAL;
evtchn_read_lock(evtchn);
evtchn_port_unmask(d, evtchn);
evtchn_read_unlock(evtchn);
return 0;
}
static bool has_active_evtchns(const struct domain *d)
{
unsigned int xen = read_atomic(&d->xen_evtchns);
/*
* Read ->xen_evtchns /before/ active_evtchns, to prevent
* evtchn_reset() exiting its loop early.
*/
smp_rmb();
return read_atomic(&d->active_evtchns) > xen;
}
int evtchn_reset(struct domain *d, bool resuming)
{
unsigned int i;
int rc = 0;
if ( d != current->domain && !d->controller_pause_count )
return -EINVAL;
write_lock(&d->event_lock);
    /*
     * If we are resuming, then start where we stopped. Otherwise, check
     * that a reset operation is not already in progress, and if none is,
     * record that this is now the case. Note that d->next_evtchn being
     * zero means no reset is in progress and that closing starts at port 1
     * (port 0 is reserved), so !d->next_evtchn yields either the initial
     * port to close or 0, which makes us return -EBUSY below.
     */
i = resuming ? d->next_evtchn : !d->next_evtchn;
if ( i > d->next_evtchn )
d->next_evtchn = i;
write_unlock(&d->event_lock);
if ( !i )
return -EBUSY;
for ( ; port_is_valid(d, i) && has_active_evtchns(d); i++ )
{
evtchn_close(d, i, 1);
/* NB: Choice of frequency is arbitrary. */
if ( !(i & 0x3f) && hypercall_preempt_check() )
{
write_lock(&d->event_lock);
d->next_evtchn = i;
write_unlock(&d->event_lock);
return -ERESTART;
}
}
write_lock(&d->event_lock);
d->next_evtchn = 0;
if ( d->active_evtchns > d->xen_evtchns )
rc = -EAGAIN;
else if ( d->evtchn_fifo )
{
/* Switching back to 2-level ABI. */
evtchn_fifo_destroy(d);
evtchn_2l_init(d);
}
write_unlock(&d->event_lock);
return rc;
}
static int evtchn_set_priority(const struct evtchn_set_priority *set_priority)
{
struct domain *d = current->domain;
struct evtchn *chn = _evtchn_from_port(d, set_priority->port);
int ret;
if ( !chn )
return -EINVAL;
evtchn_read_lock(chn);
ret = evtchn_port_set_priority(d, chn, set_priority->priority);
evtchn_read_unlock(chn);
return ret;
}
long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
{
int rc;
#ifdef CONFIG_PV_SHIM
if ( unlikely(pv_shim) )
return pv_shim_event_channel_op(cmd, arg);
#endif
switch ( cmd )
{
case EVTCHNOP_alloc_unbound: {
struct evtchn_alloc_unbound alloc_unbound;
if ( copy_from_guest(&alloc_unbound, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_alloc_unbound(&alloc_unbound, 0);
if ( !rc && __copy_to_guest(arg, &alloc_unbound, 1) )
rc = -EFAULT; /* Cleaning up here would be a mess! */
break;
}
case EVTCHNOP_bind_interdomain: {
struct evtchn_bind_interdomain bind_interdomain;
if ( copy_from_guest(&bind_interdomain, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_bind_interdomain(&bind_interdomain, current->domain, 0);
if ( !rc && __copy_to_guest(arg, &bind_interdomain, 1) )
rc = -EFAULT; /* Cleaning up here would be a mess! */
break;
}
case EVTCHNOP_bind_virq: {
struct evtchn_bind_virq bind_virq;
if ( copy_from_guest(&bind_virq, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_bind_virq(&bind_virq, 0);
if ( !rc && __copy_to_guest(arg, &bind_virq, 1) )
rc = -EFAULT; /* Cleaning up here would be a mess! */
break;
}
case EVTCHNOP_bind_ipi: {
struct evtchn_bind_ipi bind_ipi;
if ( copy_from_guest(&bind_ipi, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_bind_ipi(&bind_ipi);
if ( !rc && __copy_to_guest(arg, &bind_ipi, 1) )
rc = -EFAULT; /* Cleaning up here would be a mess! */
break;
}
case EVTCHNOP_bind_pirq: {
struct evtchn_bind_pirq bind_pirq;
if ( copy_from_guest(&bind_pirq, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_bind_pirq(&bind_pirq);
if ( !rc && __copy_to_guest(arg, &bind_pirq, 1) )
rc = -EFAULT; /* Cleaning up here would be a mess! */
break;
}
case EVTCHNOP_close: {
struct evtchn_close close;
if ( copy_from_guest(&close, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_close(current->domain, close.port, 1);
break;
}
case EVTCHNOP_send: {
struct evtchn_send send;
if ( copy_from_guest(&send, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_send(current->domain, send.port);
break;
}
case EVTCHNOP_status: {
struct evtchn_status status;
if ( copy_from_guest(&status, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_status(&status);
if ( !rc && __copy_to_guest(arg, &status, 1) )
rc = -EFAULT;
break;
}
case EVTCHNOP_bind_vcpu: {
struct evtchn_bind_vcpu bind_vcpu;
if ( copy_from_guest(&bind_vcpu, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_bind_vcpu(bind_vcpu.port, bind_vcpu.vcpu);
break;
}
case EVTCHNOP_unmask: {
struct evtchn_unmask unmask;
if ( copy_from_guest(&unmask, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_unmask(unmask.port);
break;
}
case EVTCHNOP_reset:
case EVTCHNOP_reset_cont: {
struct evtchn_reset reset;
struct domain *d;
if ( copy_from_guest(&reset, arg, 1) != 0 )
return -EFAULT;
d = rcu_lock_domain_by_any_id(reset.dom);
if ( d == NULL )
return -ESRCH;
rc = xsm_evtchn_reset(XSM_TARGET, current->domain, d);
if ( !rc )
rc = evtchn_reset(d, cmd == EVTCHNOP_reset_cont);
rcu_unlock_domain(d);
if ( rc == -ERESTART )
rc = hypercall_create_continuation(__HYPERVISOR_event_channel_op,
"ih", EVTCHNOP_reset_cont, arg);
break;
}
case EVTCHNOP_init_control: {
struct evtchn_init_control init_control;
if ( copy_from_guest(&init_control, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_fifo_init_control(&init_control);
if ( !rc && __copy_to_guest(arg, &init_control, 1) )
rc = -EFAULT;
break;
}
case EVTCHNOP_expand_array: {
struct evtchn_expand_array expand_array;
if ( copy_from_guest(&expand_array, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_fifo_expand_array(&expand_array);
break;
}
case EVTCHNOP_set_priority: {
struct evtchn_set_priority set_priority;
if ( copy_from_guest(&set_priority, arg, 1) != 0 )
return -EFAULT;
rc = evtchn_set_priority(&set_priority);
break;
}
default:
rc = -ENOSYS;
break;
}
return rc;
}
int alloc_unbound_xen_event_channel(
struct domain *ld, unsigned int lvcpu, domid_t remote_domid,
xen_event_channel_notification_t notification_fn)
{
struct evtchn *chn;
int port, rc;
write_lock(&ld->event_lock);
port = rc = get_free_port(ld);
if ( rc < 0 )
goto out;
chn = evtchn_from_port(ld, port);
rc = xsm_evtchn_unbound(XSM_TARGET, ld, chn, remote_domid);
if ( rc )
goto out;
evtchn_write_lock(chn);
chn->state = ECS_UNBOUND;
chn->xen_consumer = get_xen_consumer(notification_fn);
chn->notify_vcpu_id = lvcpu;
chn->u.unbound.remote_domid = remote_domid;
evtchn_write_unlock(chn);
    /*
     * Increment ->xen_evtchns /after/ ->active_evtchns. No explicit
     * barrier needed due to the write-locked region just above.
     */
write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1);
out:
check_free_port(ld, port);
write_unlock(&ld->event_lock);
return rc < 0 ? rc : port;
}
void free_xen_event_channel(struct domain *d, int port)
{
if ( !port_is_valid(d, port) )
{
/*
* Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
* with the kind-of-barrier and BUG_ON() in evtchn_destroy().
*/
smp_rmb();
BUG_ON(!d->is_dying);
return;
}
evtchn_close(d, port, 0);
}
void notify_via_xen_event_channel(struct domain *ld, int lport)
{
struct evtchn *lchn = _evtchn_from_port(ld, lport), *rchn;
struct domain *rd;
if ( !lchn )
{
/*
* Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
* with the kind-of-barrier and BUG_ON() in evtchn_destroy().
*/
smp_rmb();
ASSERT(ld->is_dying);
return;
}
if ( !evtchn_read_trylock(lchn) )
return;
if ( likely(lchn->state == ECS_INTERDOMAIN) )
{
ASSERT(consumer_is_xen(lchn));
rd = lchn->u.interdomain.remote_dom;
rchn = evtchn_from_port(rd, lchn->u.interdomain.remote_port);
evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn);
}
evtchn_read_unlock(lchn);
}
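/*
 * Illustrative sketch only (my_notify is a placeholder for whatever function
 * the consumer registered): the typical lifecycle of a Xen-attached channel
 * as used by an in-hypervisor consumer is
 *
 *     port = alloc_unbound_xen_event_channel(d, 0, remote_domid, my_notify);
 *     ...
 *     notify_via_xen_event_channel(d, port);    [ kick the guest ]
 *     ...
 *     free_xen_event_channel(d, port);          [ tear the channel down ]
 *
 * my_notify() is invoked from evtchn_send() whenever the guest signals its
 * end of the (by then interdomain) channel.
 */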
void evtchn_check_pollers(struct domain *d, unsigned int port)
{
struct vcpu *v;
unsigned int vcpuid;
/* Check if some VCPU might be polling for this event. */
if ( likely(bitmap_empty(d->poll_mask, d->max_vcpus)) )
return;
/* Wake any interested (or potentially interested) pollers. */
for ( vcpuid = find_first_bit(d->poll_mask, d->max_vcpus);
vcpuid < d->max_vcpus;
vcpuid = find_next_bit(d->poll_mask, d->max_vcpus, vcpuid+1) )
{
v = d->vcpu[vcpuid];
if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
test_and_clear_bit(vcpuid, d->poll_mask) )
{
v->poll_evtchn = 0;
vcpu_unblock(v);
}
}
}
int evtchn_init(struct domain *d, unsigned int max_port)
{
evtchn_2l_init(d);
d->max_evtchn_port = min_t(unsigned int, max_port, INT_MAX);
d->evtchn = alloc_evtchn_bucket(d, 0);
if ( !d->evtchn )
return -ENOMEM;
d->valid_evtchns = EVTCHNS_PER_BUCKET;
rwlock_init(&d->event_lock);
if ( get_free_port(d) != 0 )
{
free_evtchn_bucket(d, d->evtchn);
return -EINVAL;
}
evtchn_from_port(d, 0)->state = ECS_RESERVED;
write_atomic(&d->active_evtchns, 0);
#if MAX_VIRT_CPUS > BITS_PER_LONG
d->poll_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(d->max_vcpus));
if ( !d->poll_mask )
{
free_evtchn_bucket(d, d->evtchn);
return -ENOMEM;
}
#endif
return 0;
}
int evtchn_destroy(struct domain *d)
{
unsigned int i;
/* After this kind-of-barrier no new event-channel allocations can occur. */
BUG_ON(!d->is_dying);
read_lock(&d->event_lock);
read_unlock(&d->event_lock);
/* Close all existing event channels. */
for ( i = d->valid_evtchns; --i; )
{
evtchn_close(d, i, 0);
/*
* Avoid preempting when called from domain_create()'s error path,
* and don't check too often (choice of frequency is arbitrary).
*/
if ( i && !(i & 0x3f) && d->is_dying != DOMDYING_dead &&
hypercall_preempt_check() )
{
write_atomic(&d->valid_evtchns, i);
return -ERESTART;
}
}
ASSERT(!d->active_evtchns);
clear_global_virq_handlers(d);
evtchn_fifo_destroy(d);
return 0;
}
void evtchn_destroy_final(struct domain *d)
{
unsigned int i, j;
/* Free all event-channel buckets. */
for ( i = 0; i < NR_EVTCHN_GROUPS; i++ )
{
if ( !d->evtchn_group[i] )
continue;
for ( j = 0; j < BUCKETS_PER_GROUP; j++ )
free_evtchn_bucket(d, d->evtchn_group[i][j]);
xfree(d->evtchn_group[i]);
}
free_evtchn_bucket(d, d->evtchn);
#if MAX_VIRT_CPUS > BITS_PER_LONG
xfree(d->poll_mask);
d->poll_mask = NULL;
#endif
}
void evtchn_move_pirqs(struct vcpu *v)
{
struct domain *d = v->domain;
const cpumask_t *mask = cpumask_of(v->processor);
unsigned int port;
struct evtchn *chn;
read_lock(&d->event_lock);
for ( port = v->pirq_evtchn_head; port; port = chn->u.pirq.next_port )
{
chn = evtchn_from_port(d, port);
pirq_set_affinity(d, chn->u.pirq.irq, mask);
}
read_unlock(&d->event_lock);
}
static void domain_dump_evtchn_info(struct domain *d)
{
unsigned int port;
int irq;
printk("Event channel information for domain %d:\n"
"Polling vCPUs: {%*pbl}\n"
" port [p/m/s]\n", d->domain_id, d->max_vcpus, d->poll_mask);
read_lock(&d->event_lock);
for ( port = 1; ; ++port )
{
const struct evtchn *chn = _evtchn_from_port(d, port);
char *ssid;
if ( !chn )
break;
if ( chn->state == ECS_FREE )
continue;
printk(" %4u [%d/%d/",
port,
evtchn_is_pending(d, chn),
evtchn_is_masked(d, chn));
evtchn_port_print_state(d, chn);
printk("]: s=%d n=%d x=%d",
chn->state, chn->notify_vcpu_id, chn->xen_consumer);
switch ( chn->state )
{
case ECS_UNBOUND:
printk(" d=%d", chn->u.unbound.remote_domid);
break;
case ECS_INTERDOMAIN:
printk(" d=%d p=%d",
chn->u.interdomain.remote_dom->domain_id,
chn->u.interdomain.remote_port);
break;
case ECS_PIRQ:
irq = domain_pirq_to_irq(d, chn->u.pirq.irq);
printk(" p=%d i=%d", chn->u.pirq.irq, irq);
break;
case ECS_VIRQ:
printk(" v=%d", chn->u.virq);
break;
}
ssid = xsm_show_security_evtchn(d, chn);
        if ( ssid )
        {
            printk(" Z=%s\n", ssid);
            xfree(ssid);
        }
        else
            printk("\n");
}
read_unlock(&d->event_lock);
}
static void cf_check dump_evtchn_info(unsigned char key)
{
struct domain *d;
printk("'%c' pressed -> dumping event-channel info\n", key);
rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
domain_dump_evtchn_info(d);
rcu_read_unlock(&domlist_read_lock);
}
static int __init cf_check dump_evtchn_info_key_init(void)
{
register_keyhandler('e', dump_evtchn_info, "dump evtchn info", 1);
return 0;
}
__initcall(dump_evtchn_info_key_init);
/*
* Local variables:
* mode: C
* c-file-style: "BSD"
* c-basic-offset: 4
* tab-width: 4
* indent-tabs-mode: nil
* End:
*/