/******************************************************************************
* Argo : Hypervisor-Mediated data eXchange
*
* Derived from v4v, the version 2 of v2v.
*
* Copyright (c) 2010, Citrix Systems
* Copyright (c) 2018-2019 BAE Systems
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; If not, see <http://www.gnu.org/licenses/>.
*/
#include <xen/argo.h>
#include <xen/domain.h>
#include <xen/domain_page.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/lib.h>
#include <xen/nospec.h>
#include <xen/param.h>
#include <xen/sched.h>
#include <xen/time.h>
#include <xsm/xsm.h>
#include <public/argo.h>
#ifdef CONFIG_COMPAT
#include <compat/argo.h>
CHECK_argo_addr;
#undef CHECK_argo_addr
#define CHECK_argo_addr struct xen_argo_addr
CHECK_argo_register_ring;
CHECK_argo_ring;
CHECK_argo_ring_data_ent;
#undef CHECK_argo_ring_data_ent
#define CHECK_argo_ring_data_ent struct xen_argo_ring_data_ent
CHECK_argo_ring_data;
CHECK_argo_ring_message_header;
CHECK_argo_unregister_ring;
CHECK_argo_send_addr;
#endif
#define MAX_RINGS_PER_DOMAIN 128U
#define MAX_NOTIFY_COUNT 256U
#define MAX_PENDING_PER_RING 32U
/* All messages on the ring are padded to a multiple of the slot size. */
#define ROUNDUP_MESSAGE(a) ROUNDUP((a), XEN_ARGO_MSG_SLOT_SIZE)
/* The maximum size of a message that may be sent on the largest Argo ring. */
#define MAX_ARGO_MESSAGE_SIZE ((XEN_ARGO_MAX_RING_SIZE) - \
(sizeof(struct xen_argo_ring_message_header)) - ROUNDUP_MESSAGE(1))
/* Number of PAGEs needed to hold a ring of a given size in bytes */
#define NPAGES_RING(ring_len) \
(ROUNDUP((ROUNDUP_MESSAGE(ring_len) + sizeof(xen_argo_ring_t)), PAGE_SIZE) \
>> PAGE_SHIFT)
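/*
 * Worked example (illustrative): with 4K pages and the 64-byte
 * xen_argo_ring_t header from the public interface, a ring registered with
 * ring_len of 4096 bytes needs ROUNDUP(4096 + 64, PAGE_SIZE) == 8192 bytes
 * of backing memory, so NPAGES_RING(4096) == 2.
 */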
DEFINE_XEN_GUEST_HANDLE(xen_argo_addr_t);
DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t);
DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t);
DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t);
DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t);
DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_t);
DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t);
DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t);
DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t);
#ifdef CONFIG_COMPAT
DEFINE_COMPAT_HANDLE(compat_argo_iov_t);
#endif
static bool __read_mostly opt_argo;
static bool __read_mostly opt_argo_mac_permissive;
static int __init cf_check parse_argo(const char *s)
{
const char *ss;
int val, rc = 0;
do {
ss = strchr(s, ',');
if ( !ss )
ss = strchr(s, '\0');
if ( (val = parse_bool(s, ss)) >= 0 )
opt_argo = val;
else if ( (val = parse_boolean("mac-permissive", s, ss)) >= 0 )
opt_argo_mac_permissive = val;
else
rc = -EINVAL;
s = ss + 1;
} while ( *ss );
return rc;
}
custom_param("argo", parse_argo);
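/*
 * Example usage (illustrative): the parser above takes a comma-separated
 * list on the hypervisor command line, e.g.
 *   argo=1                   enable Argo
 *   argo=1,mac-permissive    enable Argo and permit wildcard ring
 *                            registration (see register_ring below)
 * Any unrecognised token makes parse_argo return -EINVAL.
 */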
typedef struct argo_ring_id
{
xen_argo_port_t aport;
domid_t partner_id;
domid_t domain_id;
} argo_ring_id;
/* Data about a domain's own ring that it has registered */
struct argo_ring_info
{
/* next node in the hash, protected by rings_L2 */
struct list_head node;
/* this ring's id, protected by rings_L2 */
struct argo_ring_id id;
/* L3, the ring_info lock: protects the members of this struct below */
spinlock_t L3_lock;
/* length of the ring, protected by L3 */
unsigned int len;
/* number of pages translated into mfns, protected by L3 */
unsigned int nmfns;
/* cached tx pointer location, protected by L3 */
unsigned int tx_ptr;
/* mapped ring pages protected by L3 */
void **mfn_mapping;
/* list of mfns of guest ring, protected by L3 */
mfn_t *mfns;
/* list of struct pending_ent for this ring, protected by L3 */
struct list_head pending;
/* number of pending entries queued for this ring, protected by L3 */
unsigned int npending;
};
/* Data about a single-sender ring, held by the sender (partner) domain */
struct argo_send_info
{
/* next node in the hash, protected by send_L2 */
struct list_head node;
/* this ring's id, protected by send_L2 */
struct argo_ring_id id;
};
/* A space-available notification that is awaiting sufficient space */
struct pending_ent
{
/* List node within argo_ring_info's pending list */
struct list_head node;
/*
* List node within argo_domain's wildcard_pend_list. Only used if the
* ring is one with a wildcard partner (ie. that any domain may send to)
* to enable cancelling signals on wildcard rings on domain destroy.
*/
struct list_head wildcard_node;
/*
* Pointer to the ring_info that this ent pertains to. Used to ensure that
* ring_info->npending is decremented when ents for wildcard rings are
* cancelled for domain destroy.
* Caution: Must hold the correct locks before accessing ring_info via this.
*/
struct argo_ring_info *ring_info;
/* minimum ring space available that this signal is waiting upon */
unsigned int len;
/* domain to be notified when space is available */
domid_t domain_id;
};
/*
* The value of the argo element in a struct domain is
* protected by L1_global_argo_rwlock
*/
#define ARGO_HASHTABLE_SIZE 32
struct argo_domain
{
/* rings_L2 */
rwlock_t rings_L2_rwlock;
/*
* Hash table of argo_ring_info about rings this domain has registered.
* Protected by rings_L2.
*/
struct list_head ring_hash[ARGO_HASHTABLE_SIZE];
/* Counter of rings registered by this domain. Protected by rings_L2. */
unsigned int ring_count;
/* send_L2 */
spinlock_t send_L2_lock;
/*
* Hash table of argo_send_info about rings other domains have registered
* for this domain to send to. Single partner, non-wildcard rings.
* Protected by send_L2.
*/
struct list_head send_hash[ARGO_HASHTABLE_SIZE];
/* wildcard_L2 */
spinlock_t wildcard_L2_lock;
/*
* List of pending space-available signals for this domain about wildcard
* rings registered by other domains. Protected by wildcard_L2.
*/
struct list_head wildcard_pend_list;
};
/*
* Locking is organized as follows:
*
* Terminology: R() means taking a read lock on the specified lock;
* W() means taking a write lock on it.
*
* == L1 : The global read/write lock: L1_global_argo_rwlock
* Protects the argo elements of all struct domain *d in the system.
*
* R(L1) does not protect any of the elements of d->argo; it protects their
* addresses. W(L1) protects those and more since it implies W on all the lower
* level locks - see the notes on those locks below.
*
* The destruction of an argo-enabled domain, which must have a non-NULL d->argo
* pointer, will need to free that d->argo pointer, which requires W(L1).
* Since holding R(L1) will block acquiring W(L1), it will ensure that
* no domain pointers that argo is interested in become invalid while either
* W(L1) or R(L1) are held.
*/
static DEFINE_RWLOCK(L1_global_argo_rwlock); /* L1 */
/*
* == rings_L2 : The per-domain ring hash lock: d->argo->rings_L2_rwlock
*
* Holding a read lock on rings_L2 protects the ring hash table and
* the elements in the hash_table d->argo->ring_hash, and
* the node and id fields in struct argo_ring_info in the
* hash table.
* Holding a write lock on rings_L2 protects all of the elements of all the
* struct argo_ring_info belonging to this domain.
*
* To take rings_L2 you must already have R(L1). W(L1) implies W(rings_L2) and
* L3.
*
* == L3 : The individual ring_info lock: ring_info->L3_lock
*
* Protects all the fields within the argo_ring_info, aside from the ones that
* rings_L2 already protects: node, id, lock.
*
* To acquire L3 you must already have R(rings_L2). W(rings_L2) implies L3.
*
* == send_L2 : The per-domain single-sender partner rings lock:
* d->argo->send_L2_lock
*
* Protects the per-domain send hash table : d->argo->send_hash
* and the elements in the hash table, and the node and id fields
* in struct argo_send_info in the hash table.
*
* To take send_L2, you must already have R(L1). W(L1) implies send_L2.
* Do not attempt to acquire a rings_L2 on any domain after taking and while
* holding a send_L2 lock -- acquire the rings_L2 (if one is needed) beforehand.
*
* == wildcard_L2 : The per-domain wildcard pending list lock:
* d->argo->wildcard_L2_lock
*
* Protects the per-domain list of outstanding signals for space availability
* on wildcard rings.
*
* To take wildcard_L2, you must already have R(L1). W(L1) implies wildcard_L2.
* No other locks are acquired after obtaining wildcard_L2.
*/
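/*
 * Illustrative nesting for the common send path -- a sketch of the order
 * already used by sendv()/ringbuf_insert() below, not an additional API:
 *
 *   read_lock(&L1_global_argo_rwlock);           R(L1)
 *   read_lock(&dst_d->argo->rings_L2_rwlock);    R(rings_L2) of the receiver
 *   spin_lock(&ring_info->L3_lock);              L3 of the destination ring
 *   ... copy message data, update tx_ptr ...
 *   spin_unlock(&ring_info->L3_lock);
 *   read_unlock(&dst_d->argo->rings_L2_rwlock);
 *   read_unlock(&L1_global_argo_rwlock);
 *
 * Domain teardown instead takes W(L1) alone, which implies all of the locks
 * below it.
 */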
/*
* Lock state validations macros
*
* These macros encode the logic to verify that the locking has adhered to the
* locking discipline above.
* eg. On entry to logic that requires holding at least R(rings_L2), this:
* ASSERT(LOCKING_Read_rings_L2(d));
*
* checks that the lock state is sufficient, validating that one of the
* following must be true when executed: R(rings_L2) && R(L1)
* or: W(rings_L2) && R(L1)
* or: W(L1)
*
* The LOCKING macros defined below here are for use at verification points.
*/
#define LOCKING_Write_L1 (rw_is_write_locked(&L1_global_argo_rwlock))
/*
* While LOCKING_Read_L1 will return true even if the lock is write-locked,
* that's OK because everywhere that a Read lock is needed with these macros,
* holding a Write lock there instead is OK too: we're checking that _at least_
* the specified level of locks are held.
*/
#define LOCKING_Read_L1 (rw_is_locked(&L1_global_argo_rwlock))
#define LOCKING_Write_rings_L2(d) \
((LOCKING_Read_L1 && rw_is_write_locked(&(d)->argo->rings_L2_rwlock)) || \
LOCKING_Write_L1)
/*
* Skip checking LOCKING_Write_rings_L2(d) within this LOCKING_Read_rings_L2
* definition because the first clause that is testing R(L1) && R(L2) will also
* return true if R(L1) && W(L2) is true, because of the way that rw_is_locked
* behaves. This results in a slightly shorter and faster implementation.
*/
#define LOCKING_Read_rings_L2(d) \
((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock)) || \
LOCKING_Write_L1)
/*
* Skip checking LOCKING_Write_L1 within this LOCKING_L3 definition because
* LOCKING_Write_rings_L2(d) will return true for that condition.
*/
#define LOCKING_L3(d, r) \
((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock) \
&& spin_is_locked(&(r)->L3_lock)) || LOCKING_Write_rings_L2(d))
#define LOCKING_send_L2(d) \
((LOCKING_Read_L1 && spin_is_locked(&(d)->argo->send_L2_lock)) || \
LOCKING_Write_L1)
#define ARGO_DEBUG 0
#define argo_dprintk(fmt, args...) \
do { \
if ( ARGO_DEBUG ) \
printk(XENLOG_DEBUG "argo: " fmt, ##args); \
} while ( 0 )
/*
* This hash function is used to distribute rings within the per-domain
* hash tables (d->argo->ring_hash and d->argo->send_hash). The hash table
* will provide a struct if a match is found with an 'argo_ring_id' key:
* ie. the key is a (domain id, argo port, partner domain id) tuple.
* The algorithm approximates the string hashing function 'djb2'.
*/
static unsigned int
hash_index(const struct argo_ring_id *id)
{
unsigned int hash = 5381; /* prime constant from djb2 */
/* For each input byte: hash = hash * 33 + <byte> */
hash = ((hash << 5) + hash) + (id->aport & 0xff);
hash = ((hash << 5) + hash) + ((id->aport >> 8) & 0xff);
hash = ((hash << 5) + hash) + ((id->aport >> 16) & 0xff);
hash = ((hash << 5) + hash) + ((id->aport >> 24) & 0xff);
hash = ((hash << 5) + hash) + (id->domain_id & 0xff);
hash = ((hash << 5) + hash) + ((id->domain_id >> 8) & 0xff);
hash = ((hash << 5) + hash) + (id->partner_id & 0xff);
hash = ((hash << 5) + hash) + ((id->partner_id >> 8) & 0xff);
/*
* Since ARGO_HASHTABLE_SIZE is small, use higher-order bits of the
* hash to contribute to the lower-order bits before masking off.
*/
return (hash ^ (hash >> 15)) & (ARGO_HASHTABLE_SIZE - 1);
}
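/*
 * For example (illustrative): a ring id of (domain 1, aport 0x1234,
 * partner 2) feeds eight bytes into the hash above -- the four aport bytes,
 * lowest first, then the two domain_id bytes, then the two partner_id bytes
 * -- and the final fold-and-mask keeps five bits, selecting one of the
 * ARGO_HASHTABLE_SIZE (32) buckets.
 */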
static struct argo_ring_info *
find_ring_info(const struct domain *d, const struct argo_ring_id *id)
{
struct argo_ring_info *ring_info;
const struct list_head *bucket;
ASSERT(LOCKING_Read_rings_L2(d));
/* List is not modified here. Search and return the match if found. */
bucket = &d->argo->ring_hash[hash_index(id)];
list_for_each_entry(ring_info, bucket, node)
{
const struct argo_ring_id *cmpid = &ring_info->id;
if ( cmpid->aport == id->aport &&
cmpid->domain_id == id->domain_id &&
cmpid->partner_id == id->partner_id )
{
argo_dprintk("found ring_info for ring(%u:%x %u)\n",
id->domain_id, id->aport, id->partner_id);
return ring_info;
}
}
argo_dprintk("no ring_info for ring(%u:%x %u)\n",
id->domain_id, id->aport, id->partner_id);
return NULL;
}
static struct argo_ring_info *
find_ring_info_by_match(const struct domain *d, xen_argo_port_t aport,
domid_t partner_id)
{
struct argo_ring_id id;
struct argo_ring_info *ring_info;
ASSERT(LOCKING_Read_rings_L2(d));
id.aport = aport;
id.domain_id = d->domain_id;
id.partner_id = partner_id;
ring_info = find_ring_info(d, &id);
if ( ring_info )
return ring_info;
id.partner_id = XEN_ARGO_DOMID_ANY;
return find_ring_info(d, &id);
}
static struct argo_send_info *
find_send_info(const struct domain *d, const struct argo_ring_id *id)
{
struct argo_send_info *send_info;
const struct list_head *bucket;
ASSERT(LOCKING_send_L2(d));
/* List is not modified here. Search and return the match if found. */
bucket = &d->argo->send_hash[hash_index(id)];
list_for_each_entry(send_info, bucket, node)
{
const struct argo_ring_id *cmpid = &send_info->id;
if ( cmpid->aport == id->aport &&
cmpid->domain_id == id->domain_id &&
cmpid->partner_id == id->partner_id )
{
argo_dprintk("found send_info for ring(%u:%x %u)\n",
id->domain_id, id->aport, id->partner_id);
return send_info;
}
}
argo_dprintk("no send_info for ring(%u:%x %u)\n",
id->domain_id, id->aport, id->partner_id);
return NULL;
}
static void
signal_domain(struct domain *d)
{
argo_dprintk("signalling domid:%u\n", d->domain_id);
send_guest_global_virq(d, VIRQ_ARGO);
}
static void
signal_domid(domid_t domain_id)
{
struct domain *d = rcu_lock_domain_by_id(domain_id);
if ( !d )
return;
signal_domain(d);
rcu_unlock_domain(d);
}
static void
ring_unmap(const struct domain *d, struct argo_ring_info *ring_info)
{
unsigned int i;
ASSERT(LOCKING_L3(d, ring_info));
if ( !ring_info->mfn_mapping )
return;
ASSERT(!ring_info->nmfns || ring_info->mfns);
for ( i = 0; i < ring_info->nmfns; i++ )
{
if ( !ring_info->mfn_mapping[i] )
continue;
ASSERT(!mfn_eq(ring_info->mfns[i], INVALID_MFN));
argo_dprintk("unmapping page %"PRI_mfn" from %p\n",
mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
unmap_domain_page_global(ring_info->mfn_mapping[i]);
ring_info->mfn_mapping[i] = NULL;
}
}
static int
ring_map_page(const struct domain *d, struct argo_ring_info *ring_info,
unsigned int i, void **out_ptr)
{
ASSERT(LOCKING_L3(d, ring_info));
/*
* FIXME: Investigate using vmap to create a single contiguous virtual
* address space mapping of the ring instead of using the array of single
* page mappings.
* Affects logic in memcpy_to_guest_ring, the mfn_mapping array data
* structure, and places where ring mappings are added or removed.
*/
if ( i >= ring_info->nmfns )
{
gprintk(XENLOG_ERR,
"argo: ring (vm%u:%x vm%u) %p attempted to map page %u of %u\n",
ring_info->id.domain_id, ring_info->id.aport,
ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
return -ENOMEM;
}
i = array_index_nospec(i, ring_info->nmfns);
if ( !ring_info->mfns || !ring_info->mfn_mapping )
{
ASSERT_UNREACHABLE();
ring_info->len = 0;
return -ENOMEM;
}
if ( !ring_info->mfn_mapping[i] )
{
ring_info->mfn_mapping[i] = map_domain_page_global(ring_info->mfns[i]);
if ( !ring_info->mfn_mapping[i] )
{
gprintk(XENLOG_ERR, "argo: ring (vm%u:%x vm%u) %p attempted to map "
"page %u of %u\n",
ring_info->id.domain_id, ring_info->id.aport,
ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
return -ENOMEM;
}
argo_dprintk("mapping page %"PRI_mfn" to %p\n",
mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
}
if ( out_ptr )
*out_ptr = ring_info->mfn_mapping[i];
return 0;
}
static void
update_tx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
uint32_t tx_ptr)
{
xen_argo_ring_t *ringp;
ASSERT(LOCKING_L3(d, ring_info));
ASSERT(ring_info->mfn_mapping[0]);
ring_info->tx_ptr = tx_ptr;
ringp = ring_info->mfn_mapping[0];
write_atomic(&ringp->tx_ptr, tx_ptr);
smp_wmb();
}
static int
memcpy_to_guest_ring(const struct domain *d, struct argo_ring_info *ring_info,
unsigned int offset,
const void *src, XEN_GUEST_HANDLE(uint8) src_hnd,
unsigned int len)
{
unsigned int mfns_index = offset >> PAGE_SHIFT;
void *dst;
int ret;
unsigned int src_offset = 0;
ASSERT(LOCKING_L3(d, ring_info));
offset &= ~PAGE_MASK;
if ( len + offset > XEN_ARGO_MAX_RING_SIZE )
return -EFAULT;
while ( len )
{
unsigned int head_len = (offset + len) > PAGE_SIZE ? PAGE_SIZE - offset
: len;
ret = ring_map_page(d, ring_info, mfns_index, &dst);
if ( ret )
return ret;
if ( src )
{
memcpy(dst + offset, src + src_offset, head_len);
src_offset += head_len;
}
else
{
if ( copy_from_guest(dst + offset, src_hnd, head_len) )
return -EFAULT;
guest_handle_add_offset(src_hnd, head_len);
}
mfns_index++;
len -= head_len;
offset = 0;
}
return 0;
}
/*
* Use this with caution: rx_ptr is under guest control and may be bogus.
* See get_sanitized_ring for a safer alternative.
*/
static int
get_rx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
uint32_t *rx_ptr)
{
void *src;
xen_argo_ring_t *ringp;
int ret;
ASSERT(LOCKING_L3(d, ring_info));
if ( !ring_info->nmfns || ring_info->nmfns < NPAGES_RING(ring_info->len) )
return -EINVAL;
ret = ring_map_page(d, ring_info, 0, &src);
if ( ret )
return ret;
ringp = (xen_argo_ring_t *)src;
*rx_ptr = read_atomic(&ringp->rx_ptr);
return 0;
}
/*
* get_sanitized_ring creates a modified copy of the ring pointers where
* the rx_ptr is rounded up to ensure it is aligned, and then ring
* wrap is handled. Simplifies safe use of the rx_ptr for available
* space calculation.
*/
static int
get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring,
struct argo_ring_info *ring_info)
{
uint32_t rx_ptr;
int ret;
ASSERT(LOCKING_L3(d, ring_info));
ret = get_rx_ptr(d, ring_info, &rx_ptr);
if ( ret )
return ret;
ring->tx_ptr = ring_info->tx_ptr;
rx_ptr = ROUNDUP_MESSAGE(rx_ptr);
if ( rx_ptr >= ring_info->len )
rx_ptr = 0;
ring->rx_ptr = rx_ptr;
return 0;
}
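/*
 * Example of the sanitization above (illustrative): for a 4096-byte ring,
 * a guest-supplied rx_ptr of 4090 rounds up to ROUNDUP_MESSAGE(4090) == 4096,
 * which is >= ring_info->len and so wraps to 0; the copy handed back to the
 * caller therefore only ever contains aligned, in-range pointers.
 */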
static unsigned int
ringbuf_payload_space(const struct domain *d, struct argo_ring_info *ring_info)
{
xen_argo_ring_t ring;
unsigned int len;
int ret;
ASSERT(LOCKING_L3(d, ring_info));
len = ring_info->len;
if ( !len )
return 0;
if ( get_sanitized_ring(d, &ring, ring_info) )
return 0;
argo_dprintk("sanitized ringbuf_payload_space: tx_ptr=%u rx_ptr=%u\n",
ring.tx_ptr, ring.rx_ptr);
/*
* rx_ptr == tx_ptr means that the ring has been emptied.
* See message size checking logic in the entry to ringbuf_insert which
* ensures that there is always one message slot of size ROUNDUP_MESSAGE(1)
* left available, preventing a ring from being entirely filled.
* This ensures that matching ring indexes always indicate an empty ring
* and never a full one.
*/
ret = ring.rx_ptr - ring.tx_ptr;
if ( ret <= 0 )
ret += len;
/*
* In a sanitized ring, we can rely on:
* (rx_ptr < ring_info->len) &&
* (tx_ptr < ring_info->len) &&
* (ring_info->len <= XEN_ARGO_MAX_RING_SIZE)
*
* and since: XEN_ARGO_MAX_RING_SIZE < INT32_MAX
* therefore right here: ret < INT32_MAX
* and we are safe to return it as an unsigned value from this function.
* The subtractions below cannot increase its value.
*/
/*
* The maximum size payload for a message that will be accepted is:
* (the available space between the ring indexes)
* minus (space for a message header)
* minus (space for one message slot)
* since ringbuf_insert requires that one message slot be left
* unfilled, to avoid filling the ring to capacity and confusing a full
* ring with an empty one.
* Since the ring indexes are sanitized, the value in ret is aligned, so
* the simple subtraction here works to return the aligned value needed:
*/
ret -= sizeof(struct xen_argo_ring_message_header);
ret -= ROUNDUP_MESSAGE(1);
return (ret < 0) ? 0 : ret;
}
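/*
 * Worked example (illustrative): for an empty 4096-byte ring the sanitized
 * rx_ptr equals tx_ptr, so ret starts at 0, gains the ring length (4096),
 * and then loses one 16-byte message header plus one 16-byte message slot,
 * advertising 4064 bytes of available payload space.
 */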
/*
* iov_count returns its count on success via an out variable to avoid
* potential for a negative return value to be used incorrectly
* (eg. coerced into an unsigned variable resulting in a large incorrect value)
*/
static int
iov_count(const xen_argo_iov_t *piov, unsigned int niov,
unsigned int *count)
{
unsigned int sum_iov_lens = 0;
if ( niov > XEN_ARGO_MAXIOV )
return -EINVAL;
for ( ; niov--; piov++ )
{
/* valid iovs must have the padding field set to zero */
if ( piov->pad )
{
argo_dprintk("invalid iov: padding is not zero\n");
return -EINVAL;
}
/* check each to protect sum against integer overflow */
if ( piov->iov_len > MAX_ARGO_MESSAGE_SIZE )
{
argo_dprintk("invalid iov_len: too big (%u)>%llu\n",
piov->iov_len, MAX_ARGO_MESSAGE_SIZE);
return -EINVAL;
}
sum_iov_lens += piov->iov_len;
/*
* Again protect sum from integer overflow
* and ensure total msg size will be within bounds.
*/
if ( sum_iov_lens > MAX_ARGO_MESSAGE_SIZE )
{
argo_dprintk("invalid iov series: total message too big\n");
return -EMSGSIZE;
}
}
*count = sum_iov_lens;
return 0;
}
static int
ringbuf_insert(const struct domain *d, struct argo_ring_info *ring_info,
const struct argo_ring_id *src_id, xen_argo_iov_t *iovs,
unsigned int niov, uint32_t message_type, unsigned int len)
{
xen_argo_ring_t ring;
struct xen_argo_ring_message_header mh = { };
int sp, ret;
xen_argo_iov_t *piov;
XEN_GUEST_HANDLE(uint8) NULL_hnd = { };
ASSERT(LOCKING_L3(d, ring_info));
/*
* Enforced below: no more than 'len' bytes of guest data
* (plus the message header) will be sent in this operation.
*/
/*
* Upper bound check the message len against the ring size.
* The message must not fill the ring; there must be at least one slot
* remaining so we can distinguish a full ring from an empty one.
* iov_count has already verified: len <= MAX_ARGO_MESSAGE_SIZE.
*/
if ( ring_info->len <= (sizeof(struct xen_argo_ring_message_header) +
ROUNDUP_MESSAGE(len)) )
return -EMSGSIZE;
ret = get_sanitized_ring(d, &ring, ring_info);
if ( ret )
return ret;
argo_dprintk("ring.tx_ptr=%u ring.rx_ptr=%u ring len=%u"
" ring_info->tx_ptr=%u\n",
ring.tx_ptr, ring.rx_ptr, ring_info->len, ring_info->tx_ptr);
if ( ring.rx_ptr == ring.tx_ptr )
sp = ring_info->len;
else
{
sp = ring.rx_ptr - ring.tx_ptr;
if ( sp < 0 )
sp += ring_info->len;
}
/*
* Size bounds check against currently available space in the ring.
* Again: the message must not fill the ring leaving no space remaining.
*/
if ( (ROUNDUP_MESSAGE(len) +
sizeof(struct xen_argo_ring_message_header)) >= sp )
{
argo_dprintk("EAGAIN\n");
return -EAGAIN;
}
mh.len = len + sizeof(struct xen_argo_ring_message_header);
mh.source.aport = src_id->aport;
mh.source.domain_id = src_id->domain_id;
mh.message_type = message_type;
/*
* For this copy to the guest ring, tx_ptr is always 16-byte aligned
* and the message header is 16 bytes long.
*/
BUILD_BUG_ON(
sizeof(struct xen_argo_ring_message_header) != ROUNDUP_MESSAGE(1));
/*
* First data write into the destination ring: fixed size, message header.
* This cannot overrun because the available free space (value in 'sp')
* is checked above and must be at least this size.
*/
ret = memcpy_to_guest_ring(d, ring_info,
ring.tx_ptr + sizeof(xen_argo_ring_t),
&mh, NULL_hnd, sizeof(mh));
if ( ret )
{
gprintk(XENLOG_ERR,
"argo: failed to write message header to ring (vm%u:%x vm%u)\n",
ring_info->id.domain_id, ring_info->id.aport,
ring_info->id.partner_id);
return ret;
}
ring.tx_ptr += sizeof(mh);
if ( ring.tx_ptr == ring_info->len )
ring.tx_ptr = 0;
for ( piov = iovs; niov--; piov++ )
{
XEN_GUEST_HANDLE(uint8) buf_hnd = piov->iov_hnd;
unsigned int iov_len = piov->iov_len;
/* If no data is provided in this iov, moan and skip on to the next */
if ( !iov_len )
{
gprintk(XENLOG_WARNING,
"argo: no data iov_len=0 iov_hnd=%p ring (vm%u:%x vm%u)\n",
buf_hnd.p, ring_info->id.domain_id, ring_info->id.aport,
ring_info->id.partner_id);
continue;
}
if ( unlikely(!guest_handle_okay(buf_hnd, iov_len)) )
{
gprintk(XENLOG_ERR,
"argo: bad iov handle [%p, %u] (vm%u:%x vm%u)\n",
buf_hnd.p, iov_len,
ring_info->id.domain_id, ring_info->id.aport,
ring_info->id.partner_id);
return -EFAULT;
}
sp = ring_info->len - ring.tx_ptr;
/* Check: iov data size versus free space at the tail of the ring */
if ( iov_len > sp )
{
/*
* Second possible data write: ring-tail-wrap-write.
* Populate the ring tail and update the internal tx_ptr to handle
* wrapping at the end of ring.
* Size of data written here: sp
* which is the exact full amount of free space available at the
* tail of the ring, so this cannot overrun.
*/
ret = memcpy_to_guest_ring(d, ring_info,
ring.tx_ptr + sizeof(xen_argo_ring_t),
NULL, buf_hnd, sp);
if ( ret )
{
gprintk(XENLOG_ERR,
"argo: failed to copy {%p, %d} (vm%u:%x vm%u)\n",
buf_hnd.p, sp,
ring_info->id.domain_id, ring_info->id.aport,
ring_info->id.partner_id);
return ret;
}
ring.tx_ptr = 0;
iov_len -= sp;
guest_handle_add_offset(buf_hnd, sp);
ASSERT(iov_len <= ring_info->len);
}
/*
* Third possible data write: all data remaining for this iov.
* Size of data written here: iov_len
*
* Case 1: if the ring-tail-wrap-write above was performed, then
* iov_len has been decreased by 'sp' and ring.tx_ptr is zero.
*
* We know from checking the result of iov_count:
* len + sizeof(message_header) <= ring_info->len
* We also know that len is the total of summing all iov_lens, so:
* iov_len <= len
* so by transitivity:
* iov_len <= len <= (ring_info->len - sizeof(msgheader))
* and therefore:
* (iov_len + sizeof(msgheader) <= ring_info->len) &&
* (ring.tx_ptr == 0)
* so this write cannot overrun here.
*
* Case 2: ring-tail-wrap-write above was not performed
* -> so iov_len is the guest-supplied value and: (iov_len <= sp)
* ie. less than available space at the tail of the ring:
* so this write cannot overrun.
*/
ret = memcpy_to_guest_ring(d, ring_info,
ring.tx_ptr + sizeof(xen_argo_ring_t),
NULL, buf_hnd, iov_len);
if ( ret )
{
gprintk(XENLOG_ERR,
"argo: failed to copy [%p, %u] (vm%u:%x vm%u)\n",
buf_hnd.p, iov_len, ring_info->id.domain_id,
ring_info->id.aport, ring_info->id.partner_id);
return ret;
}
ring.tx_ptr += iov_len;
if ( ring.tx_ptr == ring_info->len )
ring.tx_ptr = 0;
}
/*
* Finished writing data from all iovs into the ring: now need to round up
* tx_ptr to align to the next message boundary, and then wrap if necessary.
*/
ring.tx_ptr = ROUNDUP_MESSAGE(ring.tx_ptr);
if ( ring.tx_ptr >= ring_info->len )
ring.tx_ptr -= ring_info->len;
update_tx_ptr(d, ring_info, ring.tx_ptr);
/*
* At this point (and also on the error exit paths from this function) it is
* possible to unmap the ring_info, ie:
* ring_unmap(d, ring_info);
* but performance should be improved by not doing so, and retaining
* the mapping.
* An XSM policy control over level of confidentiality required
* versus performance cost could be added to decide that here.
*/
return ret;
}
static void
wildcard_pending_list_remove(domid_t domain_id, struct pending_ent *ent)
{
struct domain *d = rcu_lock_domain_by_id(domain_id);
if ( !d )
return;
ASSERT(LOCKING_Read_L1);
if ( d->argo )
{
spin_lock(&d->argo->wildcard_L2_lock);
list_del(&ent->wildcard_node);
spin_unlock(&d->argo->wildcard_L2_lock);
}
rcu_unlock_domain(d);
}
static void
wildcard_pending_list_insert(domid_t domain_id, struct pending_ent *ent)
{
struct domain *d = rcu_lock_domain_by_id(domain_id);
if ( !d )
return;
ASSERT(LOCKING_Read_L1);
if ( d->argo )
{
spin_lock(&d->argo->wildcard_L2_lock);
list_add(&ent->wildcard_node, &d->argo->wildcard_pend_list);
spin_unlock(&d->argo->wildcard_L2_lock);
}
rcu_unlock_domain(d);
}
static void
pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
{
struct pending_ent *ent;
ASSERT(LOCKING_L3(d, ring_info));
/* Delete all pending notifications from this ring's list. */
while ( (ent = list_first_entry_or_null(&ring_info->pending,
struct pending_ent, node)) )
{
/* For wildcard rings, remove each from their wildcard list too. */
if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
wildcard_pending_list_remove(ent->domain_id, ent);
list_del(&ent->node);
xfree(ent);
}
ring_info->npending = 0;
}
static void
pending_notify(struct list_head *to_notify)
{
struct pending_ent *ent;
ASSERT(LOCKING_Read_L1);
/* Sending signals for all ents in this list, draining until it is empty. */
while ( (ent = list_first_entry_or_null(to_notify, struct pending_ent,
node)) )
{
list_del(&ent->node);
signal_domid(ent->domain_id);
xfree(ent);
}
}
static void
pending_find(const struct domain *d, struct argo_ring_info *ring_info,
unsigned int payload_space, struct list_head *to_notify)
{
struct pending_ent *ent, *next;
ASSERT(LOCKING_Read_rings_L2(d));
/*
* TODO: Current policy here is to signal _all_ of the waiting domains
* interested in sending a message of size less than payload_space.
*
* This is likely to be suboptimal, since once one of them has added
* their message to the ring, there may well be insufficient room
* available for any of the others to transmit, meaning that they were
* woken in vain, creating extra work just to requeue their waits.
*
* Retain this simple policy for now since it at least avoids starving a
* domain of available space notifications because of a policy that only
* notified other domains instead. Improvement may be possible;
* investigation required.
*/
spin_lock(&ring_info->L3_lock);
/* Remove matching ents from the ring list, and add them to "to_notify" */
list_for_each_entry_safe(ent, next, &ring_info->pending, node)
{
if ( payload_space >= ent->len )
{
if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
wildcard_pending_list_remove(ent->domain_id, ent);
list_del(&ent->node);
ring_info->npending--;
list_add(&ent->node, to_notify);
}
}
spin_unlock(&ring_info->L3_lock);
}
static int
pending_queue(const struct domain *d, struct argo_ring_info *ring_info,
domid_t src_id, unsigned int len)
{
struct pending_ent *ent;
ASSERT(LOCKING_L3(d, ring_info));
if ( ring_info->npending >= MAX_PENDING_PER_RING )
return -EBUSY;
ent = xmalloc(struct pending_ent);
if ( !ent )
return -ENOMEM;
ent->len = len;
ent->domain_id = src_id;
ent->ring_info = ring_info;
if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
wildcard_pending_list_insert(src_id, ent);
list_add(&ent->node, &ring_info->pending);
ring_info->npending++;
return 0;
}
static int
pending_requeue(const struct domain *d, struct argo_ring_info *ring_info,
domid_t src_id, unsigned int len)
{
struct pending_ent *ent;
ASSERT(LOCKING_L3(d, ring_info));
/* List structure is not modified here. Update len in a match if found. */
list_for_each_entry(ent, &ring_info->pending, node)
{
if ( ent->domain_id == src_id )
{
/*
* Reuse an existing queue entry for a notification rather than add
* another. If the existing entry is waiting for a smaller size than
* the current message then adjust the record to wait for the
* current (larger) size to be available before triggering a
* notification.
* This assists the waiting sender by ensuring that whenever a
* notification is triggered, there is sufficient space available
* for (at least) any one of the messages awaiting transmission.
*/
if ( ent->len < len )
ent->len = len;
return 0;
}
}
return pending_queue(d, ring_info, src_id, len);
}
static void
pending_cancel(const struct domain *d, struct argo_ring_info *ring_info,
domid_t src_id)
{
struct pending_ent *ent, *next;
ASSERT(LOCKING_L3(d, ring_info));
/* Remove all ents where domain_id matches src_id from the ring's list. */
list_for_each_entry_safe(ent, next, &ring_info->pending, node)
{
if ( ent->domain_id == src_id )
{
/* For wildcard rings, remove each from their wildcard list too. */
if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
wildcard_pending_list_remove(ent->domain_id, ent);
list_del(&ent->node);
xfree(ent);
ring_info->npending--;
}
}
}
static void
wildcard_rings_pending_remove(struct domain *d)
{
struct pending_ent *ent;
ASSERT(LOCKING_Write_L1);
/* Delete all pending signals to the domain about wildcard rings. */
while ( (ent = list_first_entry_or_null(&d->argo->wildcard_pend_list,
struct pending_ent, node)) )
{
/*
* The ent->node deleted here, and the npending value decreased,
* belong to the ring_info of another domain, which is why this
* function requires holding W(L1):
* it implies the L3 lock that protects that ring_info struct.
*/
ent->ring_info->npending--;
list_del(&ent->node);
list_del(&ent->wildcard_node);
xfree(ent);
}
}
static void
ring_remove_mfns(const struct domain *d, struct argo_ring_info *ring_info)
{
unsigned int i;
ASSERT(LOCKING_Write_rings_L2(d));
if ( !ring_info->mfns )
return;
if ( !ring_info->mfn_mapping )
{
ASSERT_UNREACHABLE();
return;
}
ring_unmap(d, ring_info);
for ( i = 0; i < ring_info->nmfns; i++ )
if ( !mfn_eq(ring_info->mfns[i], INVALID_MFN) )
put_page_and_type(mfn_to_page(ring_info->mfns[i]));
ring_info->nmfns = 0;
XFREE(ring_info->mfns);
XFREE(ring_info->mfn_mapping);
}
static void
ring_remove_info(const struct domain *d, struct argo_ring_info *ring_info)
{
ASSERT(LOCKING_Write_rings_L2(d));
pending_remove_all(d, ring_info);
list_del(&ring_info->node);
ring_remove_mfns(d, ring_info);
xfree(ring_info);
}
static void
domain_rings_remove_all(struct domain *d)
{
unsigned int i;
ASSERT(LOCKING_Write_rings_L2(d));
for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
{
struct argo_ring_info *ring_info;
struct list_head *bucket = &d->argo->ring_hash[i];
while ( (ring_info = list_first_entry_or_null(bucket,
struct argo_ring_info,
node)) )
ring_remove_info(d, ring_info);
}
d->argo->ring_count = 0;
}
/*
* Tear down all rings of other domains where src_d domain is the partner.
* (ie. it is the single domain that can send to those rings.)
* This will also cancel any pending notifications about those rings.
*/
static void
partner_rings_remove(struct domain *src_d)
{
unsigned int i;
ASSERT(LOCKING_Write_L1);
for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
{
struct argo_send_info *send_info;
struct list_head *bucket = &src_d->argo->send_hash[i];
/* Remove all ents from the send list. Take each off their ring list. */
while ( (send_info = list_first_entry_or_null(bucket,
struct argo_send_info,
node)) )
{
struct domain *dst_d = rcu_lock_domain_by_id(send_info->id.domain_id);
if ( dst_d && dst_d->argo )
{
struct argo_ring_info *ring_info =
find_ring_info(dst_d, &send_info->id);
if ( ring_info )
{
ring_remove_info(dst_d, ring_info);
dst_d->argo->ring_count--;
}
else
ASSERT_UNREACHABLE();
}
else
argo_dprintk("%pd has entry for stale partner d%u\n",
src_d, send_info->id.domain_id);
if ( dst_d )
rcu_unlock_domain(dst_d);
list_del(&send_info->node);
xfree(send_info);
}
}
}
static int
fill_ring_data(const struct domain *currd,
XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) data_ent_hnd)
{
xen_argo_ring_data_ent_t ent;
struct domain *dst_d;
struct argo_ring_info *ring_info;
int ret = 0;
ASSERT(currd == current->domain);
ASSERT(LOCKING_Read_L1);
if ( __copy_from_guest(&ent, data_ent_hnd, 1) )
return -EFAULT;
argo_dprintk("fill_ring_data: ent.ring.domain=%u,ent.ring.aport=%x\n",
ent.ring.domain_id, ent.ring.aport);
ent.flags = 0;
dst_d = rcu_lock_domain_by_id(ent.ring.domain_id);
if ( !dst_d || !dst_d->argo )
goto out;
/*
* Don't supply information about rings that a guest is not
* allowed to send to.
*/
ret = xsm_argo_send(currd, dst_d);
if ( ret )
goto out;
read_lock(&dst_d->argo->rings_L2_rwlock);
ring_info = find_ring_info_by_match(dst_d, ent.ring.aport,
currd->domain_id);
if ( ring_info )
{
unsigned int space_avail;
ent.flags |= XEN_ARGO_RING_EXISTS;
spin_lock(&ring_info->L3_lock);
ent.max_message_size = ring_info->len -
sizeof(struct xen_argo_ring_message_header) -
ROUNDUP_MESSAGE(1);
if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
ent.flags |= XEN_ARGO_RING_SHARED;
space_avail = ringbuf_payload_space(dst_d, ring_info);
argo_dprintk("fill_ring_data: aport=%x space_avail=%u"
" space_wanted=%u\n",
ring_info->id.aport, space_avail, ent.space_required);
/* Do not queue a notification for an unachievable size */
if ( ent.space_required > ent.max_message_size )
ent.flags |= XEN_ARGO_RING_EMSGSIZE;
else if ( space_avail >= ent.space_required )
{
pending_cancel(dst_d, ring_info, currd->domain_id);
ent.flags |= XEN_ARGO_RING_SUFFICIENT;
}
else
{
ret = pending_requeue(dst_d, ring_info, currd->domain_id,
ent.space_required);
if ( ret == -EBUSY )
{
/*
* Too many other domains are already awaiting notification
* about available space on this ring. Indicate this state via
* flag. No need to return an error to the caller; allow the
* processing of queries about other rings to continue.
*/
ent.flags |= XEN_ARGO_RING_EBUSY;
ret = 0;
}
}
spin_unlock(&ring_info->L3_lock);
if ( space_avail == ent.max_message_size )
ent.flags |= XEN_ARGO_RING_EMPTY;
}
read_unlock(&dst_d->argo->rings_L2_rwlock);
out:
if ( dst_d )
rcu_unlock_domain(dst_d);
if ( !ret && (__copy_field_to_guest(data_ent_hnd, &ent, flags) ||
__copy_field_to_guest(data_ent_hnd, &ent, max_message_size)) )
return -EFAULT;
return ret;
}
static int
find_ring_mfn(struct domain *d, gfn_t gfn, mfn_t *mfn)
{
struct page_info *page;
p2m_type_t p2mt;
int ret;
ret = check_get_page_from_gfn(d, gfn, false, &p2mt, &page);
if ( unlikely(ret) )
return ret;
*mfn = page_to_mfn(page);
if ( !mfn_valid(*mfn) )
ret = -EINVAL;
#ifdef CONFIG_X86
else if ( p2mt == p2m_ram_logdirty )
ret = -EAGAIN;
#endif
else if ( (p2mt != p2m_ram_rw) ||
!get_page_and_type(page, d, PGT_writable_page) )
ret = -EINVAL;
put_page(page);
return ret;
}
static int
find_ring_mfns(struct domain *d, struct argo_ring_info *ring_info,
const unsigned int npage,
XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
const unsigned int len)
{
unsigned int i;
int ret = 0;
mfn_t *mfns;
void **mfn_mapping;
ASSERT(LOCKING_Write_rings_L2(d));
if ( ring_info->mfns )
{
/* Ring already existed: drop the previous mapping. */
argo_dprintk("argo: vm%u re-register existing ring "
"(vm%u:%x vm%u) clears mapping\n",
d->domain_id, ring_info->id.domain_id,
ring_info->id.aport, ring_info->id.partner_id);
ring_remove_mfns(d, ring_info);
ASSERT(!ring_info->mfns);
}
mfns = xmalloc_array(mfn_t, npage);
if ( !mfns )
return -ENOMEM;
for ( i = 0; i < npage; i++ )
mfns[i] = INVALID_MFN;
mfn_mapping = xzalloc_array(void *, npage);
if ( !mfn_mapping )
{
xfree(mfns);
return -ENOMEM;
}
ring_info->mfns = mfns;
ring_info->mfn_mapping = mfn_mapping;
for ( i = 0; i < npage; i++ )
{
mfn_t mfn;
xen_argo_gfn_t argo_gfn;
ret = __copy_from_guest_offset(&argo_gfn, gfn_hnd, i, 1) ? -EFAULT : 0;
if ( ret )
break;
ret = find_ring_mfn(d, _gfn(argo_gfn), &mfn);
if ( ret )
{
gprintk(XENLOG_ERR, "argo: vm%u: invalid gfn %"PRI_gfn" "
"r:(vm%u:%x vm%u) %p %u/%u\n",
d->domain_id, gfn_x(_gfn(argo_gfn)),
ring_info->id.domain_id, ring_info->id.aport,
ring_info->id.partner_id, ring_info, i, npage);
break;
}
ring_info->mfns[i] = mfn;
argo_dprintk("%u: %"PRI_gfn" -> %"PRI_mfn"\n",
i, gfn_x(_gfn(argo_gfn)), mfn_x(ring_info->mfns[i]));
}
ring_info->nmfns = i;
if ( ret )
ring_remove_mfns(d, ring_info);
else
{
ASSERT(ring_info->nmfns == NPAGES_RING(len));
argo_dprintk("argo: vm%u ring (vm%u:%x vm%u) %p "
"mfn_mapping %p len %u nmfns %u\n",
d->domain_id, ring_info->id.domain_id,
ring_info->id.aport, ring_info->id.partner_id, ring_info,
ring_info->mfn_mapping, ring_info->len, ring_info->nmfns);
}
return ret;
}
static long
unregister_ring(struct domain *currd,
XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd)
{
xen_argo_unregister_ring_t unreg;
struct argo_ring_id ring_id;
struct argo_ring_info *ring_info = NULL;
struct argo_send_info *send_info = NULL;
struct domain *dst_d = NULL;
ASSERT(currd == current->domain);
if ( copy_from_guest(&unreg, unreg_hnd, 1) )
return -EFAULT;
if ( unreg.pad )
return -EINVAL;
ring_id.partner_id = unreg.partner_id;
ring_id.aport = unreg.aport;
ring_id.domain_id = currd->domain_id;
read_lock(&L1_global_argo_rwlock);
if ( unlikely(!currd->argo) )
{
read_unlock(&L1_global_argo_rwlock);
return -ENODEV;
}
write_lock(&currd->argo->rings_L2_rwlock);
ring_info = find_ring_info(currd, &ring_id);
if ( !ring_info )
goto out;
ring_remove_info(currd, ring_info);
currd->argo->ring_count--;
if ( ring_id.partner_id == XEN_ARGO_DOMID_ANY )
goto out;
dst_d = rcu_lock_domain_by_id(ring_id.partner_id);
if ( !dst_d || !dst_d->argo )
{
ASSERT_UNREACHABLE();
goto out;
}
spin_lock(&dst_d->argo->send_L2_lock);
send_info = find_send_info(dst_d, &ring_id);
if ( send_info )
list_del(&send_info->node);
else
ASSERT_UNREACHABLE();
spin_unlock(&dst_d->argo->send_L2_lock);
out:
write_unlock(&currd->argo->rings_L2_rwlock);
read_unlock(&L1_global_argo_rwlock);
if ( dst_d )
rcu_unlock_domain(dst_d);
xfree(send_info);
if ( !ring_info )
{
argo_dprintk("unregister_ring: no ring_info found for ring(%u:%x %u)\n",
ring_id.domain_id, ring_id.aport, ring_id.partner_id);
return -ENOENT;
}
return 0;
}
static long
register_ring(struct domain *currd,
XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd,
XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
unsigned int npage, unsigned int flags)
{
xen_argo_register_ring_t reg;
struct argo_ring_id ring_id;
void *map_ringp;
xen_argo_ring_t *ringp;
struct argo_ring_info *ring_info, *new_ring_info = NULL;
struct argo_send_info *send_info = NULL;
struct domain *dst_d = NULL;
int ret = 0;
unsigned int private_tx_ptr;
ASSERT(currd == current->domain);
/* flags: reserve currently-undefined bits, require zero. */
if ( unlikely(flags & ~XEN_ARGO_REGISTER_FLAG_MASK) )
return -EINVAL;
if ( copy_from_guest(&reg, reg_hnd, 1) )
return -EFAULT;
/*
* A ring must be large enough to transmit messages, so requires space for:
* * 1 message header, plus
* * 1 payload slot (payload is always rounded to a multiple of 16 bytes)
* for the message payload to be written into, plus
* * 1 more slot, so that the ring cannot be filled to capacity with a
* single minimum-size message -- see the logic in ringbuf_insert --
* allowing for this ensures that there can be space remaining when a
* message is present.
* The above determines the minimum acceptable ring size.
*/
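/*
 * Concretely (illustrative): with 16-byte message slots and a 16-byte
 * message header, the smallest reg.len accepted by the check below is
 * 48 bytes; reg.len must also itself be slot-aligned and must not exceed
 * XEN_ARGO_MAX_RING_SIZE.
 */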
if ( (reg.len < (sizeof(struct xen_argo_ring_message_header)
+ ROUNDUP_MESSAGE(1) + ROUNDUP_MESSAGE(1))) ||
(reg.len > XEN_ARGO_MAX_RING_SIZE) ||
(reg.len != ROUNDUP_MESSAGE(reg.len)) ||
(NPAGES_RING(reg.len) != npage) ||
(reg.pad != 0) )
return -EINVAL;
ring_id.partner_id = reg.partner_id;
ring_id.aport = reg.aport;
ring_id.domain_id = currd->domain_id;
if ( reg.partner_id == XEN_ARGO_DOMID_ANY )
{
ret = opt_argo_mac_permissive ? xsm_argo_register_any_source(currd) :
-EPERM;
if ( ret )
return ret;
}
else
{
dst_d = rcu_lock_domain_by_id(reg.partner_id);
if ( !dst_d )
{
argo_dprintk("!dst_d, ESRCH\n");
return -ESRCH;
}
ret = xsm_argo_register_single_source(currd, dst_d);
if ( ret )
goto out;
send_info = xzalloc(struct argo_send_info);
if ( !send_info )
{
ret = -ENOMEM;
goto out;
}
send_info->id = ring_id;
}
/*
* Common case is that the ring doesn't already exist, so do the alloc here
* before picking up any locks.
*/
new_ring_info = xzalloc(struct argo_ring_info);
if ( !new_ring_info )
{
ret = -ENOMEM;
goto out;
}
read_lock(&L1_global_argo_rwlock);
if ( !currd->argo )
{
ret = -ENODEV;
goto out_unlock;
}
if ( dst_d && !dst_d->argo )
{
argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
ret = -ECONNREFUSED;
goto out_unlock;
}
write_lock(&currd->argo->rings_L2_rwlock);
if ( currd->argo->ring_count >= MAX_RINGS_PER_DOMAIN )
{
ret = -ENOSPC;
goto out_unlock2;
}
ring_info = find_ring_info(currd, &ring_id);
if ( !ring_info )
{
ring_info = new_ring_info;
new_ring_info = NULL;
spin_lock_init(&ring_info->L3_lock);
ring_info->id = ring_id;
INIT_LIST_HEAD(&ring_info->pending);
list_add(&ring_info->node,
&currd->argo->ring_hash[hash_index(&ring_info->id)]);
argo_dprintk("argo: vm%u registering ring (vm%u:%x vm%u)\n",
currd->domain_id, ring_id.domain_id, ring_id.aport,
ring_id.partner_id);
}
else if ( ring_info->len )
{
/*
* If the caller specified that the ring must not already exist,
* fail the attempt to add a completed ring which already exists.
*/
if ( flags & XEN_ARGO_REGISTER_FLAG_FAIL_EXIST )
{
gprintk(XENLOG_ERR, "argo: vm%u disallowed reregistration of "
"existing ring (vm%u:%x vm%u)\n",
currd->domain_id, ring_id.domain_id, ring_id.aport,
ring_id.partner_id);
ret = -EEXIST;
goto out_unlock2;
}
if ( ring_info->len != reg.len )
{
/*
* Change of ring size could result in entries on the pending
* notifications list that will never trigger.
* Simple blunt solution: disallow ring resize for now.
* TODO: investigate enabling ring resize.
*/
gprintk(XENLOG_ERR, "argo: vm%u attempted to change ring size "
"(vm%u:%x vm%u)\n",
currd->domain_id, ring_id.domain_id, ring_id.aport,
ring_id.partner_id);
/*
* Could return EINVAL here, but if the ring didn't already
* exist then the arguments would have been valid, so: EEXIST.
*/
ret = -EEXIST;
goto out_unlock2;
}
argo_dprintk("argo: vm%u re-registering existing ring (vm%u:%x vm%u)\n",
currd->domain_id, ring_id.domain_id, ring_id.aport,
ring_id.partner_id);
}
ret = find_ring_mfns(currd, ring_info, npage, gfn_hnd, reg.len);
if ( ret )
{
gprintk(XENLOG_ERR,
"argo: vm%u failed to find ring mfns (vm%u:%x vm%u)\n",
currd->domain_id, ring_id.domain_id, ring_id.aport,
ring_id.partner_id);
ring_remove_info(currd, ring_info);
goto out_unlock2;
}
/*
* The first page of the memory supplied for the ring has the xen_argo_ring
* structure at its head, which is where the ring indexes reside.
*/
ret = ring_map_page(currd, ring_info, 0, &map_ringp);
if ( ret )
{
gprintk(XENLOG_ERR,
"argo: vm%u failed to map ring mfn 0 (vm%u:%x vm%u)\n",
currd->domain_id, ring_id.domain_id, ring_id.aport,
ring_id.partner_id);
ring_remove_info(currd, ring_info);
goto out_unlock2;
}
ringp = map_ringp;
private_tx_ptr = read_atomic(&ringp->tx_ptr);
if ( (private_tx_ptr >= reg.len) ||
(ROUNDUP_MESSAGE(private_tx_ptr) != private_tx_ptr) )
{
/*
* Since the ring is a mess, attempt to flush the contents of it
* here by setting the tx_ptr to the next aligned message slot past
* the latest rx_ptr we have observed. Handle ring wrap correctly.
*/
private_tx_ptr = ROUNDUP_MESSAGE(read_atomic(&ringp->rx_ptr));
if ( private_tx_ptr >= reg.len )
private_tx_ptr = 0;
update_tx_ptr(currd, ring_info, private_tx_ptr);
}
ring_info->tx_ptr = private_tx_ptr;
ring_info->len = reg.len;
currd->argo->ring_count++;
if ( send_info )
{
spin_lock(&dst_d->argo->send_L2_lock);
list_add(&send_info->node,
&dst_d->argo->send_hash[hash_index(&send_info->id)]);
spin_unlock(&dst_d->argo->send_L2_lock);
}
out_unlock2:
write_unlock(&currd->argo->rings_L2_rwlock);
out_unlock:
read_unlock(&L1_global_argo_rwlock);
out:
if ( dst_d )
rcu_unlock_domain(dst_d);
if ( ret )
xfree(send_info);
xfree(new_ring_info);
return ret;
}
static void
notify_ring(const struct domain *d, struct argo_ring_info *ring_info,
struct list_head *to_notify)
{
unsigned int space;
ASSERT(LOCKING_Read_rings_L2(d));
spin_lock(&ring_info->L3_lock);
if ( ring_info->len )
space = ringbuf_payload_space(d, ring_info);
else
space = 0;
spin_unlock(&ring_info->L3_lock);
if ( space )
pending_find(d, ring_info, space, to_notify);
}
static void
notify_check_pending(struct domain *d)
{
unsigned int i;
LIST_HEAD(to_notify);
ASSERT(LOCKING_Read_L1);
read_lock(&d->argo->rings_L2_rwlock);
/* Walk all rings, call notify_ring on each to populate to_notify list */
for ( i = 0; i < ARGO_HASHTABLE_SIZE; i++ )
{
struct argo_ring_info *ring_info, *next;
struct list_head *bucket = &d->argo->ring_hash[i];
list_for_each_entry_safe(ring_info, next, bucket, node)
notify_ring(d, ring_info, &to_notify);
}
read_unlock(&d->argo->rings_L2_rwlock);
if ( !list_empty(&to_notify) )
pending_notify(&to_notify);
}
static long
notify(struct domain *currd,
XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd)
{
XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) ent_hnd;
xen_argo_ring_data_t ring_data;
int ret = 0;
ASSERT(currd == current->domain);
read_lock(&L1_global_argo_rwlock);
if ( !currd->argo )
{
argo_dprintk("!d->argo, ENODEV\n");
ret = -ENODEV;
goto out;
}
notify_check_pending(currd);
if ( guest_handle_is_null(ring_data_hnd) )
goto out;
ret = copy_from_guest(&ring_data, ring_data_hnd, 1) ? -EFAULT : 0;
if ( ret )
goto out;
if ( ring_data.nent > MAX_NOTIFY_COUNT )
{
gprintk(XENLOG_ERR, "argo: notify entry count(%u) exceeds max(%u)\n",
ring_data.nent, MAX_NOTIFY_COUNT);
ret = -EACCES;
goto out;
}
ent_hnd = guest_handle_for_field(ring_data_hnd,
xen_argo_ring_data_ent_t, data[0]);
if ( unlikely(!guest_handle_okay(ent_hnd, ring_data.nent)) )
{
ret = -EFAULT;
goto out;
}
while ( !ret && ring_data.nent-- )
{
ret = fill_ring_data(currd, ent_hnd);
guest_handle_add_offset(ent_hnd, 1);
}
out:
read_unlock(&L1_global_argo_rwlock);
return ret;
}
static long
sendv(struct domain *src_d, xen_argo_addr_t *src_addr,
const xen_argo_addr_t *dst_addr, xen_argo_iov_t *iovs, unsigned int niov,
uint32_t message_type)
{
struct domain *dst_d = NULL;
struct argo_ring_id src_id;
struct argo_ring_info *ring_info;
int ret = 0;
unsigned int len = 0;
argo_dprintk("sendv: (%u:%x)->(%u:%x) niov:%u type:%x\n",
src_addr->domain_id, src_addr->aport, dst_addr->domain_id,
dst_addr->aport, niov, message_type);
/* Check padding is zeroed. */
if ( unlikely(src_addr->pad || dst_addr->pad) )
return -EINVAL;
if ( src_addr->domain_id == XEN_ARGO_DOMID_ANY )
src_addr->domain_id = src_d->domain_id;
/* No domain is currently authorized to send on behalf of another */
if ( unlikely(src_addr->domain_id != src_d->domain_id) )
return -EPERM;
src_id.aport = src_addr->aport;
src_id.domain_id = src_d->domain_id;
src_id.partner_id = dst_addr->domain_id;
dst_d = rcu_lock_domain_by_id(dst_addr->domain_id);
if ( !dst_d )
return -ESRCH;
ret = xsm_argo_send(src_d, dst_d);
if ( ret )
{
gprintk(XENLOG_ERR, "argo: XSM REJECTED %i -> %i\n",
src_d->domain_id, dst_d->domain_id);
rcu_unlock_domain(dst_d);
return ret;
}
read_lock(&L1_global_argo_rwlock);
if ( !src_d->argo )
{
ret = -ENODEV;
goto out_unlock;
}
if ( !dst_d->argo )
{
argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
ret = -ECONNREFUSED;
goto out_unlock;
}
read_lock(&dst_d->argo->rings_L2_rwlock);
ring_info = find_ring_info_by_match(dst_d, dst_addr->aport,
src_id.domain_id);
if ( !ring_info )
{
gprintk(XENLOG_ERR,
"argo: vm%u connection refused, src (vm%u:%x) dst (vm%u:%x)\n",
current->domain->domain_id, src_id.domain_id, src_id.aport,
dst_addr->domain_id, dst_addr->aport);
ret = -ECONNREFUSED;
}
else
{
spin_lock(&ring_info->L3_lock);
/*
* Obtain the total size of data to transmit -- sets the 'len' variable
* -- and sanity check that the iovs conform to size and number limits.
*/
ret = iov_count(iovs, niov, &len);
if ( !ret )
{
ret = ringbuf_insert(dst_d, ring_info, &src_id, iovs, niov,
message_type, len);
if ( ret == -EAGAIN )
{
int rc;
argo_dprintk("argo_ringbuf_sendv failed, EAGAIN\n");
/* requeue to issue a notification when space is there */
rc = pending_requeue(dst_d, ring_info, src_id.domain_id, len);
if ( rc )
ret = rc;
}
}
spin_unlock(&ring_info->L3_lock);
}
read_unlock(&dst_d->argo->rings_L2_rwlock);
out_unlock:
read_unlock(&L1_global_argo_rwlock);
if ( ret >= 0 )
signal_domain(dst_d);
if ( dst_d )
rcu_unlock_domain(dst_d);
return ( ret < 0 ) ? ret : len;
}
long
do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long raw_arg3,
unsigned long raw_arg4)
{
struct domain *currd = current->domain;
long rc;
unsigned int arg3 = raw_arg3, arg4 = raw_arg4;
argo_dprintk("->do_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
(void *)arg1.p, (void *)arg2.p, raw_arg3, raw_arg4);
/* Reject numeric hypercall args outside 32-bit range */
if ( (arg3 != raw_arg3) || (arg4 != raw_arg4) )
return -EINVAL;
if ( unlikely(!opt_argo) )
return -EOPNOTSUPP;
rc = xsm_argo_enable(currd);
if ( rc )
return rc;
switch ( cmd )
{
case XEN_ARGO_OP_register_ring:
{
XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd =
guest_handle_cast(arg1, xen_argo_register_ring_t);
XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd =
guest_handle_cast(arg2, xen_argo_gfn_t);
/* arg3: npage, arg4: flags */
BUILD_BUG_ON(!IS_ALIGNED(XEN_ARGO_MAX_RING_SIZE, PAGE_SIZE));
if ( unlikely(arg3 > (XEN_ARGO_MAX_RING_SIZE >> PAGE_SHIFT)) )
{
rc = -EINVAL;
break;
}
/* Check array to allow use of the faster __copy operations later */
if ( unlikely(!guest_handle_okay(gfn_hnd, arg3)) )
{
rc = -EFAULT;
break;
}
rc = register_ring(currd, reg_hnd, gfn_hnd, arg3, arg4);
break;
}
case XEN_ARGO_OP_unregister_ring:
{
XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd =
guest_handle_cast(arg1, xen_argo_unregister_ring_t);
if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
{
rc = -EINVAL;
break;
}
rc = unregister_ring(currd, unreg_hnd);
break;
}
case XEN_ARGO_OP_sendv:
{
xen_argo_send_addr_t send_addr;
xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
unsigned int niov;
XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd =
guest_handle_cast(arg1, xen_argo_send_addr_t);
XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd =
guest_handle_cast(arg2, xen_argo_iov_t);
/* arg3 is niov */
/* arg4 is message_type. Must be a 32-bit value. */
/* XEN_ARGO_MAXIOV value determines size of iov array on stack */
BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
if ( rc )
{
rc = -EFAULT;
break;
}
/*
* Reject niov above maximum limit or message_types that are outside
* 32 bit range.
*/
if ( unlikely((arg3 > XEN_ARGO_MAXIOV) || (arg4 != (uint32_t)arg4)) )
{
rc = -EINVAL;
break;
}
niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
rc = copy_from_guest(iovs, iovs_hnd, niov) ? -EFAULT : 0;
if ( rc )
{
rc = -EFAULT;
break;
}
rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
break;
}
case XEN_ARGO_OP_notify:
{
XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd =
guest_handle_cast(arg1, xen_argo_ring_data_t);
if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
{
rc = -EINVAL;
break;
}
rc = notify(currd, ring_data_hnd);
break;
}
default:
rc = -EOPNOTSUPP;
break;
}
argo_dprintk("<-do_argo_op(%u)=%ld\n", cmd, rc);
return rc;
}
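/*
 * Illustrative guest-side use of the ABI handled above. This is a sketch
 * only: the HYPERVISOR_argo_op() wrapper, the 'payload' buffer and the port
 * numbers are assumptions of the example, not defined by this file.
 *
 *   xen_argo_send_addr_t addr = {
 *       .src = { .aport = 0x1,  .domain_id = XEN_ARGO_DOMID_ANY },
 *       .dst = { .aport = 0x10, .domain_id = 0 },
 *   };
 *   xen_argo_iov_t iov = { .iov_len = sizeof(payload) };
 *   set_xen_guest_handle(iov.iov_hnd, payload);
 *   rc = HYPERVISOR_argo_op(XEN_ARGO_OP_sendv, &addr, &iov, 1, message_type);
 *
 * On success rc is the number of payload bytes accepted onto the destination
 * ring; -EAGAIN means there was insufficient space and a VIRQ_ARGO
 * notification will be sent once enough space becomes available.
 */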
#ifdef CONFIG_COMPAT
int
compat_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long arg3,
unsigned long arg4)
{
struct domain *currd = current->domain;
int rc;
xen_argo_send_addr_t send_addr;
xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
compat_argo_iov_t compat_iovs[XEN_ARGO_MAXIOV];
unsigned int i, niov;
XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd;
/* check XEN_ARGO_MAXIOV as it sizes stack arrays: iovs, compat_iovs */
BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
/* Forward all ops besides sendv to the native handler. */
if ( cmd != XEN_ARGO_OP_sendv )
return do_argo_op(cmd, arg1, arg2, arg3, arg4);
if ( unlikely(!opt_argo) )
return -EOPNOTSUPP;
rc = xsm_argo_enable(currd);
if ( rc )
return rc;
argo_dprintk("->compat_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
(void *)arg1.p, (void *)arg2.p, arg3, arg4);
send_addr_hnd = guest_handle_cast(arg1, xen_argo_send_addr_t);
/* arg2: iovs, arg3: niov, arg4: message_type */
rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
if ( rc )
goto out;
if ( unlikely(arg3 > XEN_ARGO_MAXIOV) )
{
rc = -EINVAL;
goto out;
}
niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
rc = copy_from_guest(compat_iovs, arg2, niov) ? -EFAULT : 0;
if ( rc )
goto out;
for ( i = 0; i < niov; i++ )
{
#define XLAT_argo_iov_HNDL_iov_hnd(_d_, _s_) \
guest_from_compat_handle((_d_)->iov_hnd, (_s_)->iov_hnd)
XLAT_argo_iov(&iovs[i], &compat_iovs[i]);
#undef XLAT_argo_iov_HNDL_iov_hnd
}
rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
out:
argo_dprintk("<-compat_argo_op(%u)=%d\n", cmd, rc);
return rc;
}
#endif
static void
argo_domain_init(struct argo_domain *argo)
{
unsigned int i;
rwlock_init(&argo->rings_L2_rwlock);
spin_lock_init(&argo->send_L2_lock);
spin_lock_init(&argo->wildcard_L2_lock);
for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
{
INIT_LIST_HEAD(&argo->ring_hash[i]);
INIT_LIST_HEAD(&argo->send_hash[i]);
}
INIT_LIST_HEAD(&argo->wildcard_pend_list);
}
int
argo_init(struct domain *d)
{
struct argo_domain *argo;
if ( !opt_argo || xsm_argo_enable(d) )
{
argo_dprintk("argo disabled, domid: %u\n", d->domain_id);
return 0;
}
argo_dprintk("init: domid: %u\n", d->domain_id);
argo = xzalloc(struct argo_domain);
if ( !argo )
return -ENOMEM;
argo_domain_init(argo);
write_lock(&L1_global_argo_rwlock);
d->argo = argo;
write_unlock(&L1_global_argo_rwlock);
return 0;
}
void
argo_destroy(struct domain *d)
{
BUG_ON(!d->is_dying);
write_lock(&L1_global_argo_rwlock);
argo_dprintk("destroy: domid %u d->argo=%p\n", d->domain_id, d->argo);
if ( d->argo )
{
domain_rings_remove_all(d);
partner_rings_remove(d);
wildcard_rings_pending_remove(d);
XFREE(d->argo);
}
write_unlock(&L1_global_argo_rwlock);
}
void
argo_soft_reset(struct domain *d)
{
write_lock(&L1_global_argo_rwlock);
argo_dprintk("soft reset d=%u d->argo=%p\n", d->domain_id, d->argo);
if ( d->argo )
{
domain_rings_remove_all(d);
partner_rings_remove(d);
wildcard_rings_pending_remove(d);
/*
* Since neither opt_argo nor xsm_argo_enable(d) can change at runtime, if
* d->argo is non-NULL then opt_argo is true and xsm_argo_enable(d) succeeded,
* so we can assume that init is allowed to proceed again here.
*/
argo_domain_init(d->argo);
}
write_unlock(&L1_global_argo_rwlock);
}