summaryrefslogtreecommitdiff
path: root/drivers/virtio
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/virtio')
-rw-r--r--drivers/virtio/virtio_mem.c338
-rw-r--r--drivers/virtio/virtio_pci_modern_dev.c21
-rw-r--r--drivers/virtio/virtio_ring.c229
-rw-r--r--drivers/virtio/virtio_vdpa.c15
4 files changed, 380 insertions, 223 deletions
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index dc2a2e2b2ff8..09ed55de07d7 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -75,10 +75,14 @@ enum virtio_mem_sbm_mb_state {
VIRTIO_MEM_SBM_MB_OFFLINE,
/* Partially plugged, fully added to Linux, offline. */
VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
- /* Fully plugged, fully added to Linux, online. */
- VIRTIO_MEM_SBM_MB_ONLINE,
- /* Partially plugged, fully added to Linux, online. */
- VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL,
+ /* Fully plugged, fully added to Linux, onlined to a kernel zone. */
+ VIRTIO_MEM_SBM_MB_KERNEL,
+ /* Partially plugged, fully added to Linux, online to a kernel zone */
+ VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
+ /* Fully plugged, fully added to Linux, onlined to ZONE_MOVABLE. */
+ VIRTIO_MEM_SBM_MB_MOVABLE,
+ /* Partially plugged, fully added to Linux, onlined to ZONE_MOVABLE. */
+ VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
VIRTIO_MEM_SBM_MB_COUNT
};
@@ -699,18 +703,6 @@ static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id)
}
/*
- * See virtio_mem_remove_memory(): Try to remove all Linux memory blocks covered
- * by the big block.
- */
-static int virtio_mem_bbm_remove_bb(struct virtio_mem *vm, unsigned long bb_id)
-{
- const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
- const uint64_t size = vm->bbm.bb_size;
-
- return virtio_mem_remove_memory(vm, addr, size);
-}
-
-/*
* Try offlining and removing memory from Linux.
*
* Must not be called with the vm->hotplug_mutex held (possible deadlock with
@@ -832,11 +824,13 @@ static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
unsigned long mb_id)
{
switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
- case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL:
+ case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
+ case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
virtio_mem_sbm_set_mb_state(vm, mb_id,
VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
break;
- case VIRTIO_MEM_SBM_MB_ONLINE:
+ case VIRTIO_MEM_SBM_MB_KERNEL:
+ case VIRTIO_MEM_SBM_MB_MOVABLE:
virtio_mem_sbm_set_mb_state(vm, mb_id,
VIRTIO_MEM_SBM_MB_OFFLINE);
break;
@@ -847,21 +841,29 @@ static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
}
static void virtio_mem_sbm_notify_online(struct virtio_mem *vm,
- unsigned long mb_id)
+ unsigned long mb_id,
+ unsigned long start_pfn)
{
+ const bool is_movable = page_zonenum(pfn_to_page(start_pfn)) ==
+ ZONE_MOVABLE;
+ int new_state;
+
switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
- virtio_mem_sbm_set_mb_state(vm, mb_id,
- VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
+ new_state = VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL;
+ if (is_movable)
+ new_state = VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL;
break;
case VIRTIO_MEM_SBM_MB_OFFLINE:
- virtio_mem_sbm_set_mb_state(vm, mb_id,
- VIRTIO_MEM_SBM_MB_ONLINE);
+ new_state = VIRTIO_MEM_SBM_MB_KERNEL;
+ if (is_movable)
+ new_state = VIRTIO_MEM_SBM_MB_MOVABLE;
break;
default:
BUG();
break;
}
+ virtio_mem_sbm_set_mb_state(vm, mb_id, new_state);
}
static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm,
@@ -1015,7 +1017,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
break;
case MEM_ONLINE:
if (vm->in_sbm)
- virtio_mem_sbm_notify_online(vm, id);
+ virtio_mem_sbm_notify_online(vm, id, mhp->start_pfn);
atomic64_sub(size, &vm->offline_size);
/*
@@ -1137,7 +1139,7 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
*/
static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
{
- const bool is_movable = zone_idx(page_zone(pfn_to_page(pfn))) ==
+ const bool is_movable = page_zonenum(pfn_to_page(pfn)) ==
ZONE_MOVABLE;
int rc, retry_count;
@@ -1455,8 +1457,8 @@ static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id)
*
* Note: can fail after some subblocks were unplugged.
*/
-static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
- unsigned long mb_id, uint64_t *nb_sb)
+static int virtio_mem_sbm_unplug_any_sb_raw(struct virtio_mem *vm,
+ unsigned long mb_id, uint64_t *nb_sb)
{
int sb_id, count;
int rc;
@@ -1498,7 +1500,7 @@ static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id)
{
uint64_t nb_sb = vm->sbm.sbs_per_mb;
- return virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
+ return virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, &nb_sb);
}
/*
@@ -1585,9 +1587,9 @@ static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm,
* Note: Can fail after some subblocks were successfully plugged.
*/
static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
- unsigned long mb_id, uint64_t *nb_sb,
- bool online)
+ unsigned long mb_id, uint64_t *nb_sb)
{
+ const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
unsigned long pfn, nr_pages;
int sb_id, count;
int rc;
@@ -1609,7 +1611,7 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
if (rc)
return rc;
*nb_sb -= count;
- if (!online)
+ if (old_state == VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
continue;
/* fake-online the pages if the memory block is online */
@@ -1619,23 +1621,22 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
virtio_mem_fake_online(pfn, nr_pages);
}
- if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
- if (online)
- virtio_mem_sbm_set_mb_state(vm, mb_id,
- VIRTIO_MEM_SBM_MB_ONLINE);
- else
- virtio_mem_sbm_set_mb_state(vm, mb_id,
- VIRTIO_MEM_SBM_MB_OFFLINE);
- }
+ if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
+ virtio_mem_sbm_set_mb_state(vm, mb_id, old_state - 1);
return 0;
}
static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
{
+ const int mb_states[] = {
+ VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
+ VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
+ VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
+ };
uint64_t nb_sb = diff / vm->sbm.sb_size;
unsigned long mb_id;
- int rc;
+ int rc, i;
if (!nb_sb)
return 0;
@@ -1643,22 +1644,13 @@ static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
/* Don't race with onlining/offlining */
mutex_lock(&vm->hotplug_mutex);
- /* Try to plug subblocks of partially plugged online blocks. */
- virtio_mem_sbm_for_each_mb(vm, mb_id,
- VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
- rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, true);
- if (rc || !nb_sb)
- goto out_unlock;
- cond_resched();
- }
-
- /* Try to plug subblocks of partially plugged offline blocks. */
- virtio_mem_sbm_for_each_mb(vm, mb_id,
- VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
- rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, false);
- if (rc || !nb_sb)
- goto out_unlock;
- cond_resched();
+ for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
+ virtio_mem_sbm_for_each_mb(vm, mb_id, mb_states[i]) {
+ rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb);
+ if (rc || !nb_sb)
+ goto out_unlock;
+ cond_resched();
+ }
}
/*
@@ -1819,7 +1811,7 @@ static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm,
{
int rc;
- rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, nb_sb);
+ rc = virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, nb_sb);
/* some subblocks might have been unplugged even on failure */
if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
@@ -1856,6 +1848,7 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
int count)
{
const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count;
+ const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
unsigned long start_pfn;
int rc;
@@ -1874,8 +1867,17 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
return rc;
}
- virtio_mem_sbm_set_mb_state(vm, mb_id,
- VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
+ switch (old_state) {
+ case VIRTIO_MEM_SBM_MB_KERNEL:
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL);
+ break;
+ case VIRTIO_MEM_SBM_MB_MOVABLE:
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL);
+ break;
+ }
+
return 0;
}
@@ -1942,11 +1944,50 @@ unplugged:
return 0;
}
+/*
+ * Unplug the desired number of plugged subblocks of a memory block that is
+ * already added to Linux. Will skip subblock of online memory blocks that are
+ * busy (by the OS). Will fail if any subblock that's not busy cannot get
+ * unplugged.
+ *
+ * Will modify the state of the memory block. Might temporarily drop the
+ * hotplug_mutex.
+ *
+ * Note: Can fail after some subblocks were successfully unplugged. Can
+ * return 0 even if subblocks were busy and could not get unplugged.
+ */
+static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
+ unsigned long mb_id,
+ uint64_t *nb_sb)
+{
+ const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
+
+ switch (old_state) {
+ case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
+ case VIRTIO_MEM_SBM_MB_KERNEL:
+ case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
+ case VIRTIO_MEM_SBM_MB_MOVABLE:
+ return virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, nb_sb);
+ case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
+ case VIRTIO_MEM_SBM_MB_OFFLINE:
+ return virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, nb_sb);
+ }
+ return -EINVAL;
+}
+
static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
{
+ const int mb_states[] = {
+ VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
+ VIRTIO_MEM_SBM_MB_OFFLINE,
+ VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
+ VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
+ VIRTIO_MEM_SBM_MB_MOVABLE,
+ VIRTIO_MEM_SBM_MB_KERNEL,
+ };
uint64_t nb_sb = diff / vm->sbm.sb_size;
unsigned long mb_id;
- int rc;
+ int rc, i;
if (!nb_sb)
return 0;
@@ -1958,47 +1999,26 @@ static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
*/
mutex_lock(&vm->hotplug_mutex);
- /* Try to unplug subblocks of partially plugged offline blocks. */
- virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
- VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
- rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
- if (rc || !nb_sb)
- goto out_unlock;
- cond_resched();
- }
-
- /* Try to unplug subblocks of plugged offline blocks. */
- virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) {
- rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
- if (rc || !nb_sb)
- goto out_unlock;
- cond_resched();
- }
-
- if (!unplug_online) {
- mutex_unlock(&vm->hotplug_mutex);
- return 0;
- }
-
- /* Try to unplug subblocks of partially plugged online blocks. */
- virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
- VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
- rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
- if (rc || !nb_sb)
- goto out_unlock;
- mutex_unlock(&vm->hotplug_mutex);
- cond_resched();
- mutex_lock(&vm->hotplug_mutex);
- }
-
- /* Try to unplug subblocks of plugged online blocks. */
- virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) {
- rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
- if (rc || !nb_sb)
- goto out_unlock;
- mutex_unlock(&vm->hotplug_mutex);
- cond_resched();
- mutex_lock(&vm->hotplug_mutex);
+ /*
+ * We try unplug from partially plugged blocks first, to try removing
+ * whole memory blocks along with metadata. We prioritize ZONE_MOVABLE
+ * as it's more reliable to unplug memory and remove whole memory
+ * blocks, and we don't want to trigger a zone imbalances by
+ * accidentially removing too much kernel memory.
+ */
+ for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
+ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, mb_states[i]) {
+ rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
+ if (rc || !nb_sb)
+ goto out_unlock;
+ mutex_unlock(&vm->hotplug_mutex);
+ cond_resched();
+ mutex_lock(&vm->hotplug_mutex);
+ }
+ if (!unplug_online && i == 1) {
+ mutex_unlock(&vm->hotplug_mutex);
+ return 0;
+ }
}
mutex_unlock(&vm->hotplug_mutex);
@@ -2085,47 +2105,41 @@ rollback_safe_unplug:
}
/*
- * Try to remove a big block from Linux and unplug it. Will fail with
- * -EBUSY if some memory is online.
- *
- * Will modify the state of the memory block.
+ * Test if a big block is completely offline.
*/
-static int virtio_mem_bbm_remove_and_unplug_bb(struct virtio_mem *vm,
- unsigned long bb_id)
+static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm,
+ unsigned long bb_id)
{
- int rc;
-
- if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
- VIRTIO_MEM_BBM_BB_ADDED))
- return -EINVAL;
+ const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
+ const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
+ unsigned long pfn;
- rc = virtio_mem_bbm_remove_bb(vm, bb_id);
- if (rc)
- return -EBUSY;
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages;
+ pfn += PAGES_PER_SECTION) {
+ if (pfn_to_online_page(pfn))
+ return false;
+ }
- rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
- if (rc)
- virtio_mem_bbm_set_bb_state(vm, bb_id,
- VIRTIO_MEM_BBM_BB_PLUGGED);
- else
- virtio_mem_bbm_set_bb_state(vm, bb_id,
- VIRTIO_MEM_BBM_BB_UNUSED);
- return rc;
+ return true;
}
/*
- * Test if a big block is completely offline.
+ * Test if a big block is completely onlined to ZONE_MOVABLE (or offline).
*/
-static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm,
+static bool virtio_mem_bbm_bb_is_movable(struct virtio_mem *vm,
unsigned long bb_id)
{
const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
+ struct page *page;
unsigned long pfn;
for (pfn = start_pfn; pfn < start_pfn + nr_pages;
pfn += PAGES_PER_SECTION) {
- if (pfn_to_online_page(pfn))
+ page = pfn_to_online_page(pfn);
+ if (!page)
+ continue;
+ if (page_zonenum(page) != ZONE_MOVABLE)
return false;
}
@@ -2136,42 +2150,37 @@ static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
{
uint64_t nb_bb = diff / vm->bbm.bb_size;
uint64_t bb_id;
- int rc;
+ int rc, i;
if (!nb_bb)
return 0;
- /* Try to unplug completely offline big blocks first. */
- virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
- cond_resched();
- /*
- * As we're holding no locks, this check is racy as memory
- * can get onlined in the meantime - but we'll fail gracefully.
- */
- if (!virtio_mem_bbm_bb_is_offline(vm, bb_id))
- continue;
- rc = virtio_mem_bbm_remove_and_unplug_bb(vm, bb_id);
- if (rc == -EBUSY)
- continue;
- if (!rc)
- nb_bb--;
- if (rc || !nb_bb)
- return rc;
- }
-
- if (!unplug_online)
- return 0;
+ /*
+ * Try to unplug big blocks. Similar to SBM, start with offline
+ * big blocks.
+ */
+ for (i = 0; i < 3; i++) {
+ virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
+ cond_resched();
- /* Try to unplug any big blocks. */
- virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
- cond_resched();
- rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
- if (rc == -EBUSY)
- continue;
- if (!rc)
- nb_bb--;
- if (rc || !nb_bb)
- return rc;
+ /*
+ * As we're holding no locks, these checks are racy,
+ * but we don't care.
+ */
+ if (i == 0 && !virtio_mem_bbm_bb_is_offline(vm, bb_id))
+ continue;
+ if (i == 1 && !virtio_mem_bbm_bb_is_movable(vm, bb_id))
+ continue;
+ rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
+ if (rc == -EBUSY)
+ continue;
+ if (!rc)
+ nb_bb--;
+ if (rc || !nb_bb)
+ return rc;
+ }
+ if (i == 0 && !unplug_online)
+ return 0;
}
return nb_bb ? -EBUSY : 0;
@@ -2422,6 +2431,10 @@ static int virtio_mem_init(struct virtio_mem *vm)
dev_warn(&vm->vdev->dev,
"Some device memory is not addressable/pluggable. This can make some memory unusable.\n");
+ /* Prepare the offline threshold - make sure we can add two blocks. */
+ vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
+ VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
+
/*
* We want subblocks to span at least MAX_ORDER_NR_PAGES and
* pageblock_nr_pages pages. This:
@@ -2468,14 +2481,11 @@ static int virtio_mem_init(struct virtio_mem *vm)
vm->bbm.bb_size - 1;
vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
vm->bbm.next_bb_id = vm->bbm.first_bb_id;
- }
- /* Prepare the offline threshold - make sure we can add two blocks. */
- vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
- VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
- /* In BBM, we also want at least two big blocks. */
- vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
- vm->offline_threshold);
+ /* Make sure we can add two big blocks. */
+ vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
+ vm->offline_threshold);
+ }
dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c
index 54f297028586..e11ed748e661 100644
--- a/drivers/virtio/virtio_pci_modern_dev.c
+++ b/drivers/virtio/virtio_pci_modern_dev.c
@@ -384,6 +384,27 @@ u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev)
EXPORT_SYMBOL_GPL(vp_modern_get_features);
/*
+ * vp_modern_get_driver_features - get driver features from device
+ * @mdev: the modern virtio-pci device
+ *
+ * Returns the driver features read from the device
+ */
+u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
+{
+ struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+
+ u64 features;
+
+ vp_iowrite32(0, &cfg->guest_feature_select);
+ features = vp_ioread32(&cfg->guest_feature);
+ vp_iowrite32(1, &cfg->guest_feature_select);
+ features |= ((u64)vp_ioread32(&cfg->guest_feature) << 32);
+
+ return features;
+}
+EXPORT_SYMBOL_GPL(vp_modern_get_driver_features);
+
+/*
* vp_modern_set_features - set features to device
* @mdev: the modern virtio-pci device
* @features: the features set to device
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71e16b53e9c1..89bfe46a8a7f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -74,14 +74,14 @@ struct vring_desc_state_packed {
void *data; /* Data for callback. */
struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
u16 num; /* Descriptor list length. */
- u16 next; /* The next desc state in a list. */
u16 last; /* The last desc state in a list. */
};
-struct vring_desc_extra_packed {
+struct vring_desc_extra {
dma_addr_t addr; /* Buffer DMA addr. */
u32 len; /* Buffer length. */
u16 flags; /* Descriptor flags. */
+ u16 next; /* The next desc state in a list. */
};
struct vring_virtqueue {
@@ -113,6 +113,9 @@ struct vring_virtqueue {
/* Last used index we've seen. */
u16 last_used_idx;
+ /* Hint for event idx: already triggered no need to disable. */
+ bool event_triggered;
+
union {
/* Available for split ring */
struct {
@@ -130,6 +133,7 @@ struct vring_virtqueue {
/* Per-descriptor state. */
struct vring_desc_state_split *desc_state;
+ struct vring_desc_extra *desc_extra;
/* DMA address and size information */
dma_addr_t queue_dma_addr;
@@ -166,7 +170,7 @@ struct vring_virtqueue {
/* Per-descriptor state. */
struct vring_desc_state_packed *desc_state;
- struct vring_desc_extra_packed *desc_extra;
+ struct vring_desc_extra *desc_extra;
/* DMA address and size information */
dma_addr_t ring_dma_addr;
@@ -364,8 +368,8 @@ static int vring_mapping_error(const struct vring_virtqueue *vq,
* Split ring specific functions - *_split().
*/
-static void vring_unmap_one_split(const struct vring_virtqueue *vq,
- struct vring_desc *desc)
+static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
+ struct vring_desc *desc)
{
u16 flags;
@@ -389,6 +393,35 @@ static void vring_unmap_one_split(const struct vring_virtqueue *vq,
}
}
+static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
+ unsigned int i)
+{
+ struct vring_desc_extra *extra = vq->split.desc_extra;
+ u16 flags;
+
+ if (!vq->use_dma_api)
+ goto out;
+
+ flags = extra[i].flags;
+
+ if (flags & VRING_DESC_F_INDIRECT) {
+ dma_unmap_single(vring_dma_dev(vq),
+ extra[i].addr,
+ extra[i].len,
+ (flags & VRING_DESC_F_WRITE) ?
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ } else {
+ dma_unmap_page(vring_dma_dev(vq),
+ extra[i].addr,
+ extra[i].len,
+ (flags & VRING_DESC_F_WRITE) ?
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ }
+
+out:
+ return extra[i].next;
+}
+
static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
unsigned int total_sg,
gfp_t gfp)
@@ -412,6 +445,35 @@ static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
return desc;
}
+static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
+ struct vring_desc *desc,
+ unsigned int i,
+ dma_addr_t addr,
+ unsigned int len,
+ u16 flags,
+ bool indirect)
+{
+ struct vring_virtqueue *vring = to_vvq(vq);
+ struct vring_desc_extra *extra = vring->split.desc_extra;
+ u16 next;
+
+ desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
+ desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
+ desc[i].len = cpu_to_virtio32(vq->vdev, len);
+
+ if (!indirect) {
+ next = extra[i].next;
+ desc[i].next = cpu_to_virtio16(vq->vdev, next);
+
+ extra[i].addr = addr;
+ extra[i].len = len;
+ extra[i].flags = flags;
+ } else
+ next = virtio16_to_cpu(vq->vdev, desc[i].next);
+
+ return next;
+}
+
static inline int virtqueue_add_split(struct virtqueue *_vq,
struct scatterlist *sgs[],
unsigned int total_sg,
@@ -484,11 +546,13 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
if (vring_mapping_error(vq, addr))
goto unmap_release;
- desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
- desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
- desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
prev = i;
- i = virtio16_to_cpu(_vq->vdev, desc[i].next);
+ /* Note that we trust indirect descriptor
+ * table since it use stream DMA mapping.
+ */
+ i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+ VRING_DESC_F_NEXT,
+ indirect);
}
}
for (; n < (out_sgs + in_sgs); n++) {
@@ -497,15 +561,22 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
if (vring_mapping_error(vq, addr))
goto unmap_release;
- desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
- desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
- desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
prev = i;
- i = virtio16_to_cpu(_vq->vdev, desc[i].next);
+ /* Note that we trust indirect descriptor
+ * table since it use stream DMA mapping.
+ */
+ i = virtqueue_add_desc_split(_vq, desc, i, addr,
+ sg->length,
+ VRING_DESC_F_NEXT |
+ VRING_DESC_F_WRITE,
+ indirect);
}
}
/* Last one doesn't continue. */
desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
+ if (!indirect && vq->use_dma_api)
+ vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags =
+ ~VRING_DESC_F_NEXT;
if (indirect) {
/* Now that the indirect table is filled in, map it. */
@@ -515,13 +586,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
if (vring_mapping_error(vq, addr))
goto unmap_release;
- vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
- VRING_DESC_F_INDIRECT);
- vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
- addr);
-
- vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
- total_sg * sizeof(struct vring_desc));
+ virtqueue_add_desc_split(_vq, vq->split.vring.desc,
+ head, addr,
+ total_sg * sizeof(struct vring_desc),
+ VRING_DESC_F_INDIRECT,
+ false);
}
/* We're using some buffers from the free list. */
@@ -529,8 +598,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
/* Update free pointer */
if (indirect)
- vq->free_head = virtio16_to_cpu(_vq->vdev,
- vq->split.vring.desc[head].next);
+ vq->free_head = vq->split.desc_extra[head].next;
else
vq->free_head = i;
@@ -575,8 +643,11 @@ unmap_release:
for (n = 0; n < total_sg; n++) {
if (i == err_idx)
break;
- vring_unmap_one_split(vq, &desc[i]);
- i = virtio16_to_cpu(_vq->vdev, desc[i].next);
+ if (indirect) {
+ vring_unmap_one_split_indirect(vq, &desc[i]);
+ i = virtio16_to_cpu(_vq->vdev, desc[i].next);
+ } else
+ i = vring_unmap_one_split(vq, i);
}
if (indirect)
@@ -630,14 +701,13 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
i = head;
while (vq->split.vring.desc[i].flags & nextflag) {
- vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
- i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
+ vring_unmap_one_split(vq, i);
+ i = vq->split.desc_extra[i].next;
vq->vq.num_free++;
}
- vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
- vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
- vq->free_head);
+ vring_unmap_one_split(vq, i);
+ vq->split.desc_extra[i].next = vq->free_head;
vq->free_head = head;
/* Plus final descriptor */
@@ -652,15 +722,14 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
if (!indir_desc)
return;
- len = virtio32_to_cpu(vq->vq.vdev,
- vq->split.vring.desc[head].len);
+ len = vq->split.desc_extra[head].len;
- BUG_ON(!(vq->split.vring.desc[head].flags &
- cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+ BUG_ON(!(vq->split.desc_extra[head].flags &
+ VRING_DESC_F_INDIRECT));
BUG_ON(len == 0 || len % sizeof(struct vring_desc));
for (j = 0; j < len / sizeof(struct vring_desc); j++)
- vring_unmap_one_split(vq, &indir_desc[j]);
+ vring_unmap_one_split_indirect(vq, &indir_desc[j]);
kfree(indir_desc);
vq->split.desc_state[head].indir_desc = NULL;
@@ -739,7 +808,10 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq)
if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
- if (!vq->event)
+ if (vq->event)
+ /* TODO: this is a hack. Figure out a cleaner value to write. */
+ vring_used_event(&vq->split.vring) = 0x0;
+ else
vq->split.vring.avail->flags =
cpu_to_virtio16(_vq->vdev,
vq->split.avail_flags_shadow);
@@ -912,7 +984,7 @@ static struct virtqueue *vring_create_virtqueue_split(
*/
static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
- struct vring_desc_extra_packed *state)
+ struct vring_desc_extra *state)
{
u16 flags;
@@ -1061,7 +1133,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1 << VRING_PACKED_DESC_F_USED;
}
vq->packed.next_avail_idx = n;
- vq->free_head = vq->packed.desc_state[id].next;
+ vq->free_head = vq->packed.desc_extra[id].next;
/* Store token and indirect buffer state. */
vq->packed.desc_state[id].num = 1;
@@ -1169,7 +1241,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
le16_to_cpu(flags);
}
prev = curr;
- curr = vq->packed.desc_state[curr].next;
+ curr = vq->packed.desc_extra[curr].next;
if ((unlikely(++i >= vq->packed.vring.num))) {
i = 0;
@@ -1213,13 +1285,16 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
unmap_release:
err_idx = i;
i = head;
+ curr = vq->free_head;
vq->packed.avail_used_flags = avail_used_flags;
for (n = 0; n < total_sg; n++) {
if (i == err_idx)
break;
- vring_unmap_desc_packed(vq, &desc[i]);
+ vring_unmap_state_packed(vq,
+ &vq->packed.desc_extra[curr]);
+ curr = vq->packed.desc_extra[curr].next;
i++;
if (i >= vq->packed.vring.num)
i = 0;
@@ -1290,7 +1365,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
/* Clear data ptr. */
state->data = NULL;
- vq->packed.desc_state[state->last].next = vq->free_head;
+ vq->packed.desc_extra[state->last].next = vq->free_head;
vq->free_head = id;
vq->vq.num_free += state->num;
@@ -1299,7 +1374,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
for (i = 0; i < state->num; i++) {
vring_unmap_state_packed(vq,
&vq->packed.desc_extra[curr]);
- curr = vq->packed.desc_state[curr].next;
+ curr = vq->packed.desc_extra[curr].next;
}
}
@@ -1550,6 +1625,25 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
return NULL;
}
+static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
+ unsigned int num)
+{
+ struct vring_desc_extra *desc_extra;
+ unsigned int i;
+
+ desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
+ GFP_KERNEL);
+ if (!desc_extra)
+ return NULL;
+
+ memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
+
+ for (i = 0; i < num - 1; i++)
+ desc_extra[i].next = i + 1;
+
+ return desc_extra;
+}
+
static struct virtqueue *vring_create_virtqueue_packed(
unsigned int index,
unsigned int num,
@@ -1567,7 +1661,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
struct vring_packed_desc_event *driver, *device;
dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
size_t ring_size_in_bytes, event_size_in_bytes;
- unsigned int i;
ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
@@ -1605,6 +1698,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
vq->weak_barriers = weak_barriers;
vq->broken = false;
vq->last_used_idx = 0;
+ vq->event_triggered = false;
vq->num_added = 0;
vq->packed_ring = true;
vq->use_dma_api = vring_use_dma_api(vdev);
@@ -1649,18 +1743,11 @@ static struct virtqueue *vring_create_virtqueue_packed(
/* Put everything in free lists. */
vq->free_head = 0;
- for (i = 0; i < num-1; i++)
- vq->packed.desc_state[i].next = i + 1;
- vq->packed.desc_extra = kmalloc_array(num,
- sizeof(struct vring_desc_extra_packed),
- GFP_KERNEL);
+ vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
if (!vq->packed.desc_extra)
goto err_desc_extra;
- memset(vq->packed.desc_extra, 0,
- num * sizeof(struct vring_desc_extra_packed));
-
/* No callback? Tell other side not to bother us. */
if (!callback) {
vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@@ -1875,7 +1962,7 @@ bool virtqueue_kick(struct virtqueue *vq)
EXPORT_SYMBOL_GPL(virtqueue_kick);
/**
- * virtqueue_get_buf - get the next used buffer
+ * virtqueue_get_buf_ctx - get the next used buffer
* @_vq: the struct virtqueue we're talking about.
* @len: the length written into the buffer
* @ctx: extra context for the token
@@ -1919,6 +2006,12 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
+ /* If device triggered an event already it won't trigger one again:
+ * no need to disable.
+ */
+ if (vq->event_triggered)
+ return;
+
if (vq->packed_ring)
virtqueue_disable_cb_packed(_vq);
else
@@ -1942,6 +2035,9 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
+ if (vq->event_triggered)
+ vq->event_triggered = false;
+
return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
virtqueue_enable_cb_prepare_split(_vq);
}
@@ -2005,6 +2101,9 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
+ if (vq->event_triggered)
+ vq->event_triggered = false;
+
return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
virtqueue_enable_cb_delayed_split(_vq);
}
@@ -2044,6 +2143,10 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
if (unlikely(vq->broken))
return IRQ_HANDLED;
+ /* Just a hint for performance: so it's ok that this can be racy! */
+ if (vq->event)
+ vq->event_triggered = true;
+
pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
if (vq->vq.callback)
vq->vq.callback(&vq->vq);
@@ -2062,7 +2165,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *),
const char *name)
{
- unsigned int i;
struct vring_virtqueue *vq;
if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@@ -2083,6 +2185,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
vq->weak_barriers = weak_barriers;
vq->broken = false;
vq->last_used_idx = 0;
+ vq->event_triggered = false;
vq->num_added = 0;
vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
@@ -2114,20 +2217,26 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
vq->split.desc_state = kmalloc_array(vring.num,
sizeof(struct vring_desc_state_split), GFP_KERNEL);
- if (!vq->split.desc_state) {
- kfree(vq);
- return NULL;
- }
+ if (!vq->split.desc_state)
+ goto err_state;
+
+ vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
+ if (!vq->split.desc_extra)
+ goto err_extra;
/* Put everything in free lists. */
vq->free_head = 0;
- for (i = 0; i < vring.num-1; i++)
- vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
memset(vq->split.desc_state, 0, vring.num *
sizeof(struct vring_desc_state_split));
list_add_tail(&vq->vq.list, &vdev->vqs);
return &vq->vq;
+
+err_extra:
+ kfree(vq->split.desc_state);
+err_state:
+ kfree(vq);
+ return NULL;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
@@ -2208,8 +2317,10 @@ void vring_del_virtqueue(struct virtqueue *_vq)
vq->split.queue_dma_addr);
}
}
- if (!vq->packed_ring)
+ if (!vq->packed_ring) {
kfree(vq->split.desc_state);
+ kfree(vq->split.desc_extra);
+ }
list_del(&_vq->list);
kfree(vq);
}
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index e28acf482e0c..e1a141135992 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -142,6 +142,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
struct vdpa_callback cb;
struct virtqueue *vq;
u64 desc_addr, driver_addr, device_addr;
+ /* Assume split virtqueue, switch to packed if necessary */
+ struct vdpa_vq_state state = {0};
unsigned long flags;
u32 align, num;
int err;
@@ -191,6 +193,19 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
goto err_vq;
}
+ /* reset virtqueue state index */
+ if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+ struct vdpa_vq_state_packed *s = &state.packed;
+
+ s->last_avail_counter = 1;
+ s->last_avail_idx = 0;
+ s->last_used_counter = 1;
+ s->last_used_idx = 0;
+ }
+ err = ops->set_vq_state(vdpa, index, &state);
+ if (err)
+ goto err_vq;
+
ops->set_vq_ready(vdpa, index, 1);
vq->priv = info;