summaryrefslogtreecommitdiff
path: root/net/xdp/xsk.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/xdp/xsk.c')
-rw-r--r--net/xdp/xsk.c219
1 files changed, 143 insertions, 76 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 6c5e09e7440a..b71a32eeae65 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -33,71 +33,105 @@
static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
-bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
+void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
- return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
- READ_ONCE(xs->umem->fq);
-}
-
-void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
-{
- if (umem->need_wakeup & XDP_WAKEUP_RX)
+ if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
return;
- umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
- umem->need_wakeup |= XDP_WAKEUP_RX;
+ pool->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
+ pool->cached_need_wakeup |= XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_set_rx_need_wakeup);
-void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
{
struct xdp_sock *xs;
- if (umem->need_wakeup & XDP_WAKEUP_TX)
+ if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
return;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
}
rcu_read_unlock();
- umem->need_wakeup |= XDP_WAKEUP_TX;
+ pool->cached_need_wakeup |= XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_set_tx_need_wakeup);
-void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
{
- if (!(umem->need_wakeup & XDP_WAKEUP_RX))
+ if (!(pool->cached_need_wakeup & XDP_WAKEUP_RX))
return;
- umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
- umem->need_wakeup &= ~XDP_WAKEUP_RX;
+ pool->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+ pool->cached_need_wakeup &= ~XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);
-void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
{
struct xdp_sock *xs;
- if (!(umem->need_wakeup & XDP_WAKEUP_TX))
+ if (!(pool->cached_need_wakeup & XDP_WAKEUP_TX))
return;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
}
rcu_read_unlock();
- umem->need_wakeup &= ~XDP_WAKEUP_TX;
+ pool->cached_need_wakeup &= ~XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);
-bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
+{
+ return pool->uses_need_wakeup;
+}
+EXPORT_SYMBOL(xsk_uses_need_wakeup);
+
+struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
+ u16 queue_id)
+{
+ if (queue_id < dev->real_num_rx_queues)
+ return dev->_rx[queue_id].pool;
+ if (queue_id < dev->real_num_tx_queues)
+ return dev->_tx[queue_id].pool;
+
+ return NULL;
+}
+EXPORT_SYMBOL(xsk_get_pool_from_qid);
+
+void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
+{
+ if (queue_id < dev->real_num_rx_queues)
+ dev->_rx[queue_id].pool = NULL;
+ if (queue_id < dev->real_num_tx_queues)
+ dev->_tx[queue_id].pool = NULL;
+}
+
+/* The buffer pool is stored both in the _rx struct and the _tx struct as we do
+ * not know if the device has more tx queues than rx, or the opposite.
+ * This might also change during run time.
+ */
+int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
+ u16 queue_id)
{
- return umem->flags & XDP_UMEM_USES_NEED_WAKEUP;
+ if (queue_id >= max_t(unsigned int,
+ dev->real_num_rx_queues,
+ dev->real_num_tx_queues))
+ return -EINVAL;
+
+ if (queue_id < dev->real_num_rx_queues)
+ dev->_rx[queue_id].pool = pool;
+ if (queue_id < dev->real_num_tx_queues)
+ dev->_tx[queue_id].pool = pool;
+
+ return 0;
}
-EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
void xp_release(struct xdp_buff_xsk *xskb)
{
@@ -155,12 +189,12 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
struct xdp_buff *xsk_xdp;
int err;
- if (len > xsk_umem_get_rx_frame_size(xs->umem)) {
+ if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
xs->rx_dropped++;
return -ENOSPC;
}
- xsk_xdp = xsk_buff_alloc(xs->umem);
+ xsk_xdp = xsk_buff_alloc(xs->pool);
if (!xsk_xdp) {
xs->rx_dropped++;
return -ENOSPC;
@@ -208,7 +242,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
static void xsk_flush(struct xdp_sock *xs)
{
xskq_prod_submit(xs->rx);
- __xskq_cons_release(xs->umem->fq);
+ __xskq_cons_release(xs->pool->fq);
sock_def_readable(&xs->sk);
}
@@ -249,32 +283,32 @@ void __xsk_map_flush(void)
}
}
-void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
- xskq_prod_submit_n(umem->cq, nb_entries);
+ xskq_prod_submit_n(pool->cq, nb_entries);
}
-EXPORT_SYMBOL(xsk_umem_complete_tx);
+EXPORT_SYMBOL(xsk_tx_completed);
-void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+void xsk_tx_release(struct xsk_buff_pool *pool)
{
struct xdp_sock *xs;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
__xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
}
rcu_read_unlock();
}
-EXPORT_SYMBOL(xsk_umem_consume_tx_done);
+EXPORT_SYMBOL(xsk_tx_release);
-bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
+bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
{
struct xdp_sock *xs;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
- if (!xskq_cons_peek_desc(xs->tx, desc, umem)) {
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
+ if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
xs->tx->queue_empty_descs++;
continue;
}
@@ -284,7 +318,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- if (xskq_prod_reserve_addr(umem->cq, desc->addr))
+ if (xskq_prod_reserve_addr(pool->cq, desc->addr))
goto out;
xskq_cons_release(xs->tx);
@@ -296,7 +330,7 @@ out:
rcu_read_unlock();
return false;
}
-EXPORT_SYMBOL(xsk_umem_consume_tx);
+EXPORT_SYMBOL(xsk_tx_peek_desc);
static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
@@ -322,7 +356,7 @@ static void xsk_destruct_skb(struct sk_buff *skb)
unsigned long flags;
spin_lock_irqsave(&xs->tx_completion_lock, flags);
- xskq_prod_submit_addr(xs->umem->cq, addr);
+ xskq_prod_submit_addr(xs->pool->cq, addr);
spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
sock_wfree(skb);
@@ -342,7 +376,7 @@ static int xsk_generic_xmit(struct sock *sk)
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- while (xskq_cons_peek_desc(xs->tx, &desc, xs->umem)) {
+ while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
char *buffer;
u64 addr;
u32 len;
@@ -359,14 +393,14 @@ static int xsk_generic_xmit(struct sock *sk)
skb_put(skb, len);
addr = desc.addr;
- buffer = xsk_buff_raw_get_data(xs->umem, addr);
+ buffer = xsk_buff_raw_get_data(xs->pool, addr);
err = skb_store_bits(skb, 0, buffer, len);
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- if (unlikely(err) || xskq_prod_reserve(xs->umem->cq)) {
+ if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
kfree_skb(skb);
goto out;
}
@@ -446,16 +480,16 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
__poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
if (unlikely(!xsk_is_bound(xs)))
return mask;
- umem = xs->umem;
+ pool = xs->pool;
- if (umem->need_wakeup) {
+ if (pool->cached_need_wakeup) {
if (xs->zc)
- xsk_wakeup(xs, umem->need_wakeup);
+ xsk_wakeup(xs, pool->cached_need_wakeup);
else
/* Poll needs to drive Tx also in copy mode */
__xsk_sendmsg(sk);
@@ -496,7 +530,7 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
WRITE_ONCE(xs->state, XSK_UNBOUND);
/* Wait for driver to stop using the xdp socket. */
- xdp_del_sk_umem(xs->umem, xs);
+ xp_del_xsk(xs->pool, xs);
xs->dev = NULL;
synchronize_net();
dev_put(dev);
@@ -574,6 +608,8 @@ static int xsk_release(struct socket *sock)
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
+ xskq_destroy(xs->fq_tmp);
+ xskq_destroy(xs->cq_tmp);
sock_orphan(sk);
sock->sk = NULL;
@@ -601,6 +637,11 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
return sock;
}
+static bool xsk_validate_queues(struct xdp_sock *xs)
+{
+ return xs->fq_tmp && xs->cq_tmp;
+}
+
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -669,29 +710,66 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
sockfd_put(sock);
goto out_unlock;
}
- if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
- err = -EINVAL;
- sockfd_put(sock);
- goto out_unlock;
+
+ if (umem_xs->queue_id != qid || umem_xs->dev != dev) {
+ /* Share the umem with another socket on another qid
+ * and/or device.
+ */
+ xs->pool = xp_create_and_assign_umem(xs,
+ umem_xs->umem);
+ if (!xs->pool) {
+ err = -ENOMEM;
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+
+ err = xp_assign_dev_shared(xs->pool, umem_xs->umem,
+ dev, qid);
+ if (err) {
+ xp_destroy(xs->pool);
+ xs->pool = NULL;
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+ } else {
+ /* Share the buffer pool with the other socket. */
+ if (xs->fq_tmp || xs->cq_tmp) {
+ /* Do not allow setting your own fq or cq. */
+ err = -EINVAL;
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+
+ xp_get_pool(umem_xs->pool);
+ xs->pool = umem_xs->pool;
}
xdp_get_umem(umem_xs->umem);
WRITE_ONCE(xs->umem, umem_xs->umem);
sockfd_put(sock);
- } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
+ } else if (!xs->umem || !xsk_validate_queues(xs)) {
err = -EINVAL;
goto out_unlock;
} else {
/* This xsk has its own umem. */
- err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
- if (err)
+ xs->pool = xp_create_and_assign_umem(xs, xs->umem);
+ if (!xs->pool) {
+ err = -ENOMEM;
goto out_unlock;
+ }
+
+ err = xp_assign_dev(xs->pool, dev, qid, flags);
+ if (err) {
+ xp_destroy(xs->pool);
+ xs->pool = NULL;
+ goto out_unlock;
+ }
}
xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
- xdp_add_sk_umem(xs->umem, xs);
+ xp_add_xsk(xs->pool, xs);
out_unlock:
if (err) {
@@ -797,16 +875,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
mutex_unlock(&xs->mutex);
return -EBUSY;
}
- if (!xs->umem) {
- mutex_unlock(&xs->mutex);
- return -EINVAL;
- }
- q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
- &xs->umem->cq;
+ q = (optname == XDP_UMEM_FILL_RING) ? &xs->fq_tmp :
+ &xs->cq_tmp;
err = xsk_init_queue(entries, q, true);
- if (optname == XDP_UMEM_FILL_RING)
- xp_set_fq(xs->umem->pool, *q);
mutex_unlock(&xs->mutex);
return err;
}
@@ -873,7 +945,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
if (extra_stats) {
stats.rx_ring_full = xs->rx_queue_full;
stats.rx_fill_ring_empty_descs =
- xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0;
+ xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0;
stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx);
} else {
stats.rx_dropped += xs->rx_queue_full;
@@ -975,7 +1047,6 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL;
- struct xdp_umem *umem;
unsigned long pfn;
struct page *qpg;
@@ -987,16 +1058,12 @@ static int xsk_mmap(struct file *file, struct socket *sock,
} else if (offset == XDP_PGOFF_TX_RING) {
q = READ_ONCE(xs->tx);
} else {
- umem = READ_ONCE(xs->umem);
- if (!umem)
- return -EINVAL;
-
/* Matches the smp_wmb() in XDP_UMEM_REG */
smp_rmb();
if (offset == XDP_UMEM_PGOFF_FILL_RING)
- q = READ_ONCE(umem->fq);
+ q = READ_ONCE(xs->fq_tmp);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
- q = READ_ONCE(umem->cq);
+ q = READ_ONCE(xs->cq_tmp);
}
if (!q)
@@ -1034,8 +1101,8 @@ static int xsk_notifier(struct notifier_block *this,
xsk_unbind_dev(xs);
- /* Clear device references in umem. */
- xdp_umem_clear_dev(xs->umem);
+ /* Clear device references. */
+ xp_clear_dev(xs->pool);
}
mutex_unlock(&xs->mutex);
}
@@ -1079,7 +1146,7 @@ static void xsk_destruct(struct sock *sk)
if (!sock_flag(sk, SOCK_DEAD))
return;
- xdp_put_umem(xs->umem);
+ xp_put_pool(xs->pool);
sk_refcnt_debug_dec(sk);
}
@@ -1087,8 +1154,8 @@ static void xsk_destruct(struct sock *sk)
static int xsk_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
- struct sock *sk;
struct xdp_sock *xs;
+ struct sock *sk;
if (!ns_capable(net->user_ns, CAP_NET_RAW))
return -EPERM;