author     Linus Torvalds <torvalds@linux-foundation.org>  2020-12-15 13:22:29 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2020-12-15 13:22:29 -0800
commit     d635a69dd4981cc51f90293f5f64268620ed1565 (patch)
tree       5e0a758b402ea7d624c25c3a343545dd29e80f31 /drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c
parent     ac73e3dc8acd0a3be292755db30388c3580f5674 (diff)
parent     efd5a1584537698220578227e6467638307c2a0b (diff)
download   linux-d635a69dd4981cc51f90293f5f64268620ed1565.tar.gz
Merge tag 'net-next-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
"Core:
- support "prefer busy polling" NAPI operation mode, where we defer
softirq for some time expecting applications to periodically busy
poll
- AF_XDP: improve efficiency by more batching and hindering the
adjacency cache prefetcher
- af_packet: make packet_fanout.arr size configurable up to 64K
- tcp: optimize TCP zero copy receive in presence of partial or
unaligned reads making zero copy a performance win for much smaller
messages
- XDP: add bulk APIs for returning / freeing frames
- sched: support fragmenting IP packets as they come out of conntrack
- net: allow virtual netdevs to forward UDP L4 and fraglist GSO skbs
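
  For the "prefer busy polling" item above, a minimal user-space sketch of
  opting a socket in. SO_PREFER_BUSY_POLL is the socket option this cycle
  adds; the fallback #define of 69 is an assumption taken from this
  release's uapi headers, and error handling is elided:

      #include <sys/socket.h>

      #ifndef SO_PREFER_BUSY_POLL
      #define SO_PREFER_BUSY_POLL 69 /* assumed uapi value in this release */
      #endif

      static int enable_prefer_busy_poll(int fd)
      {
              int one = 1;
              int usecs = 50; /* busy poll up to 50 usecs per attempt */

              /* Opt in to busy polling, then ask the stack to defer
               * softirq processing in its favor.
               */
              if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, &usecs,
                             sizeof(usecs)))
                      return -1;
              return setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL, &one,
                                sizeof(one));
      }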
BPF:
- switch BPF from crude rlimit-based to memcg-based memory accounting
- BPF type format information for kernel modules and related tracing
enhancements
- implement task-local storage for BPF LSM programs (a minimal sketch
follows this list)
- allow the FENTRY/FEXIT/RAW_TP tracing programs to use
bpf_sk_storage
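
  For the task-local storage item above, a minimal sketch of a BPF LSM
  program using the new BPF_MAP_TYPE_TASK_STORAGE map type. It assumes a
  libbpf build with a generated vmlinux.h; the map and program names are
  illustrative only:

      #include "vmlinux.h"
      #include <bpf/bpf_helpers.h>
      #include <bpf/bpf_tracing.h>

      /* One u64 counter kept per task. */
      struct {
              __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
              __uint(map_flags, BPF_F_NO_PREALLOC);
              __type(key, int);
              __type(value, u64);
      } opens SEC(".maps");

      SEC("lsm/file_open")
      int BPF_PROG(count_file_open, struct file *file)
      {
              u64 *cnt;

              /* Get, or create on first use, this task's counter. */
              cnt = bpf_task_storage_get(&opens,
                                         bpf_get_current_task_btf(), 0,
                                         BPF_LOCAL_STORAGE_GET_F_CREATE);
              if (cnt)
                      __sync_fetch_and_add(cnt, 1);
              return 0;
      }

      char LICENSE[] SEC("license") = "GPL";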
Protocols:
- mptcp: improve support for multiple xmit streams and memory
accounting, plus many smaller improvements
- TLS: support CHACHA20-POLY1305 cipher
- seg6: add support for SRv6 End.DT4/DT6 behavior
- sctp: Implement RFC 6951: UDP Encapsulation of SCTP
- ppp_generic: add ability to bridge channels directly
- bridge: Connectivity Fault Management (CFM) support as defined
in IEEE 802.1Q section 12.14.
Drivers:
- mlx5: make use of the new auxiliary bus to organize the driver
internals
- mlx5: more accurate port TX timestamping support
- mlxsw:
- improve the efficiency of offloaded next hop updates by using
the new nexthop object API
- support blackhole nexthops
- support IEEE 802.1ad (Q-in-Q) bridging
- rtw88: major bluetooth coexistence improvements
- iwlwifi: support new 6 GHz frequency band
- ath11k: Fast Initial Link Setup (FILS)
- mt7915: dual band concurrent (DBDC) support
- net: ipa: add basic support for IPA v4.5
Refactor:
- a few pieces of in_interrupt() cleanup work from Sebastian Andrzej
Siewior
- phy: add support for shared interrupts; get rid of multiple driver
APIs and have the drivers write a full IRQ handler; the slight growth
of driver code should be compensated by the simpler API, which also
allows shared IRQs
- add common code for handling netdev per-cpu counters
- move TX packet re-allocation from Ethernet switch tag drivers to a
central place
- improve efficiency and rename nla_strlcpy
- a number of W=1 warning cleanups, as we now catch those in a
patchwork build bot
Old code removal:
- wan: delete the DLCI / SDLA drivers
- wimax: move to staging
- wifi: remove old WDS wifi bridging support"
* tag 'net-next-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1922 commits)
net: hns3: fix expression that is currently always true
net: fix proc_fs init handling in af_packet and tls
nfc: pn533: convert comma to semicolon
af_vsock: Assign the vsock transport considering the vsock address flags
af_vsock: Set VMADDR_FLAG_TO_HOST flag on the receive path
vsock_addr: Check for supported flag values
vm_sockets: Add VMADDR_FLAG_TO_HOST vsock flag
vm_sockets: Add flags field in the vsock address data structure
net: Disable NETIF_F_HW_TLS_TX when HW_CSUM is disabled
tcp: Add logic to check for SYN w/ data in tcp_simple_retransmit
net: mscc: ocelot: install MAC addresses in .ndo_set_rx_mode from process context
nfc: s3fwrn5: Release the nfc firmware
net: vxge: clean up sparse warnings
mlxsw: spectrum_router: Use eXtended mezzanine to offload IPv4 router
mlxsw: spectrum: Set KVH XLT cache mode for Spectrum2/3
mlxsw: spectrum_router_xm: Introduce basic XM cache flushing
mlxsw: reg: Add Router LPM Cache Enable Register
mlxsw: reg: Add Router LPM Cache ML Delete Register
mlxsw: spectrum_router_xm: Implement L-value tracking for M-index
mlxsw: reg: Add XM Router M Table Register
...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c')
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c  812
1 files changed, 812 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c
new file mode 100644
index 000000000000..d213af723a2a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c
@@ -0,0 +1,812 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2020 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/rhashtable.h>
+
+#include "spectrum.h"
+#include "core.h"
+#include "reg.h"
+#include "spectrum_router.h"
+
+#define MLXSW_SP_ROUTER_XM_M_VAL 16
+
+static const u8 mlxsw_sp_router_xm_m_val[] = {
+        [MLXSW_SP_L3_PROTO_IPV4] = MLXSW_SP_ROUTER_XM_M_VAL,
+        [MLXSW_SP_L3_PROTO_IPV6] = 0, /* Currently unused. */
+};
+
+#define MLXSW_SP_ROUTER_XM_L_VAL_MAX 16
+
+struct mlxsw_sp_router_xm {
+        bool ipv4_supported;
+        bool ipv6_supported;
+        unsigned int entries_size;
+        struct rhashtable ltable_ht;
+        struct rhashtable flush_ht; /* Stores items about to be flushed from cache */
+        unsigned int flush_count;
+        bool flush_all_mode;
+};
+
+struct mlxsw_sp_router_xm_ltable_node {
+        struct rhash_head ht_node; /* Member of router_xm->ltable_ht */
+        u16 mindex;
+        u8 current_lvalue;
+        refcount_t refcnt;
+        unsigned int lvalue_ref[MLXSW_SP_ROUTER_XM_L_VAL_MAX + 1];
+};
+
+static const struct rhashtable_params mlxsw_sp_router_xm_ltable_ht_params = {
+        .key_offset = offsetof(struct mlxsw_sp_router_xm_ltable_node, mindex),
+        .head_offset = offsetof(struct mlxsw_sp_router_xm_ltable_node, ht_node),
+        .key_len = sizeof(u16),
+        .automatic_shrinking = true,
+};
+
+struct mlxsw_sp_router_xm_flush_info {
+        bool all;
+        enum mlxsw_sp_l3proto proto;
+        u16 virtual_router;
+        u8 prefix_len;
+        unsigned char addr[sizeof(struct in6_addr)];
+};
+
+struct mlxsw_sp_router_xm_fib_entry {
+        bool committed;
+        struct mlxsw_sp_router_xm_ltable_node *ltable_node; /* Parent node */
+        u16 mindex; /* Store for processing from commit op */
+        u8 lvalue;
+        struct mlxsw_sp_router_xm_flush_info flush_info;
+};
+
+#define MLXSW_SP_ROUTE_LL_XM_ENTRIES_MAX \
+        (MLXSW_REG_XMDR_TRANS_LEN / MLXSW_REG_XMDR_C_LT_ROUTE_V4_LEN)
+
+struct mlxsw_sp_fib_entry_op_ctx_xm {
+        bool initialized;
+        char xmdr_pl[MLXSW_REG_XMDR_LEN];
+        unsigned int trans_offset; /* Offset of the current command within one
+                                    * transaction of XMDR register.
+                                    */
+        unsigned int trans_item_len; /* The current command length. This is used
+                                      * to advance 'trans_offset' when the next
+                                      * command is appended.
+                                      */
+        unsigned int entries_count;
+        struct mlxsw_sp_router_xm_fib_entry *entries[MLXSW_SP_ROUTE_LL_XM_ENTRIES_MAX];
+};
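+
+/* One XMDR transaction carries up to MLXSW_SP_ROUTE_LL_XM_ENTRIES_MAX route
+ * commands: the pack op appends a command at 'trans_offset', and the commit
+ * op either postpones the device write while bulking is allowed and there is
+ * room for another record, or pushes the whole transaction to the device in
+ * a single register write.
+ */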
+
+static int mlxsw_sp_router_ll_xm_init(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
+                                      enum mlxsw_sp_l3proto proto)
+{
+        char rxlte_pl[MLXSW_REG_RXLTE_LEN];
+
+        mlxsw_reg_rxlte_pack(rxlte_pl, vr_id,
+                             (enum mlxsw_reg_rxlte_protocol) proto, true);
+        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rxlte), rxlte_pl);
+}
+
+static int mlxsw_sp_router_ll_xm_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl)
+{
+        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xralta), xralta_pl);
+}
+
+static int mlxsw_sp_router_ll_xm_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl)
+{
+        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xralst), xralst_pl);
+}
+
+static int mlxsw_sp_router_ll_xm_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl)
+{
+        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xraltb), xraltb_pl);
+}
+
+static u16 mlxsw_sp_router_ll_xm_mindex_get4(const u32 addr)
+{
+        /* Currently the M-index is set to linear mode. That means it is defined
+         * as 16 MSB of IP address.
+         */
+        return addr >> MLXSW_SP_ROUTER_XM_L_VAL_MAX;
+}
+
+static u16 mlxsw_sp_router_ll_xm_mindex_get6(const unsigned char *addr)
+{
+        WARN_ON_ONCE(1);
+        return 0; /* currently unused */
+}
+
+static void mlxsw_sp_router_ll_xm_op_ctx_check_init(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+                                                    struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm)
+{
+        if (op_ctx->initialized)
+                return;
+        op_ctx->initialized = true;
+
+        mlxsw_reg_xmdr_pack(op_ctx_xm->xmdr_pl, true);
+        op_ctx_xm->trans_offset = 0;
+        op_ctx_xm->entries_count = 0;
+}
+
+static void mlxsw_sp_router_ll_xm_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+                                                 enum mlxsw_sp_l3proto proto,
+                                                 enum mlxsw_sp_fib_entry_op op,
+                                                 u16 virtual_router, u8 prefix_len,
+                                                 unsigned char *addr,
+                                                 struct mlxsw_sp_fib_entry_priv *priv)
+{
+        struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
+        struct mlxsw_sp_router_xm_fib_entry *fib_entry = (void *) priv->priv;
+        struct mlxsw_sp_router_xm_flush_info *flush_info;
+        enum mlxsw_reg_xmdr_c_ltr_op xmdr_c_ltr_op;
+        unsigned int len;
+
+        mlxsw_sp_router_ll_xm_op_ctx_check_init(op_ctx, op_ctx_xm);
+
+        switch (op) {
+        case MLXSW_SP_FIB_ENTRY_OP_WRITE:
+                xmdr_c_ltr_op = MLXSW_REG_XMDR_C_LTR_OP_WRITE;
+                break;
+        case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
+                xmdr_c_ltr_op = MLXSW_REG_XMDR_C_LTR_OP_UPDATE;
+                break;
+        case MLXSW_SP_FIB_ENTRY_OP_DELETE:
+                xmdr_c_ltr_op = MLXSW_REG_XMDR_C_LTR_OP_DELETE;
+                break;
+        default:
+                WARN_ON_ONCE(1);
+                return;
+        }
+
+        switch (proto) {
+        case MLXSW_SP_L3_PROTO_IPV4:
+                len = mlxsw_reg_xmdr_c_ltr_pack4(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
+                                                 op_ctx_xm->entries_count, xmdr_c_ltr_op,
+                                                 virtual_router, prefix_len, (u32 *) addr);
+                fib_entry->mindex = mlxsw_sp_router_ll_xm_mindex_get4(*((u32 *) addr));
+                break;
+        case MLXSW_SP_L3_PROTO_IPV6:
+                len = mlxsw_reg_xmdr_c_ltr_pack6(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
+                                                 op_ctx_xm->entries_count, xmdr_c_ltr_op,
+                                                 virtual_router, prefix_len, addr);
+                fib_entry->mindex = mlxsw_sp_router_ll_xm_mindex_get6(addr);
+                break;
+        default:
+                WARN_ON_ONCE(1);
+                return;
+        }
+        if (!op_ctx_xm->trans_offset)
+                op_ctx_xm->trans_item_len = len;
+        else
+                WARN_ON_ONCE(op_ctx_xm->trans_item_len != len);
+
+        op_ctx_xm->entries[op_ctx_xm->entries_count] = fib_entry;
+
+        fib_entry->lvalue = prefix_len > mlxsw_sp_router_xm_m_val[proto] ?
+                                prefix_len - mlxsw_sp_router_xm_m_val[proto] : 0;
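+        /* For example, with the IPv4 M-value of 16, a /24 prefix yields
+         * lvalue == 8, while a /16 or shorter prefix yields lvalue == 0.
+         */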
+
+        flush_info = &fib_entry->flush_info;
+        flush_info->proto = proto;
+        flush_info->virtual_router = virtual_router;
+        flush_info->prefix_len = prefix_len;
+        if (addr)
+                memcpy(flush_info->addr, addr, sizeof(flush_info->addr));
+        else
+                memset(flush_info->addr, 0, sizeof(flush_info->addr));
+}
+
+static void
+mlxsw_sp_router_ll_xm_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+                                                enum mlxsw_reg_ralue_trap_action trap_action,
+                                                u16 trap_id, u32 adjacency_index, u16 ecmp_size)
+{
+        struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
+
+        mlxsw_reg_xmdr_c_ltr_act_remote_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
+                                             trap_action, trap_id, adjacency_index, ecmp_size);
+}
+
+static void
+mlxsw_sp_router_ll_xm_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+                                               enum mlxsw_reg_ralue_trap_action trap_action,
+                                               u16 trap_id, u16 local_erif)
+{
+        struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
+
+        mlxsw_reg_xmdr_c_ltr_act_local_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
+                                            trap_action, trap_id, local_erif);
+}
+
+static void
+mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
+{
+        struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
+
+        mlxsw_reg_xmdr_c_ltr_act_ip2me_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset);
+}
+
+static void
+mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+                                                   u32 tunnel_ptr)
+{
+        struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
+
+        mlxsw_reg_xmdr_c_ltr_act_ip2me_tun_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
+                                                tunnel_ptr);
+}
+
+static struct mlxsw_sp_router_xm_ltable_node *
+mlxsw_sp_router_xm_ltable_node_get(struct mlxsw_sp_router_xm *router_xm, u16 mindex)
+{
+        struct mlxsw_sp_router_xm_ltable_node *ltable_node;
+        int err;
+
+        ltable_node = rhashtable_lookup_fast(&router_xm->ltable_ht, &mindex,
+                                             mlxsw_sp_router_xm_ltable_ht_params);
+        if (ltable_node) {
+                refcount_inc(&ltable_node->refcnt);
+                return ltable_node;
+        }
+        ltable_node = kzalloc(sizeof(*ltable_node), GFP_KERNEL);
+        if (!ltable_node)
+                return ERR_PTR(-ENOMEM);
+        ltable_node->mindex = mindex;
+        refcount_set(&ltable_node->refcnt, 1);
+
+        err = rhashtable_insert_fast(&router_xm->ltable_ht, &ltable_node->ht_node,
+                                     mlxsw_sp_router_xm_ltable_ht_params);
+        if (err)
+                goto err_insert;
+
+        return ltable_node;
+
+err_insert:
+        kfree(ltable_node);
+        return ERR_PTR(err);
+}
+
+static void mlxsw_sp_router_xm_ltable_node_put(struct mlxsw_sp_router_xm *router_xm,
+                                               struct mlxsw_sp_router_xm_ltable_node *ltable_node)
+{
+        if (!refcount_dec_and_test(&ltable_node->refcnt))
+                return;
+        rhashtable_remove_fast(&router_xm->ltable_ht, &ltable_node->ht_node,
+                               mlxsw_sp_router_xm_ltable_ht_params);
+        kfree(ltable_node);
+}
+
+static int mlxsw_sp_router_xm_ltable_lvalue_set(struct mlxsw_sp *mlxsw_sp,
+                                                struct mlxsw_sp_router_xm_ltable_node *ltable_node)
+{
+        char xrmt_pl[MLXSW_REG_XRMT_LEN];
+
+        mlxsw_reg_xrmt_pack(xrmt_pl, ltable_node->mindex, ltable_node->current_lvalue);
+        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xrmt), xrmt_pl);
+}
+
+struct mlxsw_sp_router_xm_flush_node {
+        struct rhash_head ht_node; /* Member of router_xm->flush_ht */
+        struct list_head list;
+        struct mlxsw_sp_router_xm_flush_info flush_info;
+        struct delayed_work dw;
+        struct mlxsw_sp *mlxsw_sp;
+        unsigned long start_jiffies;
+        unsigned int reuses; /* By how many flush calls this was reused. */
+        refcount_t refcnt;
+};
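+
+/* Flush requests are deduplicated by the flush_info key: another request for
+ * the same {proto, VR, prefix} arriving within the delay window reuses the
+ * pending node instead of scheduling a second work item.
+ */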
+
+static const struct rhashtable_params mlxsw_sp_router_xm_flush_ht_params = {
+        .key_offset = offsetof(struct mlxsw_sp_router_xm_flush_node, flush_info),
+        .head_offset = offsetof(struct mlxsw_sp_router_xm_flush_node, ht_node),
+        .key_len = sizeof(struct mlxsw_sp_router_xm_flush_info),
+        .automatic_shrinking = true,
+};
+
+static struct mlxsw_sp_router_xm_flush_node *
+mlxsw_sp_router_xm_cache_flush_node_create(struct mlxsw_sp *mlxsw_sp,
+                                           struct mlxsw_sp_router_xm_flush_info *flush_info)
+{
+        struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+        struct mlxsw_sp_router_xm_flush_node *flush_node;
+        int err;
+
+        flush_node = kzalloc(sizeof(*flush_node), GFP_KERNEL);
+        if (!flush_node)
+                return ERR_PTR(-ENOMEM);
+
+        flush_node->flush_info = *flush_info;
+        err = rhashtable_insert_fast(&router_xm->flush_ht, &flush_node->ht_node,
+                                     mlxsw_sp_router_xm_flush_ht_params);
+        if (err) {
+                kfree(flush_node);
+                return ERR_PTR(err);
+        }
+        router_xm->flush_count++;
+        flush_node->mlxsw_sp = mlxsw_sp;
+        flush_node->start_jiffies = jiffies;
+        refcount_set(&flush_node->refcnt, 1);
+        return flush_node;
+}
+
+static void
+mlxsw_sp_router_xm_cache_flush_node_hold(struct mlxsw_sp_router_xm_flush_node *flush_node)
+{
+        if (!flush_node)
+                return;
+        refcount_inc(&flush_node->refcnt);
+}
+
+static void
+mlxsw_sp_router_xm_cache_flush_node_put(struct mlxsw_sp_router_xm_flush_node *flush_node)
+{
+        if (!flush_node || !refcount_dec_and_test(&flush_node->refcnt))
+                return;
+        kfree(flush_node);
+}
+
+static void
+mlxsw_sp_router_xm_cache_flush_node_destroy(struct mlxsw_sp *mlxsw_sp,
+                                            struct mlxsw_sp_router_xm_flush_node *flush_node)
+{
+        struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+
+        router_xm->flush_count--;
+        rhashtable_remove_fast(&router_xm->flush_ht, &flush_node->ht_node,
+                               mlxsw_sp_router_xm_flush_ht_params);
+        mlxsw_sp_router_xm_cache_flush_node_put(flush_node);
+}
+
+static u32 mlxsw_sp_router_xm_flush_mask4(u8 prefix_len)
+{
+        return GENMASK(31, 32 - prefix_len);
+}
+
+static unsigned char *mlxsw_sp_router_xm_flush_mask6(u8 prefix_len)
+{
+        static unsigned char mask[sizeof(struct in6_addr)];
+
+        memset(mask, 0, sizeof(mask));
+        memset(mask, 0xff, prefix_len / 8);
+        mask[prefix_len / 8] = GENMASK(8, 8 - prefix_len % 8);
+        return mask;
+}
+
+#define MLXSW_SP_ROUTER_XM_CACHE_PARALLEL_FLUSHES_LIMIT 15
+#define MLXSW_SP_ROUTER_XM_CACHE_FLUSH_ALL_MIN_REUSES 15
+#define MLXSW_SP_ROUTER_XM_CACHE_DELAY 50 /* usecs */
+#define MLXSW_SP_ROUTER_XM_CACHE_MAX_WAIT (MLXSW_SP_ROUTER_XM_CACHE_DELAY * 10)
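+
+/* With the values above, a flush work runs 50 usecs after it is scheduled
+ * and may be pushed back by consolidation for at most 500 usecs in total;
+ * once 15 flush works are pending at the same time, the driver degrades to
+ * flushing the whole cache.
+ */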
+
+static void mlxsw_sp_router_xm_cache_flush_work(struct work_struct *work)
+{
+        struct mlxsw_sp_router_xm_flush_info *flush_info;
+        struct mlxsw_sp_router_xm_flush_node *flush_node;
+        char rlcmld_pl[MLXSW_REG_RLCMLD_LEN];
+        enum mlxsw_reg_rlcmld_select select;
+        struct mlxsw_sp *mlxsw_sp;
+        u32 addr4;
+        int err;
+
+        flush_node = container_of(work, struct mlxsw_sp_router_xm_flush_node,
+                                  dw.work);
+        mlxsw_sp = flush_node->mlxsw_sp;
+        flush_info = &flush_node->flush_info;
+
+        if (flush_info->all) {
+                char rlpmce_pl[MLXSW_REG_RLPMCE_LEN];
+
+                mlxsw_reg_rlpmce_pack(rlpmce_pl, true, false);
+                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rlpmce),
+                                      rlpmce_pl);
+                if (err)
+                        dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n");
+
+                if (flush_node->reuses <
+                    MLXSW_SP_ROUTER_XM_CACHE_FLUSH_ALL_MIN_REUSES)
+                        /* Leaving flush-all mode. */
+                        mlxsw_sp->router->xm->flush_all_mode = false;
+                goto out;
+        }
+
+        select = MLXSW_REG_RLCMLD_SELECT_M_AND_ML_ENTRIES;
+
+        switch (flush_info->proto) {
+        case MLXSW_SP_L3_PROTO_IPV4:
+                addr4 = *((u32 *) flush_info->addr);
+                addr4 &= mlxsw_sp_router_xm_flush_mask4(flush_info->prefix_len);
+
+                /* In case the flush prefix length is bigger than M-value,
+                 * it makes no sense to flush M entries. So just flush
+                 * the ML entries.
+                 */
+                if (flush_info->prefix_len > MLXSW_SP_ROUTER_XM_M_VAL)
+                        select = MLXSW_REG_RLCMLD_SELECT_ML_ENTRIES;
+
+                mlxsw_reg_rlcmld_pack4(rlcmld_pl, select,
+                                       flush_info->virtual_router, addr4,
+                                       mlxsw_sp_router_xm_flush_mask4(flush_info->prefix_len));
+                break;
+        case MLXSW_SP_L3_PROTO_IPV6:
+                mlxsw_reg_rlcmld_pack6(rlcmld_pl, select,
+                                       flush_info->virtual_router, flush_info->addr,
+                                       mlxsw_sp_router_xm_flush_mask6(flush_info->prefix_len));
+                break;
+        default:
+                WARN_ON(true);
+                goto out;
+        }
+        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rlcmld), rlcmld_pl);
+        if (err)
+                dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n");
+
+out:
+        mlxsw_sp_router_xm_cache_flush_node_destroy(mlxsw_sp, flush_node);
+}
+
+static bool
+mlxsw_sp_router_xm_cache_flush_may_cancel(struct mlxsw_sp_router_xm_flush_node *flush_node)
+{
+        unsigned long max_wait = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_MAX_WAIT);
+        unsigned long delay = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_DELAY);
+
+        /* In case there is the same flushing work pending, check
+         * if we can consolidate with it. We can do it up to MAX_WAIT:
+         * cancel the delayed work, and if it was still pending, it is
+         * safe to reuse and reschedule the flush node.
+         */
+        if (time_is_before_jiffies(flush_node->start_jiffies + max_wait - delay) &&
+            cancel_delayed_work_sync(&flush_node->dw))
+                return true;
+        return false;
+}
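+
+/* Schedule a cache flush for the given {proto, VR, prefix}. If the same key
+ * already has a flush pending within the consolidation window, reschedule
+ * that one instead of creating a new work item; once too many distinct
+ * flushes are in flight, degrade to a single flush-all.
+ */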
+static int
+mlxsw_sp_router_xm_cache_flush_schedule(struct mlxsw_sp *mlxsw_sp,
+                                        struct mlxsw_sp_router_xm_flush_info *flush_info)
+{
+        unsigned long delay = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_DELAY);
+        struct mlxsw_sp_router_xm_flush_info flush_all_info = {.all = true};
+        struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+        struct mlxsw_sp_router_xm_flush_node *flush_node;
+
+        /* Check if the queued number of flushes reached critical amount after
+         * which it is better to just flush the whole cache.
+         */
+        if (router_xm->flush_count == MLXSW_SP_ROUTER_XM_CACHE_PARALLEL_FLUSHES_LIMIT)
+                /* Entering flush-all mode. */
+                router_xm->flush_all_mode = true;
+
+        if (router_xm->flush_all_mode)
+                flush_info = &flush_all_info;
+
+        rcu_read_lock();
+        flush_node = rhashtable_lookup_fast(&router_xm->flush_ht, flush_info,
+                                            mlxsw_sp_router_xm_flush_ht_params);
+        /* Take a reference so the object is not freed before possible
+         * delayed work cancel could be done.
+         */
+        mlxsw_sp_router_xm_cache_flush_node_hold(flush_node);
+        rcu_read_unlock();
+
+        if (flush_node && mlxsw_sp_router_xm_cache_flush_may_cancel(flush_node)) {
+                flush_node->reuses++;
+                mlxsw_sp_router_xm_cache_flush_node_put(flush_node);
+                /* Original work was within wait period and was canceled.
+                 * That means that the reference is still held and the
+                 * flush_node_put() call above did not free the flush_node.
+                 * Reschedule it with fresh delay.
+                 */
+                goto schedule_work;
+        } else {
+                mlxsw_sp_router_xm_cache_flush_node_put(flush_node);
+        }
+
+        flush_node = mlxsw_sp_router_xm_cache_flush_node_create(mlxsw_sp, flush_info);
+        if (IS_ERR(flush_node))
+                return PTR_ERR(flush_node);
+        INIT_DELAYED_WORK(&flush_node->dw, mlxsw_sp_router_xm_cache_flush_work);
+
+schedule_work:
+        mlxsw_core_schedule_dw(&flush_node->dw, delay);
+        return 0;
+}
+
+static int
+mlxsw_sp_router_xm_ml_entry_add(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_router_xm_fib_entry *fib_entry)
+{
+        struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+        struct mlxsw_sp_router_xm_ltable_node *ltable_node;
+        u8 lvalue = fib_entry->lvalue;
+        int err;
+
+        ltable_node = mlxsw_sp_router_xm_ltable_node_get(router_xm,
+                                                         fib_entry->mindex);
+        if (IS_ERR(ltable_node))
+                return PTR_ERR(ltable_node);
+        if (lvalue > ltable_node->current_lvalue) {
+                /* The L-value is bigger than the one currently set, update. */
+                ltable_node->current_lvalue = lvalue;
+                err = mlxsw_sp_router_xm_ltable_lvalue_set(mlxsw_sp,
+                                                           ltable_node);
+                if (err)
+                        goto err_lvalue_set;
+
+                /* The L value for prefix/M is increased.
+                 * Therefore, all entries in M and ML caches matching
+                 * {prefix/M, proto, VR} need to be flushed. Set the flush
+                 * prefix length to M to achieve that.
+                 */
+                fib_entry->flush_info.prefix_len = MLXSW_SP_ROUTER_XM_M_VAL;
+        }
+
+        ltable_node->lvalue_ref[lvalue]++;
+        fib_entry->ltable_node = ltable_node;
+
+        return 0;
+
+err_lvalue_set:
+        mlxsw_sp_router_xm_ltable_node_put(router_xm, ltable_node);
+        return err;
+}
+
+static void
+mlxsw_sp_router_xm_ml_entry_del(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_router_xm_fib_entry *fib_entry)
+{
+        struct mlxsw_sp_router_xm_ltable_node *ltable_node =
+                                                        fib_entry->ltable_node;
+        struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+        u8 lvalue = fib_entry->lvalue;
+
+        ltable_node->lvalue_ref[lvalue]--;
+        if (lvalue == ltable_node->current_lvalue && lvalue &&
+            !ltable_node->lvalue_ref[lvalue]) {
+                u8 new_lvalue = lvalue - 1;
+
+                /* Find the biggest L-value left out there. */
+                while (new_lvalue > 0 && !ltable_node->lvalue_ref[new_lvalue])
+                        new_lvalue--;
+
+                ltable_node->current_lvalue = new_lvalue;
+                mlxsw_sp_router_xm_ltable_lvalue_set(mlxsw_sp, ltable_node);
+
+                /* The L value for prefix/M is decreased.
+                 * Therefore, all entries in M and ML caches matching
+                 * {prefix/M, proto, VR} need to be flushed. Set the flush
+                 * prefix length to M to achieve that.
+                 */
+                fib_entry->flush_info.prefix_len = MLXSW_SP_ROUTER_XM_M_VAL;
+        }
+        mlxsw_sp_router_xm_ltable_node_put(router_xm, ltable_node);
+}
+
+static int
+mlxsw_sp_router_xm_ml_entries_add(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm)
+{
+        struct mlxsw_sp_router_xm_fib_entry *fib_entry;
+        int err;
+        int i;
+
+        for (i = 0; i < op_ctx_xm->entries_count; i++) {
+                fib_entry = op_ctx_xm->entries[i];
+                err = mlxsw_sp_router_xm_ml_entry_add(mlxsw_sp, fib_entry);
+                if (err)
+                        goto rollback;
+        }
+        return 0;
+
+rollback:
+        for (i--; i >= 0; i--) {
+                fib_entry = op_ctx_xm->entries[i];
+                mlxsw_sp_router_xm_ml_entry_del(mlxsw_sp, fib_entry);
+        }
+        return err;
+}
+
+static void
+mlxsw_sp_router_xm_ml_entries_del(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm)
+{
+        struct mlxsw_sp_router_xm_fib_entry *fib_entry;
+        int i;
+
+        for (i = 0; i < op_ctx_xm->entries_count; i++) {
+                fib_entry = op_ctx_xm->entries[i];
+                mlxsw_sp_router_xm_ml_entry_del(mlxsw_sp, fib_entry);
+        }
+}
+
+static void
+mlxsw_sp_router_xm_ml_entries_cache_flush(struct mlxsw_sp *mlxsw_sp,
+                                          struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm)
+{
+        struct mlxsw_sp_router_xm_fib_entry *fib_entry;
+        int err;
+        int i;
+
+        for (i = 0; i < op_ctx_xm->entries_count; i++) {
+                fib_entry = op_ctx_xm->entries[i];
+                err = mlxsw_sp_router_xm_cache_flush_schedule(mlxsw_sp,
+                                                              &fib_entry->flush_info);
+                if (err)
+                        dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n");
+        }
+}
+
+static int mlxsw_sp_router_ll_xm_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
+                                                  struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+                                                  bool *postponed_for_bulk)
+{
+        struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
+        struct mlxsw_sp_router_xm_fib_entry *fib_entry;
+        u8 num_rec;
+        int err;
+        int i;
+
+        op_ctx_xm->trans_offset += op_ctx_xm->trans_item_len;
+        op_ctx_xm->entries_count++;
+
+        /* Check if bulking is possible and there is still room for another
+         * FIB entry record. The size of 'trans_item_len' is either size of IPv4
+         * command or size of IPv6 command. Not possible to mix those in a
+         * single XMDR write.
+         */
+        if (op_ctx->bulk_ok &&
+            op_ctx_xm->trans_offset + op_ctx_xm->trans_item_len <= MLXSW_REG_XMDR_TRANS_LEN) {
+                if (postponed_for_bulk)
+                        *postponed_for_bulk = true;
+                return 0;
+        }
+
+        if (op_ctx->event == FIB_EVENT_ENTRY_REPLACE) {
+                /* The L-table is updated inside. It has to be done before
+                 * the prefix is inserted.
+                 */
+                err = mlxsw_sp_router_xm_ml_entries_add(mlxsw_sp, op_ctx_xm);
+                if (err)
+                        goto out;
+        }
+
+        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xmdr), op_ctx_xm->xmdr_pl);
+        if (err)
+                goto out;
+        num_rec = mlxsw_reg_xmdr_num_rec_get(op_ctx_xm->xmdr_pl);
+        if (num_rec > op_ctx_xm->entries_count) {
+                dev_err(mlxsw_sp->bus_info->dev, "Invalid XMDR number of records\n");
+                err = -EIO;
+                goto out;
+        }
+        for (i = 0; i < num_rec; i++) {
+                if (!mlxsw_reg_xmdr_reply_vect_get(op_ctx_xm->xmdr_pl, i)) {
+                        dev_err(mlxsw_sp->bus_info->dev, "Command send over XMDR failed\n");
+                        err = -EIO;
+                        goto out;
+                } else {
+                        fib_entry = op_ctx_xm->entries[i];
+                        fib_entry->committed = true;
+                }
+        }
+
+        if (op_ctx->event == FIB_EVENT_ENTRY_DEL)
+                /* The L-table is updated inside. It has to be done after
+                 * the prefix was removed.
+                 */
+                mlxsw_sp_router_xm_ml_entries_del(mlxsw_sp, op_ctx_xm);
+
+        /* At the very end, do the XLT cache flushing to evict stale
+         * M and ML cache entries after prefixes were inserted/removed.
+         */
+        mlxsw_sp_router_xm_ml_entries_cache_flush(mlxsw_sp, op_ctx_xm);
+
+out:
+        /* Next pack call is going to do reinitialization */
+        op_ctx->initialized = false;
+        return err;
+}
+
+static bool mlxsw_sp_router_ll_xm_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv)
+{
+        struct mlxsw_sp_router_xm_fib_entry *fib_entry = (void *) priv->priv;
+
+        return fib_entry->committed;
+}
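+
+/* These low-level ops are the XM flavour of the router's FIB entry
+ * operations; mlxsw_sp_router_xm_init() below only sets up XM state when
+ * the bus reports xm_exists.
+ */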
+
+const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_xm_ops = {
+        .init = mlxsw_sp_router_ll_xm_init,
+        .ralta_write = mlxsw_sp_router_ll_xm_ralta_write,
+        .ralst_write = mlxsw_sp_router_ll_xm_ralst_write,
+        .raltb_write = mlxsw_sp_router_ll_xm_raltb_write,
+        .fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_xm),
+        .fib_entry_priv_size = sizeof(struct mlxsw_sp_router_xm_fib_entry),
+        .fib_entry_pack = mlxsw_sp_router_ll_xm_fib_entry_pack,
+        .fib_entry_act_remote_pack = mlxsw_sp_router_ll_xm_fib_entry_act_remote_pack,
+        .fib_entry_act_local_pack = mlxsw_sp_router_ll_xm_fib_entry_act_local_pack,
+        .fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_pack,
+        .fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_tun_pack,
+        .fib_entry_commit = mlxsw_sp_router_ll_xm_fib_entry_commit,
+        .fib_entry_is_committed = mlxsw_sp_router_ll_xm_fib_entry_is_committed,
+};
+
+#define MLXSW_SP_ROUTER_XM_MINDEX_SIZE (64 * 1024)
+
+int mlxsw_sp_router_xm_init(struct mlxsw_sp *mlxsw_sp)
+{
+        struct mlxsw_sp_router_xm *router_xm;
+        char rxltm_pl[MLXSW_REG_RXLTM_LEN];
+        char xltq_pl[MLXSW_REG_XLTQ_LEN];
+        u32 mindex_size;
+        u16 device_id;
+        int err;
+
+        if (!mlxsw_sp->bus_info->xm_exists)
+                return 0;
+
+        router_xm = kzalloc(sizeof(*router_xm), GFP_KERNEL);
+        if (!router_xm)
+                return -ENOMEM;
+
+        mlxsw_reg_xltq_pack(xltq_pl);
+        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(xltq), xltq_pl);
+        if (err)
+                goto err_xltq_query;
+        mlxsw_reg_xltq_unpack(xltq_pl, &device_id, &router_xm->ipv4_supported,
+                              &router_xm->ipv6_supported, &router_xm->entries_size, &mindex_size);
+
+        if (device_id != MLXSW_REG_XLTQ_XM_DEVICE_ID_XLT) {
+                dev_err(mlxsw_sp->bus_info->dev, "Invalid XM device id\n");
+                err = -EINVAL;
+                goto err_device_id_check;
+        }
+
+        if (mindex_size != MLXSW_SP_ROUTER_XM_MINDEX_SIZE) {
+                dev_err(mlxsw_sp->bus_info->dev, "Unexpected M-index size\n");
+                err = -EINVAL;
+                goto err_mindex_size_check;
+        }
+
+        mlxsw_reg_rxltm_pack(rxltm_pl, mlxsw_sp_router_xm_m_val[MLXSW_SP_L3_PROTO_IPV4],
+                             mlxsw_sp_router_xm_m_val[MLXSW_SP_L3_PROTO_IPV6]);
+        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rxltm), rxltm_pl);
+        if (err)
+                goto err_rxltm_write;
+
+        err = rhashtable_init(&router_xm->ltable_ht, &mlxsw_sp_router_xm_ltable_ht_params);
+        if (err)
+                goto err_ltable_ht_init;
+
+        err = rhashtable_init(&router_xm->flush_ht, &mlxsw_sp_router_xm_flush_ht_params);
+        if (err)
+                goto err_flush_ht_init;
+
+        mlxsw_sp->router->xm = router_xm;
+        return 0;
+
+err_flush_ht_init:
+        rhashtable_destroy(&router_xm->ltable_ht);
+err_ltable_ht_init:
+err_rxltm_write:
+err_mindex_size_check:
+err_device_id_check:
+err_xltq_query:
+        kfree(router_xm);
+        return err;
+}
+
+void mlxsw_sp_router_xm_fini(struct mlxsw_sp *mlxsw_sp)
+{
+        struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+
+        if (!mlxsw_sp->bus_info->xm_exists)
+                return;
+
+        rhashtable_destroy(&router_xm->flush_ht);
+        rhashtable_destroy(&router_xm->ltable_ht);
+        kfree(router_xm);
+}
+
+bool mlxsw_sp_router_xm_ipv4_is_supported(const struct mlxsw_sp *mlxsw_sp)
+{
+        struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+
+        return router_xm && router_xm->ipv4_supported;
+}