 Documentation/networking/device_drivers/ethernet/intel/ice.rst | 1027
 drivers/net/ethernet/intel/ice/Makefile | 1
 drivers/net/ethernet/intel/ice/ice.h | 52
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 25
 drivers/net/ethernet/intel/ice/ice_common.c | 58
 drivers/net/ethernet/intel/ice/ice_common.h | 3
 drivers/net/ethernet/intel/ice/ice_controlq.c | 4
 drivers/net/ethernet/intel/ice/ice_dcb.c | 40
 drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 47
 drivers/net/ethernet/intel/ice/ice_dcb_nl.c | 50
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 14
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 10
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 3
 drivers/net/ethernet/intel/ice/ice_lag.c | 445
 drivers/net/ethernet/intel/ice/ice_lag.h | 87
 drivers/net/ethernet/intel/ice/ice_lib.c | 142
 drivers/net/ethernet/intel/ice/ice_main.c | 87
 drivers/net/ethernet/intel/ice/ice_sched.c | 1059
 drivers/net/ethernet/intel/ice/ice_sched.h | 24
 drivers/net/ethernet/intel/ice/ice_switch.c | 2
 drivers/net/ethernet/intel/ice/ice_txrx.c | 61
 drivers/net/ethernet/intel/ice/ice_txrx.h | 1
 drivers/net/ethernet/intel/ice/ice_type.h | 27
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 72
 drivers/net/ethernet/intel/ice/ice_xsk.c | 71
25 files changed, 3122 insertions, 290 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index ee43ea57d443..e7d9cbff771b 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -1,46 +1,1031 @@
.. SPDX-License-Identifier: GPL-2.0+
-==================================================================
-Linux Base Driver for the Intel(R) Ethernet Connection E800 Series
-==================================================================
+=================================================================
+Linux Base Driver for the Intel(R) Ethernet Controller 800 Series
+=================================================================
Intel ice Linux driver.
-Copyright(c) 2018 Intel Corporation.
+Copyright(c) 2018-2021 Intel Corporation.
Contents
========
-- Enabling the driver
-- Support
+- Overview
+- Identifying Your Adapter
+- Important Notes
+- Additional Features & Configurations
+- Performance Optimization
-The driver in this release supports Intel's E800 Series of products. For
-more information, visit Intel's support page at https://support.intel.com.
-Enabling the driver
-===================
-The driver is enabled via the standard kernel configuration system,
-using the make command::
+The associated Virtual Function (VF) driver for this driver is iavf.
- make oldconfig/menuconfig/etc.
+Driver information can be obtained using ethtool and lspci.
-The driver is located in the menu structure at:
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
+
+This driver supports XDP (Express Data Path) and AF_XDP zero-copy. Note that
+XDP is blocked for frame sizes larger than 3KB.
+
+
+Identifying Your Adapter
+========================
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+
+Important Notes
+===============
+
+Packet drops may occur under receive stress
+-------------------------------------------
+Devices based on the Intel(R) Ethernet Controller 800 Series are designed to
+tolerate a limited amount of system latency during PCIe and DMA transactions.
+If these transactions take longer than the tolerated latency, it can impact the
+length of time the packets are buffered in the device and associated memory,
+which may result in dropped packets. These packet drops typically do not have
+a noticeable impact on throughput and performance under standard workloads.
+
+If these packet drops appear to affect your workload, the following may improve
+the situation:
+
+1) Make sure that your system's physical memory is in a high-performance
+ configuration, as recommended by the platform vendor. A common
+ recommendation is for all channels to be populated with a single DIMM
+ module.
+2) In your system's BIOS/UEFI settings, select the "Performance" profile.
+3) Your distribution may provide tools like "tuned," which can help tune
+ kernel settings to better suit different workloads.
+
+
+Configuring SR-IOV for improved network security
+------------------------------------------------
+In a virtualized environment, on Intel(R) Ethernet Network Adapters that
+support SR-IOV, the virtual function (VF) may be subject to malicious behavior.
+Software-generated layer two frames, like IEEE 802.3x (link flow control), IEEE
+802.1Qbb (priority based flow-control), and others of this type, are not
+expected and can throttle traffic between the host and the virtual switch,
+reducing performance. To resolve this issue, and to ensure isolation from
+unintended traffic streams, configure all SR-IOV enabled ports for VLAN tagging
+from the administrative interface on the PF. This configuration allows
+unexpected, and potentially malicious, frames to be dropped.
+
+See "Configuring VLAN Tagging on SR-IOV Enabled Adapter Ports" later in this
+README for configuration instructions.
+
+
+Do not unload port driver if VF with active VM is bound to it
+-------------------------------------------------------------
+Do not unload a port's driver if a Virtual Function (VF) with an active Virtual
+Machine (VM) is bound to it. Doing so will cause the port to appear to hang.
+Once the VM shuts down, or otherwise releases the VF, the command will
+complete.
+
+
+Important notes for SR-IOV and Link Aggregation
+-----------------------------------------------
+Link Aggregation is mutually exclusive with SR-IOV.
+
+- If Link Aggregation is active, SR-IOV VFs cannot be created on the PF.
+- If SR-IOV is active, you cannot set up Link Aggregation on the interface.
+
+Bridging and MACVLAN are also affected by this. If you wish to use bridging or
+MACVLAN with SR-IOV, you must set up bridging or MACVLAN before enabling
+SR-IOV. If you are using bridging or MACVLAN in conjunction with SR-IOV, and
+you want to remove the interface from the bridge or MACVLAN, you must follow
+these steps:
+
+1. Destroy SR-IOV VFs if they exist
+2. Remove the interface from the bridge or MACVLAN
+3. Recreate SR-IOV VFs as needed (see the example below)
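+
+For example, a minimal sketch of this sequence, assuming eth0 is the PF, the
+interface is attached to a bridge, and 4 VFs are wanted afterward (names and
+counts are illustrative)::
+
+ # echo 0 > /sys/class/net/eth0/device/sriov_numvfs
+ # ip link set eth0 nomaster
+ # echo 4 > /sys/class/net/eth0/device/sriov_numvfs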
+
+
+Additional Features and Configurations
+======================================
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+https://kernel.org/pub/software/network/ethtool/
+
+NOTE: The rx_bytes value of ethtool does not match the rx_bytes value of
+Netdev, due to the 4-byte CRC being stripped by the device. The difference
+between the two rx_bytes values will be 4 x the number of Rx packets. For
+example, if Rx packets are 10 and Netdev (software statistics) displays
+rx_bytes as "X", then ethtool (hardware statistics) will display rx_bytes as
+"X+40" (4 bytes CRC x 10 packets).
+
+
+Viewing Link Messages
+---------------------
+Link messages will not be displayed to the console if the distribution is
+restricting system messages. In order to see network driver link messages on
+your console, set the console logging level to eight by entering the
+following::
+
+ # dmesg -n 8
+
+NOTE: This setting is not saved across reboots.
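+
+To make the setting persist across reboots, one option (assuming a
+sysctl-based distribution) is to raise the console log level via
+kernel.printk::
+
+ # sysctl -w kernel.printk="8 4 1 7"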
+
+
+Dynamic Device Personalization
+------------------------------
+Dynamic Device Personalization (DDP) allows you to change the packet processing
+pipeline of a device by applying a profile package to the device at runtime.
+Profiles can be used to, for example, add support for new protocols, change
+existing protocols, or change default settings. DDP profiles can also be rolled
+back without rebooting the system.
+
+The DDP package loads during device initialization. The driver looks for
+``intel/ice/ddp/ice.pkg`` in your firmware root (typically ``/lib/firmware/``
+or ``/lib/firmware/updates/``) and checks that it contains a valid DDP package
+file.
+
+NOTE: Your distribution should likely have provided the latest DDP file, but if
+ice.pkg is missing, you can find it in the linux-firmware repository or from
+intel.com.
+
+If the driver is unable to load the DDP package, the device will enter Safe
+Mode. Safe Mode disables advanced and performance features and supports only
+basic traffic and minimal functionality, such as updating the NVM or
+downloading a new driver or DDP package. Safe Mode only applies to the affected
+physical function and does not impact any other PFs. See the "Intel(R) Ethernet
+Adapters and Devices User Guide" for more details on DDP and Safe Mode.
+
+NOTES:
+
+- If you encounter issues with the DDP package file, you may need to download
+ an updated driver or DDP package file. See the log messages for more
+ information.
+
+- The ice.pkg file is a symbolic link to the default DDP package file.
+
+- You cannot update the DDP package if any PF drivers are already loaded. To
+ overwrite a package, unload all PFs and then reload the driver with the new
+ package.
+
+- Only the first loaded PF per device can download a package for that device.
+
+You can install specific DDP package files for different physical devices in
+the same system. To install a specific DDP package file:
+
+1. Download the DDP package file you want for your device.
+
+2. Rename the file ice-xxxxxxxxxxxxxxxx.pkg, where 'xxxxxxxxxxxxxxxx' is the
+ unique 64-bit PCI Express device serial number (in hex) of the device you
+ want the package downloaded on. The filename must include the complete
+ serial number (including leading zeros) and be all lowercase. For example,
+ if the 64-bit serial number is b887a3ffffca0568, then the file name would be
+ ice-b887a3ffffca0568.pkg.
+
+ To find the serial number from the PCI bus address, you can use the
+ following command::
+
+ # lspci -vv -s af:00.0 | grep -i Serial
+ Capabilities: [150 v1] Device Serial Number b8-87-a3-ff-ff-ca-05-68
+
+ You can use the following command to format the serial number without the
+ dashes::
+
+ # lspci -vv -s af:00.0 | grep -i Serial | awk '{print $7}' | sed s/-//g
+ b887a3ffffca0568
+
+3. Copy the renamed DDP package file to
+ ``/lib/firmware/updates/intel/ice/ddp/``. If the directory does not yet
+ exist, create it before copying the file.
+
+4. Unload all of the PFs on the device.
+
+5. Reload the driver with the new package.
+
+NOTE: The presence of a device-specific DDP package file overrides the loading
+of the default DDP package file (ice.pkg).
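+
+As a sketch of the full sequence, assuming the serial number b887a3ffffca0568
+from the example above and that the ice driver is loaded as a module with no
+other users::
+
+ # cp ice-b887a3ffffca0568.pkg /lib/firmware/updates/intel/ice/ddp/
+ # rmmod ice
+ # modprobe ice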
+
+
+Intel(R) Ethernet Flow Director
+-------------------------------
+The Intel Ethernet Flow Director performs the following tasks:
+
+- Directs receive packets according to their flows to different queues
+- Enables tight control on routing a flow in the platform
+- Matches flows and CPU cores for flow affinity
+
+NOTE: This driver supports the following flow types:
+
+- IPv4
+- TCPv4
+- UDPv4
+- SCTPv4
+- IPv6
+- TCPv6
+- UDPv6
+- SCTPv6
+
+Each flow type supports valid combinations of IP addresses (source or
+destination) and UDP/TCP/SCTP ports (source and destination). You can supply
+only a source IP address, a source IP address and a destination port, or any
+combination of one or more of these four parameters.
+
+NOTE: This driver allows you to filter traffic based on a user-defined flexible
+two-byte pattern and offset by using the ethtool user-def and mask fields. Only
+L3 and L4 flow types are supported for user-defined flexible filters. For a
+given flow type, you must clear all Intel Ethernet Flow Director filters before
+changing the input set (for that flow type).
+
+
+Flow Director Filters
+---------------------
+Flow Director filters are used to direct traffic that matches specified
+characteristics. They are enabled through ethtool's ntuple interface. To enable
+or disable the Intel Ethernet Flow Director and these filters::
+
+ # ethtool -K <ethX> ntuple <off|on>
+
+NOTE: When you disable ntuple filters, all the user programmed filters are
+flushed from the driver cache and hardware. All needed filters must be re-added
+when ntuple is re-enabled.
+
+To display all of the active filters::
+
+ # ethtool -u <ethX>
+
+To add a new filter::
+
+ # ethtool -U <ethX> flow-type <type> src-ip <ip> [m <ip_mask>] dst-ip <ip>
+ [m <ip_mask>] src-port <port> [m <port_mask>] dst-port <port> [m <port_mask>]
+ action <queue>
+
+ Where:
+ <ethX> - the Ethernet device to program
+ <type> - can be ip4, tcp4, udp4, sctp4, ip6, tcp6, udp6, sctp6
+ <ip> - the IP address to match on
+ <ip_mask> - the IPv4 address to mask on
+ NOTE: These filters use inverted masks.
+ <port> - the port number to match on
+ <port_mask> - the 16-bit integer for masking
+ NOTE: These filters use inverted masks.
+ <queue> - the queue to direct traffic toward (-1 discards the
+ matched traffic)
+
+To delete a filter::
+
+ # ethtool -U <ethX> delete <N>
+
+ Where <N> is the filter ID displayed when printing all the active filters,
+ and may also have been specified using "loc <N>" when adding the filter.
+
+EXAMPLES:
+
+To add a filter that directs packets to queue 2::
+
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 dst-ip \
+ 192.168.10.2 src-port 2000 dst-port 2001 action 2 [loc 1]
+
+To set a filter using only the source and destination IP address::
+
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 dst-ip \
+ 192.168.10.2 action 2 [loc 1]
+
+To set a filter based on a user-defined pattern and offset::
+
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 dst-ip \
+ 192.168.10.2 user-def 0x4FFFF action 2 [loc 1]
+
+ where the value of the user-def field contains the offset (4 bytes) and
+ the pattern (0xffff).
+
+To match TCP traffic sent from 192.168.0.1, port 5300, directed to 192.168.0.5,
+port 80, and then send it to queue 7::
+
+ # ethtool -U enp130s0 flow-type tcp4 src-ip 192.168.0.1 dst-ip 192.168.0.5
+ src-port 5300 dst-port 80 action 7
+
+To add a TCPv4 filter with a partial mask for a source IP subnet::
+
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.0.0 m 0.255.255.255 dst-ip
+ 192.168.5.12 src-port 12600 dst-port 31 action 12
+
+NOTES:
+
+For each flow-type, the programmed filters must all have the same matching
+input set. For example, issuing the following two commands is acceptable::
+
+ # ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+ # ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.5 src-port 55 action 10
+
+Issuing the next two commands, however, is not acceptable, since the first
+specifies src-ip and the second specifies dst-ip::
+
+ # ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+ # ethtool -U enp130s0 flow-type ip4 dst-ip 192.168.0.5 src-port 55 action 10
+
+The second command will fail with an error. You may program multiple filters
+with the same fields, using different values, but, on one device, you may not
+program two tcp4 filters with different matching fields.
+
+The ice driver does not support matching on a subportion of a field, thus
+partial mask fields are not supported.
+
+
+Flex Byte Flow Director Filters
+-------------------------------
+The driver also supports matching user-defined data within the packet payload.
+This flexible data is specified using the "user-def" field of the ethtool
+command in the following way:
+
+.. table::
+
+ ============================== ============================
+ ``31 28 24 20 16`` ``15 12 8 4 0``
+ ``offset into packet payload`` ``2 bytes of flexible data``
+ ============================== ============================
+
+For example,
+
+::
+
+ ... user-def 0x4FFFF ...
+
+tells the filter to look 4 bytes into the payload and match that value against
+0xFFFF. The offset is based on the beginning of the payload, and not the
+beginning of the packet. Thus
+
+::
+
+ flow-type tcp4 ... user-def 0x8BEAF ...
+
+would match TCP/IPv4 packets which have the value 0xBEAF 8 bytes into the
+TCP/IPv4 payload.
+
+Note that ICMP headers are parsed as 4 bytes of header and 4 bytes of payload.
+Thus to match the first byte of the payload, you must actually add 4 bytes to
+the offset. Also note that ip4 filters match both ICMP frames and raw
+(unknown) ip4 frames, where the payload will be the L3 payload of the IP4
+frame.
+
+The maximum offset is 64. The hardware will only read up to 64 bytes of data
+from the payload. The offset must be even because the flexible data is 2 bytes
+long and must be aligned to byte 0 of the packet payload.
+
+The user-defined flexible offset is also considered part of the input set and
+cannot be programmed separately for multiple filters of the same type. However,
+the flexible data is not part of the input set and multiple filters may use the
+same offset but match against different data.
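+
+For example, the following two filters share the same flexible offset (8
+bytes) but match different data, which is allowed (values are illustrative)::
+
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 user-def 0x8BEAF action 2
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.2 user-def 0x8DEAD action 3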
+
+
+RSS Hash Flow
+-------------
+Allows you to set the hash bytes per flow type and any combination of one or
+more options for Receive Side Scaling (RSS) hash byte configuration.
+
+::
+
+ # ethtool -N <ethX> rx-flow-hash <type> <option>
+
+ Where <type> is:
+ tcp4 signifying TCP over IPv4
+ udp4 signifying UDP over IPv4
+ tcp6 signifying TCP over IPv6
+ udp6 signifying UDP over IPv6
+ And <option> is one or more of:
+ s Hash on the IP source address of the Rx packet.
+ d Hash on the IP destination address of the Rx packet.
+ f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet.
+ n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet.
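+
+For example, to hash tcp4 traffic on the source and destination IP addresses
+and ports (eth0 is an assumed interface name)::
+
+ # ethtool -N eth0 rx-flow-hash tcp4 sdfn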
+
+
+Accelerated Receive Flow Steering (aRFS)
+----------------------------------------
+Devices based on the Intel(R) Ethernet Controller 800 Series support
+Accelerated Receive Flow Steering (aRFS) on the PF. aRFS is a load-balancing
+mechanism that allows you to direct packets to the same CPU where an
+application is running or consuming the packets in that flow.
+
+NOTES:
+
+- aRFS requires that ntuple filtering is enabled via ethtool.
+- aRFS support is limited to the following packet types:
+
+ - TCP over IPv4 and IPv6
+ - UDP over IPv4 and IPv6
+ - Nonfragmented packets
+
+- aRFS only supports Flow Director filters, which consist of the
+ source/destination IP addresses and source/destination ports.
+- aRFS and ethtool's ntuple interface both use the device's Flow Director. aRFS
+ and ntuple features can coexist, but you may encounter unexpected results if
+ there's a conflict between aRFS and ntuple requests. See "Intel(R) Ethernet
+ Flow Director" for additional information.
+
+To set up aRFS:
+
+1. Enable the Intel Ethernet Flow Director and ntuple filters using ethtool.
+
+::
+
+ # ethtool -K <ethX> ntuple on
+
+2. Set up the number of entries in the global flow table. For example:
+
+::
+
+ # NUM_RPS_ENTRIES=16384
+ # echo $NUM_RPS_ENTRIES > /proc/sys/net/core/rps_sock_flow_entries
+
+3. Set up the number of entries in the per-queue flow table. For example:
+
+::
+
+ # NUM_RX_QUEUES=64
+ # for file in /sys/class/net/$IFACE/queues/rx-*/rps_flow_cnt; do
+ # echo $(($NUM_RPS_ENTRIES/$NUM_RX_QUEUES)) > $file;
+ # done
+
+4. Disable the IRQ balance daemon (this is only a temporary stop of the service
+ until the next reboot).
+
+::
+
+ # systemctl stop irqbalance
+
+5. Configure the interrupt affinity.
+
+ See ``/Documentation/core-api/irq/irq-affinity.rst``
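+
+ A minimal sketch of pinning a queue interrupt to a CPU (the exact IRQ
+ names vary; check /proc/interrupts on your system)::
+
+ # grep <ethX> /proc/interrupts
+ # echo 4 > /proc/irq/<irq number>/smp_affinity_list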
+
+
+To disable aRFS using ethtool::
+
+ # ethtool -K <ethX> ntuple off
+
+NOTE: This command will disable ntuple filters and clear any aRFS filters in
+software and hardware.
+
+Example Use Case:
+
+1. Set the server application on the desired CPU (e.g., CPU 4).
+
+::
+
+ # taskset -c 4 netserver
+
+2. Use netperf to route traffic from the client to CPU 4 on the server with
+ aRFS configured. This example uses TCP over IPv4.
+
+::
+
+ # netperf -H <Host IPv4 Address> -t TCP_STREAM
+
+
+Enabling Virtual Functions (VFs)
+--------------------------------
+Use sysfs to enable virtual functions (VF).
+
+For example, you can create 4 VFs as follows::
+
+ # echo 4 > /sys/class/net/<ethX>/device/sriov_numvfs
+
+To disable VFs, write 0 to the same file::
+
+ # echo 0 > /sys/class/net/<ethX>/device/sriov_numvfs
+
+The maximum number of VFs for the ice driver is 256 total (all ports). To check
+how many VFs each PF supports, use the following command::
+
+ # cat /sys/class/net/<ethX>/device/sriov_totalvfs
+
+Note: You cannot use SR-IOV when link aggregation (LAG)/bonding is active, and
+vice versa. The driver enforces this mutual exclusion.
+
+
+Displaying VF Statistics on the PF
+----------------------------------
+Use the following command to display the statistics for the PF and its VFs::
+
+ # ip -s link show dev <ethX>
+
+NOTE: The output of this command can be very large due to the maximum number of
+possible VFs.
+
+The PF driver will display a subset of the statistics for the PF and for all
+VFs that are configured. The PF will always print a statistics block for each
+of the possible VFs, and it will show zero for all unconfigured VFs.
+
+
+Configuring VLAN Tagging on SR-IOV Enabled Adapter Ports
+--------------------------------------------------------
+To configure VLAN tagging for the ports on an SR-IOV enabled adapter, use the
+following command. The VLAN configuration should be done before the VF driver
+is loaded or the VM is booted. The VF is not aware of the VLAN tag being
+inserted on transmitted frames and removed on received frames (sometimes
+called "port VLAN" mode).
+
+::
+
+ # ip link set dev <ethX> vf <id> vlan <vlan id>
+
+For example, the following will configure PF eth0 and the first VF on VLAN 10::
+
+ # ip link set dev eth0 vf 0 vlan 10
+
+
+Enabling a VF link if the port is disconnected
+----------------------------------------------
+If the physical function (PF) link is down, you can force link up (from the
+host PF) on any virtual functions (VF) bound to the PF.
+
+For example, to force link up on VF 0 bound to PF eth0::
+
+ # ip link set eth0 vf 0 state enable
+
+Note: If the command does not work, it may not be supported by your system.
+
+
+Setting the MAC Address for a VF
+--------------------------------
+To change the MAC address for the specified VF::
+
+ # ip link set <ethX> vf 0 mac <address>
+
+For example::
+
+ # ip link set <ethX> vf 0 mac 00:01:02:03:04:05
+
+This setting lasts until the PF is reloaded.
+
+NOTE: Assigning a MAC address for a VF from the host will disable any
+subsequent requests to change the MAC address from within the VM. This is a
+security feature. The VM is not aware of this restriction, so if this is
+attempted in the VM, it will trigger MDD events.
+
+
+Trusted VFs and VF Promiscuous Mode
+-----------------------------------
+This feature allows you to designate a particular VF as trusted and allows that
+trusted VF to request selective promiscuous mode on the Physical Function (PF).
+
+To set a VF as trusted or untrusted, enter the following command in the
+Hypervisor::
+
+ # ip link set dev <ethX> vf 1 trust [on|off]
+
+NOTE: It's important to set the VF to trusted before setting promiscuous mode.
+If the VM is not trusted, the PF will ignore promiscuous mode requests from the
+VF. If the VM becomes trusted after the VF driver is loaded, you must make a
+new request to set the VF to promiscuous.
+
+Once the VF is designated as trusted, use the following commands in the VM to
+set the VF to promiscuous mode.
+
+For promiscuous all::
+
+ # ip link set <ethX> promisc on
+ Where <ethX> is a VF interface in the VM
+
+For promiscuous Multicast::
+
+ # ip link set <ethX> allmulticast on
+ Where <ethX> is a VF interface in the VM
+
+NOTE: By default, the ethtool private flag vf-true-promisc-support is set to
+"off," meaning that promiscuous mode for the VF will be limited. To set the
+promiscuous mode for the VF to true promiscuous and allow the VF to see all
+ingress traffic, use the following command::
+
+ # ethtool --set-priv-flags <ethX> vf-true-promisc-support on
+
+The vf-true-promisc-support private flag does not enable promiscuous mode;
+rather, it designates which type of promiscuous mode (limited or true) you will
+get when you enable promiscuous mode using the ip link commands above. Note
+that this is a global setting that affects the entire device. However, the
+vf-true-promisc-support private flag is only exposed to the first PF of the
+device. The PF remains in limited promiscuous mode regardless of the
+vf-true-promisc-support setting.
+
+Next, add a VLAN interface on the VF interface. For example::
+
+ # ip link add link eth2 name eth2.100 type vlan id 100
+
+Note that the order in which you set the VF to promiscuous mode and add the
+VLAN interface does not matter (you can do either first). The result in this
+example is that the VF will get all traffic that is tagged with VLAN 100.
+
+
+Malicious Driver Detection (MDD) for VFs
+----------------------------------------
+Some Intel Ethernet devices use Malicious Driver Detection (MDD) to detect
+malicious traffic from the VF and disable Tx/Rx queues or drop the offending
+packet until a VF driver reset occurs. You can view MDD messages in the PF's
+system log using the dmesg command.
+
+- If the PF driver logs MDD events from the VF, confirm that the correct VF
+ driver is installed.
+- To restore functionality, you can manually reload the VF or VM or enable
+ automatic VF resets.
+- When automatic VF resets are enabled, the PF driver will immediately reset
+ the VF and reenable queues when it detects MDD events on the receive path.
+- If automatic VF resets are disabled, the PF will not automatically reset the
+ VF when it detects MDD events.
+
+To enable or disable automatic VF resets, use the following command::
+
+ # ethtool --set-priv-flags <ethX> mdd-auto-reset-vf on|off
+
+
+MAC and VLAN Anti-Spoofing Feature for VFs
+------------------------------------------
+When a malicious driver on a Virtual Function (VF) interface attempts to send a
+spoofed packet, it is dropped by the hardware and not transmitted.
+
+NOTE: This feature can be disabled for a specific VF::
+
+ # ip link set <ethX> vf <vf id> spoofchk {off|on}
+
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <ethX> is the interface number::
+
+ # ifconfig <ethX> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+ # ip link set mtu 9000 dev <ethX>
+ # ip link set up dev <ethX>
+
+This setting is not saved across reboots.
+
+
+NOTE: The maximum MTU setting for jumbo frames is 9702. This corresponds to the
+maximum jumbo frame size of 9728 bytes.
+
+NOTE: This driver will attempt to use multiple, page-sized buffers to receive
+each jumbo packet. This should help to avoid buffer starvation issues when
+allocating receive packets.
+
+NOTE: Packet loss may have a greater impact on throughput when you use jumbo
+frames. If you observe a drop in performance after enabling jumbo frames,
+enabling flow control may mitigate the issue.
+
+
+Speed and Duplex Configuration
+------------------------------
+In addressing speed and duplex configuration issues, you need to distinguish
+between copper-based adapters and fiber-based adapters.
+
+In the default mode, an Intel(R) Ethernet Network Adapter using copper
+connections will attempt to auto-negotiate with its link partner to determine
+the best setting. If the adapter cannot establish link with the link partner
+using auto-negotiation, you may need to manually configure the adapter and link
+partner to identical settings to establish link and pass packets. This should
+only be needed when attempting to link with an older switch that does not
+support auto-negotiation or one that has been forced to a specific speed or
+duplex mode. Your link partner must match the setting you choose. 1 Gbps speeds
+and higher cannot be forced. Use the autonegotiation advertising setting to
+manually set devices for 1 Gbps and higher.
+
+Speed, duplex, and autonegotiation advertising are configured through the
+ethtool utility. For the latest version, download and install ethtool from the
+following website:
+
+ https://kernel.org/pub/software/network/ethtool/
+
+To see the speed configurations your device supports, run the following::
+
+ # ethtool <ethX>
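+
+For example, to then advertise only 10Gbps full duplex, one sketch is (0x1000
+is the 10000baseT/Full link mode mask; see ethtool(8) for the full list, and
+eth0 is an assumed interface name)::
+
+ # ethtool -s eth0 advertise 0x1000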
+
+Caution: Only experienced network administrators should force speed and duplex
+or change autonegotiation advertising manually. The settings at the switch must
+always match the adapter settings. Adapter performance may suffer or your
+adapter may not operate if you configure the adapter differently from your
+switch.
+
+
+Data Center Bridging (DCB)
+--------------------------
+NOTE: The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+DCB is a configuration Quality of Service implementation in hardware. It uses
+the VLAN priority tag (802.1p) to filter traffic. That means that there are 8
+different priorities that traffic can be filtered into. It also enables
+priority flow control (802.1Qbb) which can limit or eliminate the number of
+dropped packets during network stress. Bandwidth can be allocated to each of
+these priorities, which is enforced at the hardware level (802.1Qaz).
+
+DCB is normally configured on the network using the DCBX protocol (802.1Qaz), a
+specialization of LLDP (802.1AB). The ice driver supports the following
+mutually exclusive variants of DCBX support:
+
+1) Firmware-based LLDP Agent
+2) Software-based LLDP Agent
+
+In firmware-based mode, firmware intercepts all LLDP traffic and handles DCBX
+negotiation transparently for the user. In this mode, the adapter operates in
+"willing" DCBX mode, receiving DCB settings from the link partner (typically a
+switch). The local user can only query the negotiated DCB configuration. For
+information on configuring DCBX parameters on a switch, please consult the
+switch manufacturer's documentation.
+
+In software-based mode, LLDP traffic is forwarded to the network stack and user
+space, where a software agent can handle it. In this mode, the adapter can
+operate in either "willing" or "nonwilling" DCBX mode and DCB configuration can
+be both queried and set locally. This mode requires the FW-based LLDP Agent to
+be disabled.
+
+NOTE:
+
+- You can enable and disable the firmware-based LLDP Agent using an ethtool
+ private flag. Refer to the "FW-LLDP (Firmware Link Layer Discovery Protocol)"
+ section in this README for more information.
+- In software-based DCBX mode, you can configure DCB parameters using software
+ LLDP/DCBX agents that interface with the Linux kernel's DCB Netlink API. We
+ recommend using OpenLLDP as the DCBX agent when running in software mode. For
+ more information, see the OpenLLDP man pages and
+ https://github.com/intel/openlldp (a brief sketch follows these notes).
+- The driver implements the DCB netlink interface layer to allow the user space
+ to communicate with the driver and query DCB configuration for the port.
+- iSCSI with DCB is not supported.
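+
+As a brief sketch of software-mode configuration with OpenLLDP (the interface
+name and priorities are illustrative; consult the lldptool man pages for the
+exact syntax supported by your version)::
+
+ # lldptool set-lldp -i eth0 adminStatus=rxtx
+ # lldptool -T -i eth0 -V PFC enabled=3,4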
+
+
+FW-LLDP (Firmware Link Layer Discovery Protocol)
+------------------------------------------------
+Use ethtool to change FW-LLDP settings. The FW-LLDP setting is per port and
+persists across boots.
+
+To enable LLDP::
+
+ # ethtool --set-priv-flags <ethX> fw-lldp-agent on
+
+To disable LLDP::
+
+ # ethtool --set-priv-flags <ethX> fw-lldp-agent off
+
+To check the current LLDP setting::
+
+ # ethtool --show-priv-flags <ethX>
+
+NOTE: You must enable the UEFI HII "LLDP Agent" attribute for this setting to
+take effect. If "LLDP AGENT" is set to disabled, you cannot enable it from the
+OS.
+
+
+Flow Control
+------------
+Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
+receiving and transmitting pause frames for ice. When transmit is enabled,
+pause frames are generated when the receive packet buffer crosses a predefined
+threshold. When receive is enabled, the transmit unit will halt for the time
+delay specified when a pause frame is received.
+
+NOTE: You must have a flow control capable link partner.
+
+Flow Control is disabled by default.
+
+Use ethtool to change the flow control settings.
+
+To enable or disable Rx or Tx Flow Control::
+
+ # ethtool -A <ethX> rx <on|off> tx <on|off>
+
+Note: This command only enables or disables Flow Control if auto-negotiation is
+disabled. If auto-negotiation is enabled, this command changes the parameters
+used for auto-negotiation with the link partner.
+
+Note: Flow Control auto-negotiation is part of link auto-negotiation. Depending
+on your device, you may not be able to change the auto-negotiation setting.
+
+NOTE:
+
+- The ice driver requires flow control on both the port and link partner. If
+ flow control is disabled on one of the sides, the port may appear to hang on
+ heavy traffic.
+- You may encounter issues with link-level flow control (LFC) after disabling
+ DCB. The LFC status may show as enabled but traffic is not paused. To resolve
+ this issue, disable and reenable LFC using ethtool::
+
+ # ethtool -A <ethX> rx off tx off
+ # ethtool -A <ethX> rx on tx on
+
+
+NAPI
+----
+This driver supports NAPI (Rx polling mode).
+For more information on NAPI, see
+https://www.linuxfoundation.org/collaborate/workgroups/networking/napi
+
+
+MACVLAN
+-------
+This driver supports MACVLAN. Kernel support for MACVLAN can be tested by
+checking if the MACVLAN driver is loaded. You can run 'lsmod | grep macvlan' to
+see if the MACVLAN driver is loaded or run 'modprobe macvlan' to try to load
+the MACVLAN driver.
+
+NOTE:
+
+- In passthru mode, you can only set up one MACVLAN device. It will inherit the
+ MAC address of the underlying PF (Physical Function) device.
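+
+For example, to create a MACVLAN device in bridge mode on top of the PF
+(eth0 and macvlan0 are assumed names)::
+
+ # ip link add link eth0 name macvlan0 type macvlan mode bridge
+ # ip link set macvlan0 up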
+
+
+IEEE 802.1ad (QinQ) Support
+---------------------------
+The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN
+IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as
+"tags," and multiple VLAN IDs are thus referred to as a "tag stack." Tag stacks
+allow L2 tunneling and the ability to segregate traffic within a particular
+VLAN ID, among other uses.
+
+NOTES:
+
+- Receive checksum offloads and VLAN acceleration are not supported for 802.1ad
+ (QinQ) packets.
+
+- 0x88A8 traffic will not be received unless VLAN stripping is disabled with
+ the following command::
+
+ # ethtool -K <ethX> rxvlan off
+
+- 0x88A8/0x8100 double VLANs cannot be used with 0x8100 or 0x8100/0x8100 VLANs
+ configured on the same port. 0x88a8/0x8100 traffic will not be received if
+ 0x8100 VLANs are configured.
+
+- The VF can only transmit 0x88A8/0x8100 (i.e., 802.1ad/802.1Q) traffic if:
+
+ 1) The VF is not assigned a port VLAN.
+ 2) spoofchk is disabled from the PF. If you enable spoofchk, the VF will
+ not transmit 0x88A8/0x8100 traffic.
+
+- The VF may not receive all network traffic based on the Inner VLAN header
+ when VF true promiscuous mode (vf-true-promisc-support) and double VLANs are
+ enabled in SR-IOV mode.
+
+The following are examples of how to configure 802.1ad (QinQ)::
+
+ # ip link add link eth0 eth0.24 type vlan proto 802.1ad id 24
+ # ip link add link eth0.24 eth0.24.371 type vlan proto 802.1Q id 371
+
+ Where "24" and "371" are example VLAN IDs.
+
+
+Tunnel/Overlay Stateless Offloads
+---------------------------------
+Supported tunnels and overlays include VXLAN, GENEVE, and others depending on
+hardware and software configuration. Stateless offloads are enabled by default.
+
+To view the current state of all offloads::
+
+ # ethtool -k <ethX>
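+
+For example, stateless offloads apply automatically to a VXLAN device created
+on top of the PF (a sketch; the VNI, addresses, and names are assumptions)::
+
+ # ip link add vxlan0 type vxlan id 100 remote 192.168.1.2 dstport 4789 dev eth0
+ # ip link set vxlan0 up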
+
+
+UDP Segmentation Offload
+------------------------
+Allows the adapter to offload transmit segmentation of UDP packets with
+payloads up to 64K into valid Ethernet frames. Because the adapter hardware is
+able to complete data segmentation much faster than operating system software,
+this feature may improve transmission performance.
+In addition, the adapter may use fewer CPU resources.
+
+NOTE:
+
+- The application sending UDP packets must support UDP segmentation offload.
+
+To enable/disable UDP Segmentation Offload, issue the following command::
+
+ # ethtool -K <ethX> tx-udp-segmentation [off|on]
+
+
+Performance Optimization
+========================
+Driver defaults are meant to fit a wide variety of workloads, but if further
+optimization is required, we recommend experimenting with the following
+settings.
+
+
+Rx Descriptor Ring Size
+-----------------------
+To reduce the number of Rx packet discards, increase the number of Rx
+descriptors for each Rx ring using ethtool.
+
+ Check if the interface is dropping Rx packets due to buffers being full
+ (a non-zero rx_dropped.nic can indicate insufficient PCIe bandwidth)::
+
+ # ethtool -S <ethX> | grep "rx_dropped"
+
+ If the previous command shows drops on queues, it may help to increase
+ the number of descriptors using 'ethtool -G'::
+
+ # ethtool -G <ethX> rx <N>
+ Where <N> is the desired number of ring entries/descriptors
+
+ This can provide temporary buffering for issues that create latency while
+ the CPUs process descriptors.
+
+
+Interrupt Rate Limiting
+-----------------------
+This driver supports an adaptive interrupt throttle rate (ITR) mechanism that
+is tuned for general workloads. The user can customize the interrupt rate
+control for specific workloads, via ethtool, adjusting the number of
+microseconds between interrupts.
+
+To set the interrupt rate manually, you must disable adaptive mode::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off
+
+For lower CPU utilization:
+
+ Disable adaptive ITR and lower Rx and Tx interrupts. The examples below
+ affect every queue of the specified interface.
+
+ Setting rx-usecs and tx-usecs to 80 will limit interrupts to about
+ 12,500 interrupts per second per queue::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 80 tx-usecs 80
+
+For reduced latency:
+
+ Disable adaptive ITR and ITR by setting rx-usecs and tx-usecs to 0
+ using ethtool::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 0 tx-usecs 0
+
+Per-queue interrupt rate settings:
+
+ The following examples are for queues 1 and 3, but you can adjust other
+ queues.
+
+ To disable Rx adaptive ITR and set static Rx ITR to 10 microseconds or
+ about 100,000 interrupts/second, for queues 1 and 3::
+
+ # ethtool --per-queue <ethX> queue_mask 0xa --coalesce adaptive-rx off
+ rx-usecs 10
+
+ To show the current coalesce settings for queues 1 and 3::
+
+ # ethtool --per-queue <ethX> queue_mask 0xa --show-coalesce
+
+Bounding interrupt rates using rx-usecs-high:
+
+ :Valid Range: 0-236 (0=no limit)
+
+ The range of 0-236 microseconds provides an effective range of 4,237 to
+ 250,000 interrupts per second. The value of rx-usecs-high can be set
+ independently of rx-usecs and tx-usecs in the same ethtool command, and is
+ also independent of the adaptive interrupt moderation algorithm. The
+ underlying hardware supports granularity in 4-microsecond intervals, so
+ adjacent values may result in the same interrupt rate.
+
+ The following command would disable adaptive interrupt moderation, and allow
+ a maximum of 5 microseconds before indicating a receive or transmit was
+ complete. However, instead of resulting in as many as 200,000 interrupts per
+ second, it limits total interrupts per second to 50,000 via the rx-usecs-high
+ parameter.
+
+ ::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs-high 20
+ rx-usecs 5 tx-usecs 5
+
+
+Virtualized Environments
+------------------------
+In addition to the other suggestions in this section, the following may be
+helpful to optimize performance in VMs.
+
+ Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to
+ individual LCPUs, making sure to use a set of CPUs included in the
+ device's local_cpulist: ``/sys/class/net/<ethX>/device/local_cpulist``.
+
+ Configure as many Rx/Tx queues in the VM as available. (See the iavf driver
+ documentation for the number of queues supported.) For example::
+
+ # ethtool -L <virt_interface> rx <max> tx <max>
- -> Device Drivers
- -> Network device support (NETDEVICES [=y])
- -> Ethernet driver support
- -> Intel devices
- -> Intel(R) Ethernet Connection E800 Series Support
Support
=======
For general information, go to the Intel support website at:
-
https://www.intel.com/support/
or the Intel Wired Networking project hosted by Sourceforge at:
-
https://sourceforge.net/projects/e1000
If an issue is identified with the released source code on a supported kernel
with a supported adapter, email the specific information related to the issue
to e1000-devel@lists.sf.net.
+
+
+Trademarks
+==========
+Intel is a trademark or registered trademark of Intel Corporation or its
+subsidiaries in the United States and/or other countries.
+
+* Other names and brands may be claimed as the property of others.
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 6da4f43f2348..73da4f71f530 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -24,6 +24,7 @@ ice-y := ice_main.o \
ice_flow.o \
ice_devlink.o \
ice_fw_update.o \
+ ice_lag.o \
ice_ethtool.o
ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index fca428c879ec..dae8280ce17c 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -39,6 +39,7 @@
#include <net/devlink.h>
#include <net/ipv6.h>
#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include <net/geneve.h>
#include <net/gre.h>
#include <net/udp_tunnel.h>
@@ -55,6 +56,7 @@
#include "ice_fdir.h"
#include "ice_xsk.h"
#include "ice_arfs.h"
+#include "ice_lag.h"
#define ICE_BAR0 0
#define ICE_REQ_DESC_MULTIPLE 32
@@ -326,9 +328,11 @@ struct ice_vsi {
struct ice_ring **xdp_rings; /* XDP ring array */
u16 num_xdp_txq; /* Used XDP queues */
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
- struct xsk_buff_pool **xsk_pools;
- u16 num_xsk_pools_used;
- u16 num_xsk_pools;
+
+ /* back reference to the aggregator node this VSI corresponds to */
+ struct ice_agg_node *agg_node;
} ____cacheline_internodealigned_in_smp;
/* struct that defines an interrupt vector */
@@ -377,6 +381,13 @@ enum ice_pf_flags {
ICE_PF_FLAGS_NBITS /* must be last */
};
+struct ice_agg_node {
+ u32 agg_id;
+#define ICE_MAX_VSIS_IN_AGG_NODE 64
+ u32 num_vsis;
+ u8 valid;
+};
+
struct ice_pf {
struct pci_dev *pdev;
@@ -455,6 +466,15 @@ struct ice_pf {
__le64 nvm_phy_type_lo; /* NVM PHY type low */
__le64 nvm_phy_type_hi; /* NVM PHY type high */
struct ice_link_default_override_tlv link_dflt_override;
+ struct ice_lag *lag; /* Link Aggregation information */
+
+#define ICE_INVALID_AGG_NODE_ID 0
+#define ICE_PF_AGG_NODE_ID_START 1
+#define ICE_MAX_PF_AGG_NODES 32
+ struct ice_agg_node pf_agg_node[ICE_MAX_PF_AGG_NODES];
+#define ICE_VF_AGG_NODE_ID_START 65
+#define ICE_MAX_VF_AGG_NODES 32
+ struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES];
};
struct ice_netdev_priv {
@@ -517,17 +537,15 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring)
*/
static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring)
{
- struct xsk_buff_pool **pools = ring->vsi->xsk_pools;
u16 qid = ring->q_index;
if (ice_ring_is_xdp(ring))
qid -= ring->vsi->num_xdp_txq;
- if (qid >= ring->vsi->num_xsk_pools || !pools || !pools[qid] ||
- !ice_is_xdp_ena_vsi(ring->vsi))
+ if (!ice_is_xdp_ena_vsi(ring->vsi))
return NULL;
- return pools[qid];
+ return xsk_get_pool_from_qid(ring->vsi->netdev, qid);
}
/**
@@ -557,11 +575,31 @@ static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf)
return pf->vsi[pf->ctrl_vsi_idx];
}
+/**
+ * ice_set_sriov_cap - enable SRIOV in PF flags
+ * @pf: PF struct
+ */
+static inline void ice_set_sriov_cap(struct ice_pf *pf)
+{
+ if (pf->hw.func_caps.common_cap.sr_iov_1_1)
+ set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+}
+
+/**
+ * ice_clear_sriov_cap - disable SRIOV in PF flags
+ * @pf: PF struct
+ */
+static inline void ice_clear_sriov_cap(struct ice_pf *pf)
+{
+ clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+}
+
#define ICE_FD_STAT_CTR_BLOCK_COUNT 256
#define ICE_FD_STAT_PF_IDX(base_idx) \
((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
#define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx)
+bool netif_is_ice(struct net_device *dev);
int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
int ice_vsi_open_ctrl(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index a51470b68d54..80186589153b 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -695,6 +695,18 @@ struct ice_aqc_sched_elem_cmd {
__le32 addr_low;
};
+struct ice_aqc_txsched_move_grp_info_hdr {
+ __le32 src_parent_teid;
+ __le32 dest_parent_teid;
+ __le16 num_elems;
+ __le16 reserved;
+};
+
+struct ice_aqc_move_elem {
+ struct ice_aqc_txsched_move_grp_info_hdr hdr;
+ __le32 teid[];
+};
+
struct ice_aqc_elem_info_bw {
__le16 bw_profile_idx;
__le16 bw_alloc;
@@ -1528,6 +1540,16 @@ struct ice_aqc_lldp_stop_start_specific_agent {
u8 reserved[15];
};
+/* LLDP Filter Control (direct 0x0A0A) */
+struct ice_aqc_lldp_filter_ctrl {
+ u8 cmd_flags;
+#define ICE_AQC_LLDP_FILTER_ACTION_ADD 0x0
+#define ICE_AQC_LLDP_FILTER_ACTION_DELETE 0x1
+ u8 reserved1;
+ __le16 vsi_num;
+ u8 reserved2[12];
+};
+
/* Get/Set RSS key (indirect 0x0B04/0x0B02) */
struct ice_aqc_get_set_rss_key {
#define ICE_AQC_GSET_RSS_KEY_VSI_VALID BIT(15)
@@ -1851,6 +1873,7 @@ struct ice_aq_desc {
struct ice_aqc_lldp_start lldp_start;
struct ice_aqc_lldp_set_local_mib lldp_set_mib;
struct ice_aqc_lldp_stop_start_specific_agent lldp_agent_ctrl;
+ struct ice_aqc_lldp_filter_ctrl lldp_filter_ctrl;
struct ice_aqc_get_set_rss_lut get_set_rss_lut;
struct ice_aqc_get_set_rss_key get_set_rss_key;
struct ice_aqc_add_txqs add_txqs;
@@ -1950,6 +1973,7 @@ enum ice_adminq_opc {
ice_aqc_opc_add_sched_elems = 0x0401,
ice_aqc_opc_cfg_sched_elems = 0x0403,
ice_aqc_opc_get_sched_elems = 0x0404,
+ ice_aqc_opc_move_sched_elems = 0x0408,
ice_aqc_opc_suspend_sched_elems = 0x0409,
ice_aqc_opc_resume_sched_elems = 0x040A,
ice_aqc_opc_query_port_ets = 0x040E,
@@ -1991,6 +2015,7 @@ enum ice_adminq_opc {
ice_aqc_opc_get_cee_dcb_cfg = 0x0A07,
ice_aqc_opc_lldp_set_local_mib = 0x0A08,
ice_aqc_opc_lldp_stop_start_specific_agent = 0x0A09,
+ ice_aqc_opc_lldp_filter_ctrl = 0x0A0A,
/* RSS commands */
ice_aqc_opc_set_rss_key = 0x0B02,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 607d33d05a0c..3d9475e222cd 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -110,7 +110,7 @@ ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
if (status)
return status;
- resp = (struct ice_aqc_manage_mac_read_resp *)buf;
+ resp = buf;
flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M;
if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) {
@@ -907,6 +907,7 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
ice_debug(hw, ICE_DBG_SCHED, "Failed to get scheduler allocated resources\n");
goto err_unroll_alloc;
}
+ ice_sched_get_psm_clk_freq(hw);
/* Initialize port_info struct with scheduler data */
status = ice_sched_init_port(hw->port_info);
@@ -1979,7 +1980,7 @@ ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
struct ice_aqc_list_caps_elem *cap_resp;
u32 i;
- cap_resp = (struct ice_aqc_list_caps_elem *)buf;
+ cap_resp = buf;
memset(func_p, 0, sizeof(*func_p));
@@ -2109,7 +2110,7 @@ ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
struct ice_aqc_list_caps_elem *cap_resp;
u32 i;
- cap_resp = (struct ice_aqc_list_caps_elem *)buf;
+ cap_resp = buf;
memset(dev_p, 0, sizeof(*dev_p));
@@ -4078,6 +4079,7 @@ static enum ice_status ice_replay_pre_init(struct ice_hw *hw)
for (i = 0; i < ICE_SW_LKUP_LAST; i++)
list_replace_init(&sw->recp_list[i].filt_rules,
&sw->recp_list[i].filt_replay_rules);
+ ice_sched_replay_agg_vsi_preinit(hw);
return 0;
}
@@ -4109,6 +4111,8 @@ enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle)
return status;
/* Replay per VSI all filters */
status = ice_replay_vsi_all_fltr(hw, vsi_handle);
+ if (!status)
+ status = ice_replay_vsi_agg(hw, vsi_handle);
return status;
}
@@ -4122,6 +4126,7 @@ void ice_replay_post(struct ice_hw *hw)
{
/* Delete old entries from replay filter list head */
ice_rm_all_sw_replay_rule_info(hw);
+ ice_sched_replay_agg(hw);
}
/**
@@ -4366,3 +4371,50 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
}
+
+/**
+ * ice_fw_supports_lldp_fltr_ctrl - check NVM version supports lldp_fltr_ctrl
+ * @hw: pointer to HW struct
+ */
+bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw)
+{
+ if (hw->mac_type != ICE_MAC_E810)
+ return false;
+
+ if (hw->api_maj_ver == ICE_FW_API_LLDP_FLTR_MAJ) {
+ if (hw->api_min_ver > ICE_FW_API_LLDP_FLTR_MIN)
+ return true;
+ if (hw->api_min_ver == ICE_FW_API_LLDP_FLTR_MIN &&
+ hw->api_patch >= ICE_FW_API_LLDP_FLTR_PATCH)
+ return true;
+ } else if (hw->api_maj_ver > ICE_FW_API_LLDP_FLTR_MAJ) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * ice_lldp_fltr_add_remove - add or remove a LLDP Rx switch filter
+ * @hw: pointer to HW struct
+ * @vsi_num: absolute HW index for VSI
+ * @add: true to add a filter, false to remove one
+ */
+enum ice_status
+ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add)
+{
+ struct ice_aqc_lldp_filter_ctrl *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.lldp_filter_ctrl;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_filter_ctrl);
+
+ if (add)
+ cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_ADD;
+ else
+ cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_DELETE;
+
+ cmd->vsi_num = cpu_to_le16(vsi_num);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 3ebb973878c7..baf4064fcbfe 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -175,4 +175,7 @@ ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
enum ice_status
ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
struct ice_sq_cd *cd);
+bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw);
+enum ice_status
+ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add);
#endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 4db12d1f5808..b2d8a5932b1d 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -838,7 +838,7 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
*/
static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
{
- struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
+ struct ice_aq_desc *cq_desc = desc;
u16 len;
if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) &&
@@ -868,7 +868,7 @@ static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
if (buf_len < len)
len = buf_len;
- ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, len);
+ ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, buf, len);
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index 2a3147ee0bbb..e42727941ef5 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -850,9 +850,9 @@ ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode)
return ICE_ERR_PARAM;
if (dcbx_mode == ICE_DCBX_MODE_IEEE)
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
else if (dcbx_mode == ICE_DCBX_MODE_CEE)
- dcbx_cfg = &pi->desired_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.desired_dcbx_cfg;
/* Get Local DCB Config in case of ICE_DCBX_MODE_IEEE
* or get CEE DCB Desired Config in case of ICE_DCBX_MODE_CEE
@@ -863,7 +863,7 @@ ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode)
goto out;
/* Get Remote DCB Config */
- dcbx_cfg = &pi->remote_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.remote_dcbx_cfg;
ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
/* Don't treat ENOENT as an error for Remote MIBs */
@@ -892,14 +892,14 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi)
ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL);
if (!ret) {
/* CEE mode */
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_CEE;
dcbx_cfg->tlv_status = le32_to_cpu(cee_cfg.tlv_status);
ice_cee_to_dcb_cfg(&cee_cfg, dcbx_cfg);
ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE);
} else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
/* CEE mode not enabled try querying IEEE data */
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_IEEE);
}
@@ -916,26 +916,26 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi)
*/
enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
{
- struct ice_port_info *pi = hw->port_info;
+ struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
enum ice_status ret = 0;
if (!hw->func_caps.common_cap.dcb)
return ICE_ERR_NOT_SUPPORTED;
- pi->is_sw_lldp = true;
+ qos_cfg->is_sw_lldp = true;
/* Get DCBX status */
- pi->dcbx_status = ice_get_dcbx_status(hw);
+ qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
- if (pi->dcbx_status == ICE_DCBX_STATUS_DONE ||
- pi->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS ||
- pi->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
+ if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DONE ||
+ qos_cfg->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS ||
+ qos_cfg->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
/* Get current DCBX configuration */
- ret = ice_get_dcb_cfg(pi);
+ ret = ice_get_dcb_cfg(hw->port_info);
if (ret)
return ret;
- pi->is_sw_lldp = false;
- } else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) {
+ qos_cfg->is_sw_lldp = false;
+ } else if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS) {
return ICE_ERR_NOT_READY;
}
@@ -943,7 +943,7 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
if (enable_mib_change) {
ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
if (ret)
- pi->is_sw_lldp = true;
+ qos_cfg->is_sw_lldp = true;
}
return ret;
@@ -958,21 +958,21 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
*/
enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib)
{
- struct ice_port_info *pi = hw->port_info;
+ struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
enum ice_status ret;
if (!hw->func_caps.common_cap.dcb)
return ICE_ERR_NOT_SUPPORTED;
/* Get DCBX status */
- pi->dcbx_status = ice_get_dcbx_status(hw);
+ qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
- if (pi->dcbx_status == ICE_DCBX_STATUS_DIS)
+ if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS)
return ICE_ERR_NOT_READY;
ret = ice_aq_cfg_lldp_mib_change(hw, ena_mib, NULL);
if (!ret)
- pi->is_sw_lldp = !ena_mib;
+ qos_cfg->is_sw_lldp = !ena_mib;
return ret;
}
@@ -1270,7 +1270,7 @@ enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi)
hw = pi->hw;
/* update the HW local config */
- dcbcfg = &pi->local_dcbx_cfg;
+ dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
/* Allocate the LLDPDU */
lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
if (!lldpmib)
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 36abd6b7280c..1e8f71ffc8ce 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -28,7 +28,7 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc))
return;
- dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+ dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
ice_for_each_traffic_class(i)
if (vsi->tc_cfg.ena_tc & BIT(i))
@@ -134,7 +134,7 @@ static u8 ice_dcb_get_mode(struct ice_port_info *port_info, bool host)
else
mode = DCB_CAP_DCBX_LLD_MANAGED;
- if (port_info->local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE)
+ if (port_info->qos_cfg.local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE)
return mode | DCB_CAP_DCBX_VER_CEE;
else
return mode | DCB_CAP_DCBX_VER_IEEE;
@@ -277,10 +277,10 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
int ret = ICE_DCB_NO_HW_CHG;
struct ice_vsi *pf_vsi;
- curr_cfg = &pf->hw.port_info->local_dcbx_cfg;
+ curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
/* FW does not care if change happened */
- if (!pf->hw.port_info->is_sw_lldp)
+ if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
ret = ICE_DCB_HW_CHG_RST;
/* Enable DCB tagging only when more than one TC */
@@ -327,7 +327,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
* the new config came from the HW in the first place.
*/
- if (pf->hw.port_info->is_sw_lldp) {
+ if (pf->hw.port_info->qos_cfg.is_sw_lldp) {
ret = ice_set_dcb_cfg(pf->hw.port_info);
if (ret) {
dev_err(dev, "Set DCB Config failed\n");
@@ -360,7 +360,7 @@ free_cfg:
*/
static void ice_cfg_etsrec_defaults(struct ice_port_info *pi)
{
- struct ice_dcbx_cfg *dcbcfg = &pi->local_dcbx_cfg;
+ struct ice_dcbx_cfg *dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
u8 i;
/* Ensure ETS recommended DCB configuration is not already set */
@@ -446,7 +446,7 @@ void ice_dcb_rebuild(struct ice_pf *pf)
mutex_lock(&pf->tc_mutex);
- if (!pf->hw.port_info->is_sw_lldp)
+ if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
ice_cfg_etsrec_defaults(pf->hw.port_info);
ret = ice_set_dcb_cfg(pf->hw.port_info);
@@ -455,9 +455,9 @@ void ice_dcb_rebuild(struct ice_pf *pf)
goto dcb_error;
}
- if (!pf->hw.port_info->is_sw_lldp) {
+ if (!pf->hw.port_info->qos_cfg.is_sw_lldp) {
ret = ice_cfg_lldp_mib_change(&pf->hw, true);
- if (ret && !pf->hw.port_info->is_sw_lldp) {
+ if (ret && !pf->hw.port_info->qos_cfg.is_sw_lldp) {
dev_err(dev, "Failed to register for MIB changes\n");
goto dcb_error;
}
@@ -510,11 +510,12 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
int ret = 0;
pi = pf->hw.port_info;
- newcfg = kmemdup(&pi->local_dcbx_cfg, sizeof(*newcfg), GFP_KERNEL);
+ newcfg = kmemdup(&pi->qos_cfg.local_dcbx_cfg, sizeof(*newcfg),
+ GFP_KERNEL);
if (!newcfg)
return -ENOMEM;
- memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg));
+ memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*newcfg));
dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n");
if (ice_pf_dcb_cfg(pf, newcfg, locked))
@@ -545,7 +546,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
if (!dcbcfg)
return -ENOMEM;
- memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg));
+ memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*dcbcfg));
dcbcfg->etscfg.willing = ets_willing ? 1 : 0;
dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
@@ -608,7 +609,7 @@ static bool ice_dcb_tc_contig(u8 *prio_table)
*/
static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
{
- struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+ struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
struct device *dev = ice_pf_to_dev(pf);
int ret;
@@ -638,7 +639,7 @@ static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
*/
void ice_pf_dcb_recfg(struct ice_pf *pf)
{
- struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+ struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
u8 tc_map = 0;
int v, ret;
@@ -691,7 +692,7 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
port_info = hw->port_info;
err = ice_init_dcb(hw, false);
- if (err && !port_info->is_sw_lldp) {
+ if (err && !port_info->qos_cfg.is_sw_lldp) {
dev_err(dev, "Error initializing DCB %d\n", err);
goto dcb_init_err;
}
@@ -858,7 +859,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
/* Update the remote cached instance and return */
ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
- &pi->remote_dcbx_cfg);
+ &pi->qos_cfg.remote_dcbx_cfg);
if (ret) {
dev_err(dev, "Failed to get remote DCB config\n");
return;
@@ -868,10 +869,11 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
mutex_lock(&pf->tc_mutex);
/* store the old configuration */
- tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg;
+ tmp_dcbx_cfg = pf->hw.port_info->qos_cfg.local_dcbx_cfg;
/* Reset the old DCBX configuration data */
- memset(&pi->local_dcbx_cfg, 0, sizeof(pi->local_dcbx_cfg));
+ memset(&pi->qos_cfg.local_dcbx_cfg, 0,
+ sizeof(pi->qos_cfg.local_dcbx_cfg));
/* Get updated DCBX data from firmware */
ret = ice_get_dcb_cfg(pf->hw.port_info);
@@ -881,7 +883,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
}
/* No change detected in DCBX configs */
- if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) {
+ if (!memcmp(&tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg,
+ sizeof(tmp_dcbx_cfg))) {
dev_dbg(dev, "No change detected in DCBX configuration.\n");
goto out;
}
@@ -889,13 +892,13 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
pf->dcbx_cap = ice_dcb_get_mode(pi, false);
need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
- &pi->local_dcbx_cfg);
- ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg);
+ &pi->qos_cfg.local_dcbx_cfg);
+ ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg);
if (!need_reconfig)
goto out;
/* Enable DCB tagging only when more than one TC */
- if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) {
+ if (ice_dcb_get_num_tc(&pi->qos_cfg.local_dcbx_cfg) > 1) {
dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
set_bit(ICE_FLAG_DCB_ENA, pf->flags);
} else {
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
index 87f91b750d59..fcfefad00d1c 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -34,12 +34,10 @@ static void ice_dcbnl_devreset(struct net_device *netdev)
static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets)
{
struct ice_dcbx_cfg *dcbxcfg;
- struct ice_port_info *pi;
struct ice_pf *pf;
pf = ice_netdev_to_pf(netdev);
- pi = pf->hw.port_info;
- dcbxcfg = &pi->local_dcbx_cfg;
+ dcbxcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
ets->willing = dcbxcfg->etscfg.willing;
ets->ets_cap = dcbxcfg->etscfg.maxtcs;
@@ -74,7 +72,7 @@ static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets)
!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
return -EINVAL;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
mutex_lock(&pf->tc_mutex);
@@ -159,6 +157,7 @@ static u8 ice_dcbnl_getdcbx(struct net_device *netdev)
static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_qos_cfg *qos_cfg;
/* No support for LLD_MANAGED modes or CEE+IEEE */
if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
@@ -171,10 +170,11 @@ static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
return ICE_DCB_NO_HW_CHG;
pf->dcbx_cap = mode;
+ qos_cfg = &pf->hw.port_info->qos_cfg;
if (mode & DCB_CAP_DCBX_VER_CEE)
- pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
+ qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
else
- pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
+ qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode);
return ICE_DCB_HW_CHG_RST;
@@ -225,7 +225,7 @@ static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc)
struct ice_dcbx_cfg *dcbxcfg;
int i;
- dcbxcfg = &pi->local_dcbx_cfg;
+ dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
pfc->pfc_cap = dcbxcfg->pfc.pfccap;
pfc->pfc_en = dcbxcfg->pfc.pfcena;
pfc->mbc = dcbxcfg->pfc.mbc;
@@ -256,7 +256,7 @@ static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc)
mutex_lock(&pf->tc_mutex);
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
if (pfc->pfc_cap)
new_cfg->pfc.pfccap = pfc->pfc_cap;
@@ -293,9 +293,9 @@ ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting)
if (prio >= ICE_MAX_USER_PRIORITY)
return;
- *setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
+ *setting = (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
dev_dbg(ice_pf_to_dev(pf), "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
- prio, *setting, pi->local_dcbx_cfg.pfc.pfcena);
+ prio, *setting, pi->qos_cfg.local_dcbx_cfg.pfc.pfcena);
}
/**
@@ -316,7 +316,7 @@ static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set)
if (prio >= ICE_MAX_USER_PRIORITY)
return;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
if (set)
@@ -338,7 +338,7 @@ static u8 ice_dcbnl_getpfcstate(struct net_device *netdev)
struct ice_port_info *pi = pf->hw.port_info;
/* Return enabled if any UP enabled for PFC */
- if (pi->local_dcbx_cfg.pfc.pfcena)
+ if (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena)
return 1;
return 0;
@@ -378,8 +378,8 @@ static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state)
if (state) {
set_bit(ICE_FLAG_DCB_ENA, pf->flags);
- memcpy(&pf->hw.port_info->desired_dcbx_cfg,
- &pf->hw.port_info->local_dcbx_cfg,
+ memcpy(&pf->hw.port_info->qos_cfg.desired_dcbx_cfg,
+ &pf->hw.port_info->qos_cfg.local_dcbx_cfg,
sizeof(struct ice_dcbx_cfg));
} else {
clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
@@ -413,7 +413,7 @@ ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
if (prio >= ICE_MAX_USER_PRIORITY)
return;
- *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+ *pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
dev_dbg(ice_pf_to_dev(pf), "Get PG config prio=%d tc=%d\n", prio,
*pgid);
}
@@ -444,7 +444,7 @@ ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
if (tc >= ICE_MAX_TRAFFIC_CLASS)
return;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
/* prio_type, bwg_id and bw_pct per UP are not supported */
@@ -474,7 +474,7 @@ ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct)
if (pgid >= ICE_MAX_TRAFFIC_CLASS)
return;
- *bw_pct = pi->local_dcbx_cfg.etscfg.tcbwtable[pgid];
+ *bw_pct = pi->qos_cfg.local_dcbx_cfg.etscfg.tcbwtable[pgid];
dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n",
pgid, *bw_pct);
}
@@ -498,7 +498,7 @@ ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct)
if (pgid >= ICE_MAX_TRAFFIC_CLASS)
return;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
new_cfg->etscfg.tcbwtable[pgid] = bw_pct;
}
@@ -528,7 +528,7 @@ ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio,
if (prio >= ICE_MAX_USER_PRIORITY)
return;
- *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+ *pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
}
/**
@@ -699,9 +699,9 @@ static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app)
mutex_lock(&pf->tc_mutex);
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
- old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+ old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
if (old_cfg->numapps == ICE_DCBX_MAX_APPS) {
ret = -EINVAL;
@@ -751,7 +751,7 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
return -EINVAL;
mutex_lock(&pf->tc_mutex);
- old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+ old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
if (old_cfg->numapps <= 1)
goto delapp_out;
@@ -760,7 +760,7 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
if (ret)
goto delapp_out;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
for (i = 1; i < new_cfg->numapps; i++) {
if (app->selector == new_cfg->app[i].selector &&
@@ -813,7 +813,7 @@ static u8 ice_dcbnl_cee_set_all(struct net_device *netdev)
!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
return ICE_DCB_NO_HW_CHG;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
mutex_lock(&pf->tc_mutex);
@@ -884,7 +884,7 @@ void ice_dcbnl_set_all(struct ice_vsi *vsi)
if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
return;
- dcbxcfg = &pi->local_dcbx_cfg;
+ dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
for (i = 0; i < dcbxcfg->numapps; i++) {
u8 prio, tc_map;
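For reference, the ice_dcbnl_setdcbx() change only moves where the chosen mode is stored; the accept/reject rules are unchanged. A user-space sketch of that decision (the DCB_CAP_* values mirror include/uapi/linux/dcbnl.h; the ICE_DCBX_MODE_* values here are illustrative stand-ins):

#include <stdio.h>

#define DCB_CAP_DCBX_LLD_MANAGED 0x02
#define DCB_CAP_DCBX_VER_CEE     0x04
#define DCB_CAP_DCBX_VER_IEEE    0x08

enum { ICE_DCBX_MODE_CEE = 1, ICE_DCBX_MODE_IEEE = 2 }; /* illustrative */

/* mirrors the guard in ice_dcbnl_setdcbx(): no LLD_MANAGED, no CEE+IEEE */
static int pick_mode(unsigned int mode)
{
	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
	    ((mode & DCB_CAP_DCBX_VER_CEE) && (mode & DCB_CAP_DCBX_VER_IEEE)))
		return -1;	/* unsupported combination */
	return (mode & DCB_CAP_DCBX_VER_CEE) ? ICE_DCBX_MODE_CEE
					     : ICE_DCBX_MODE_IEEE;
}

int main(void)
{
	printf("CEE  -> %d\n", pick_mode(DCB_CAP_DCBX_VER_CEE));
	printf("IEEE -> %d\n", pick_mode(DCB_CAP_DCBX_VER_IEEE));
	printf("both -> %d\n",
	       pick_mode(DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_VER_IEEE));
	return 0;
}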
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index e01b7e34da5e..5636c9b23896 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1242,6 +1242,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
enum ice_status status;
bool dcbx_agent_status;
+ /* Remove rule to direct LLDP packets to default VSI.
+ * The FW LLDP engine will now be consuming them.
+ */
+ ice_cfg_sw_lldp(vsi, false, false);
+
/* AQ command to start FW LLDP agent will return an
* error if the agent is already started
*/
@@ -1270,11 +1275,6 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
if (status)
dev_dbg(dev, "Fail to init DCB\n");
- /* Remove rule to direct LLDP packets to default VSI.
- * The FW LLDP engine will now be consuming them.
- */
- ice_cfg_sw_lldp(vsi, false, false);
-
/* Register for MIB change events */
status = ice_cfg_lldp_mib_change(&pf->hw, true);
if (status)
@@ -2979,7 +2979,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
pause->rx_pause = 0;
pause->tx_pause = 0;
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
@@ -3031,7 +3031,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
pi = vsi->port_info;
hw_link_info = &pi->phy.link_info;
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
/* Changing the port's flow control is not supported if this isn't the
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index cf5b717b9293..5e1fd30c0a0f 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -2727,7 +2727,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_XLT1_RSS:
case ICE_SID_XLT1_ACL:
case ICE_SID_XLT1_PE:
- xlt1 = (struct ice_xlt1_section *)sect;
+ xlt1 = sect;
src = xlt1->value;
sect_len = le16_to_cpu(xlt1->count) *
sizeof(*hw->blk[block_id].xlt1.t);
@@ -2740,7 +2740,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_XLT2_RSS:
case ICE_SID_XLT2_ACL:
case ICE_SID_XLT2_PE:
- xlt2 = (struct ice_xlt2_section *)sect;
+ xlt2 = sect;
src = (__force u8 *)xlt2->value;
sect_len = le16_to_cpu(xlt2->count) *
sizeof(*hw->blk[block_id].xlt2.t);
@@ -2753,7 +2753,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_PROFID_TCAM_RSS:
case ICE_SID_PROFID_TCAM_ACL:
case ICE_SID_PROFID_TCAM_PE:
- pid = (struct ice_prof_id_section *)sect;
+ pid = sect;
src = (u8 *)pid->entry;
sect_len = le16_to_cpu(pid->count) *
sizeof(*hw->blk[block_id].prof.t);
@@ -2766,7 +2766,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_PROFID_REDIR_RSS:
case ICE_SID_PROFID_REDIR_ACL:
case ICE_SID_PROFID_REDIR_PE:
- pr = (struct ice_prof_redir_section *)sect;
+ pr = sect;
src = pr->redir_value;
sect_len = le16_to_cpu(pr->count) *
sizeof(*hw->blk[block_id].prof_redir.t);
@@ -2779,7 +2779,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_FLD_VEC_RSS:
case ICE_SID_FLD_VEC_ACL:
case ICE_SID_FLD_VEC_PE:
- es = (struct ice_sw_fv_section *)sect;
+ es = sect;
src = (u8 *)es->fv;
sect_len = (u32)(le16_to_cpu(es->count) *
hw->blk[block_id].es.fvw) *
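The ice_fill_tbl() hunks above drop redundant casts: `sect` is a `void *`, and in C a `void *` converts implicitly to any object pointer type, so assignments like `xlt1 = sect` are already well formed. A two-line illustration (struct layout simplified, not the driver's real one):

#include <stdio.h>

struct ice_xlt1_section { unsigned short count; unsigned char value[8]; };

int main(void)
{
	struct ice_xlt1_section s = { .count = 3 };
	void *sect = &s;
	/* no cast needed: a void * converts implicitly in C (unlike C++) */
	struct ice_xlt1_section *xlt1 = sect;

	printf("count = %u\n", xlt1->count);
	return 0;
}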
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 90abc8612a6a..093a1818a392 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -86,6 +86,9 @@
#define QRXFLXP_CNTXT_RXDID_PRIO_S 8
#define QRXFLXP_CNTXT_RXDID_PRIO_M ICE_M(0x7, 8)
#define QRXFLXP_CNTXT_TS_M BIT(11)
+#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S 4
+#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M ICE_M(0x3, 4)
+#define GLGEN_CLKSTAT_SRC 0x000B826C
#define GLGEN_RSTAT 0x000B8188
#define GLGEN_RSTAT_DEVSTATE_M ICE_M(0x3, 0)
#define GLGEN_RSTCTL 0x000B8180
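The new GLGEN_CLKSTAT_SRC defines follow the header's usual _S/_M pattern: a shift and a pre-shifted mask. Extracting the 2-bit PSM clock source from a register value then looks like this sketch (assuming ICE_M(m, s) expands to ((m) << (s)), as elsewhere in the driver):

#include <stdio.h>

#define ICE_M(m, s)			((m) << (s))	/* assumed shape */
#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S	4
#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M	ICE_M(0x3, 4)

int main(void)
{
	/* pretend register read with clock source field set to 2 */
	unsigned int val = 0x2 << GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S;
	unsigned int clk_src = (val & GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M) >>
			       GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S;

	printf("PSM clk_src = %u\n", clk_src);	/* prints 2 */
	return 0;
}

ice_sched_get_psm_clk_freq() later in this patch uses exactly this mask/shift pair.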
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
new file mode 100644
index 000000000000..4599fc3b4ed8
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+/* Link Aggregation code */
+
+#include "ice.h"
+#include "ice_lag.h"
+
+/**
+ * ice_lag_nop_handler - no-op Rx handler to disable LAG
+ * @pskb: pointer to skb pointer
+ */
+rx_handler_result_t ice_lag_nop_handler(struct sk_buff __always_unused **pskb)
+{
+ return RX_HANDLER_PASS;
+}
+
+/**
+ * ice_lag_set_primary - set PF LAG state as Primary
+ * @lag: LAG info struct
+ */
+static void ice_lag_set_primary(struct ice_lag *lag)
+{
+ struct ice_pf *pf = lag->pf;
+
+ if (!pf)
+ return;
+
+ if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_BACKUP) {
+ dev_warn(ice_pf_to_dev(pf), "%s: Attempt to be Primary, but incompatible state.\n",
+ netdev_name(lag->netdev));
+ return;
+ }
+
+ lag->role = ICE_LAG_PRIMARY;
+}
+
+/**
+ * ice_lag_set_backup - set PF LAG state to Backup
+ * @lag: LAG info struct
+ */
+static void ice_lag_set_backup(struct ice_lag *lag)
+{
+ struct ice_pf *pf = lag->pf;
+
+ if (!pf)
+ return;
+
+ if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_PRIMARY) {
+ dev_dbg(ice_pf_to_dev(pf), "%s: Attempt to be Backup, but incompatible state\n",
+ netdev_name(lag->netdev));
+ return;
+ }
+
+ lag->role = ICE_LAG_BACKUP;
+}
+
+/**
+ * ice_display_lag_info - print LAG info
+ * @lag: LAG info struct
+ */
+static void ice_display_lag_info(struct ice_lag *lag)
+{
+ const char *name, *peer, *upper, *role, *bonded, *master;
+ struct device *dev = &lag->pf->pdev->dev;
+
+ name = lag->netdev ? netdev_name(lag->netdev) : "unset";
+ peer = lag->peer_netdev ? netdev_name(lag->peer_netdev) : "unset";
+ upper = lag->upper_netdev ? netdev_name(lag->upper_netdev) : "unset";
+ master = lag->master ? "TRUE" : "FALSE";
+ bonded = lag->bonded ? "BONDED" : "UNBONDED";
+
+ switch (lag->role) {
+ case ICE_LAG_NONE:
+ role = "NONE";
+ break;
+ case ICE_LAG_PRIMARY:
+ role = "PRIMARY";
+ break;
+ case ICE_LAG_BACKUP:
+ role = "BACKUP";
+ break;
+ case ICE_LAG_UNSET:
+ role = "UNSET";
+ break;
+ default:
+ role = "ERROR";
+ }
+
+ dev_dbg(dev, "%s %s, peer:%s, upper:%s, role:%s, master:%s\n", name,
+ bonded, peer, upper, role, master);
+}
+
+/**
+ * ice_lag_info_event - handle NETDEV_BONDING_INFO event
+ * @lag: LAG info struct
+ * @ptr: opaque data pointer
+ *
+ * ptr is to be cast to (netdev_notifier_bonding_info *)
+ */
+static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
+{
+ struct net_device *event_netdev, *netdev_tmp;
+ struct netdev_notifier_bonding_info *info;
+ struct netdev_bonding_info *bonding_info;
+ const char *lag_netdev_name;
+
+ event_netdev = netdev_notifier_info_to_dev(ptr);
+ info = ptr;
+ lag_netdev_name = netdev_name(lag->netdev);
+ bonding_info = &info->bonding_info;
+
+ if (event_netdev != lag->netdev || !lag->bonded || !lag->upper_netdev)
+ return;
+
+ if (bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) {
+ netdev_dbg(lag->netdev, "Bonding event recv, but mode not active/backup\n");
+ goto lag_out;
+ }
+
+ if (strcmp(bonding_info->slave.slave_name, lag_netdev_name)) {
+ netdev_dbg(lag->netdev, "Bonding event recv, but slave info not for us\n");
+ goto lag_out;
+ }
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(lag->upper_netdev, netdev_tmp) {
+ if (!netif_is_ice(netdev_tmp))
+ continue;
+
+ if (netdev_tmp && netdev_tmp != lag->netdev &&
+ lag->peer_netdev != netdev_tmp) {
+ dev_hold(netdev_tmp);
+ lag->peer_netdev = netdev_tmp;
+ }
+ }
+ rcu_read_unlock();
+
+ if (bonding_info->slave.state)
+ ice_lag_set_backup(lag);
+ else
+ ice_lag_set_primary(lag);
+
+lag_out:
+ ice_display_lag_info(lag);
+}
+
+/**
+ * ice_lag_link - handle LAG link event
+ * @lag: LAG info struct
+ * @info: info from the netdev notifier
+ */
+static void
+ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *netdev_tmp, *upper = info->upper_dev;
+ struct ice_pf *pf = lag->pf;
+ int peers = 0;
+
+ if (lag->bonded)
+ dev_warn(ice_pf_to_dev(pf), "%s Already part of a bond\n",
+ netdev_name(lag->netdev));
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, netdev_tmp)
+ peers++;
+ rcu_read_unlock();
+
+ if (lag->upper_netdev != upper) {
+ dev_hold(upper);
+ lag->upper_netdev = upper;
+ }
+
+ ice_clear_sriov_cap(pf);
+
+ lag->bonded = true;
+ lag->role = ICE_LAG_UNSET;
+
+ /* if this is the first element in a LAG, mark as master */
+ lag->master = !!(peers == 1);
+}
+
+/**
+ * ice_lag_unlink - handle unlink event
+ * @lag: LAG info struct
+ * @info: info from netdev notification
+ */
+static void
+ice_lag_unlink(struct ice_lag *lag,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *netdev_tmp, *upper = info->upper_dev;
+ struct ice_pf *pf = lag->pf;
+ bool found = false;
+
+ if (!lag->bonded) {
+ netdev_dbg(lag->netdev, "bonding unlink event on non-LAG netdev\n");
+ return;
+ }
+
+ /* determine if we are in the new LAG config or not */
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, netdev_tmp) {
+ if (netdev_tmp == lag->netdev) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (found)
+ return;
+
+ if (lag->upper_netdev) {
+ dev_put(lag->upper_netdev);
+ lag->upper_netdev = NULL;
+ }
+
+ if (lag->peer_netdev) {
+ dev_put(lag->peer_netdev);
+ lag->peer_netdev = NULL;
+ }
+
+ ice_set_sriov_cap(pf);
+ lag->bonded = false;
+ lag->role = ICE_LAG_NONE;
+}
+
+/**
+ * ice_lag_changeupper_event - handle LAG changeupper event
+ * @lag: LAG info struct
+ * @ptr: opaque pointer data
+ *
+ * ptr is to be cast into netdev_notifier_changeupper_info
+ */
+static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info;
+ struct net_device *netdev;
+
+ info = ptr;
+ netdev = netdev_notifier_info_to_dev(ptr);
+
+ /* not for this netdev */
+ if (netdev != lag->netdev)
+ return;
+
+ if (!info->upper_dev) {
+ netdev_dbg(netdev, "changeupper rcvd, but no upper defined\n");
+ return;
+ }
+
+ netdev_dbg(netdev, "bonding %s\n", info->linking ? "LINK" : "UNLINK");
+
+ if (!netif_is_lag_master(info->upper_dev)) {
+ netdev_dbg(netdev, "changeupper rcvd, but not master. bail\n");
+ return;
+ }
+
+ if (info->linking)
+ ice_lag_link(lag, info);
+ else
+ ice_lag_unlink(lag, info);
+
+ ice_display_lag_info(lag);
+}
+
+/**
+ * ice_lag_changelower_event - handle LAG changelower event
+ * @lag: LAG info struct
+ * @ptr: opaque data pointer
+ *
+ * ptr to be cast to netdev_notifier_changelowerstate_info
+ */
+static void ice_lag_changelower_event(struct ice_lag *lag, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (netdev != lag->netdev)
+ return;
+
+ netdev_dbg(netdev, "bonding info\n");
+
+ if (!netif_is_lag_port(netdev))
+ netdev_dbg(netdev, "CHANGELOWER rcvd, but netdev not in LAG. Bail\n");
+}
+
+/**
+ * ice_lag_event_handler - handle LAG events from netdev
+ * @notif_blk: notifier block registered by this netdev
+ * @event: event type
+ * @ptr: opaque data containing notifier event
+ */
+static int
+ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
+ void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct ice_lag *lag;
+
+ lag = container_of(notif_blk, struct ice_lag, notif_block);
+
+ if (!lag->netdev)
+ return NOTIFY_DONE;
+
+ /* Check that the netdev is in the initial network namespace */
+ if (!net_eq(dev_net(netdev), &init_net))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ice_lag_changeupper_event(lag, ptr);
+ break;
+ case NETDEV_CHANGELOWERSTATE:
+ ice_lag_changelower_event(lag, ptr);
+ break;
+ case NETDEV_BONDING_INFO:
+ ice_lag_info_event(lag, ptr);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * ice_register_lag_handler - register LAG handler on netdev
+ * @lag: LAG struct
+ */
+static int ice_register_lag_handler(struct ice_lag *lag)
+{
+ struct device *dev = ice_pf_to_dev(lag->pf);
+ struct notifier_block *notif_blk;
+
+ notif_blk = &lag->notif_block;
+
+ if (!notif_blk->notifier_call) {
+ notif_blk->notifier_call = ice_lag_event_handler;
+ if (register_netdevice_notifier(notif_blk)) {
+ notif_blk->notifier_call = NULL;
+ dev_err(dev, "FAIL register LAG event handler!\n");
+ return -EINVAL;
+ }
+ dev_dbg(dev, "LAG event handler registered\n");
+ }
+ return 0;
+}
+
+/**
+ * ice_unregister_lag_handler - unregister LAG handler on netdev
+ * @lag: LAG struct
+ */
+static void ice_unregister_lag_handler(struct ice_lag *lag)
+{
+ struct device *dev = ice_pf_to_dev(lag->pf);
+ struct notifier_block *notif_blk;
+
+ notif_blk = &lag->notif_block;
+ if (notif_blk->notifier_call) {
+ unregister_netdevice_notifier(notif_blk);
+ dev_dbg(dev, "LAG event handler unregistered\n");
+ }
+}
+
+/**
+ * ice_init_lag - initialize support for LAG
+ * @pf: PF struct
+ *
+ * Alloc memory for LAG structs and initialize the elements.
+ * Memory will be freed in ice_deinit_lag
+ */
+int ice_init_lag(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_lag *lag;
+ struct ice_vsi *vsi;
+ int err;
+
+ pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL);
+ if (!pf->lag)
+ return -ENOMEM;
+ lag = pf->lag;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi) {
+ dev_err(dev, "couldn't get main vsi, link aggregation init fail\n");
+ err = -EIO;
+ goto lag_error;
+ }
+
+ lag->pf = pf;
+ lag->netdev = vsi->netdev;
+ lag->role = ICE_LAG_NONE;
+ lag->bonded = false;
+ lag->peer_netdev = NULL;
+ lag->upper_netdev = NULL;
+ lag->notif_block.notifier_call = NULL;
+
+ err = ice_register_lag_handler(lag);
+ if (err) {
+ dev_warn(dev, "INIT LAG: Failed to register event handler\n");
+ goto lag_error;
+ }
+
+ ice_display_lag_info(lag);
+
+ dev_dbg(dev, "INIT LAG complete\n");
+ return 0;
+
+lag_error:
+ kfree(lag);
+ pf->lag = NULL;
+ return err;
+}
+
+/**
+ * ice_deinit_lag - Clean up LAG
+ * @pf: PF struct
+ *
+ * Clean up kernel LAG info and free memory
+ * This function is meant to only be called on driver remove/shutdown
+ */
+void ice_deinit_lag(struct ice_pf *pf)
+{
+ struct ice_lag *lag;
+
+ lag = pf->lag;
+
+ if (!lag)
+ return;
+
+ if (lag->pf)
+ ice_unregister_lag_handler(lag);
+
+ if (lag->upper_netdev)
+ dev_put(lag->upper_netdev);
+
+ if (lag->peer_netdev)
+ dev_put(lag->peer_netdev);
+
+ kfree(lag);
+
+ pf->lag = NULL;
+}
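ice_lag_set_primary() and ice_lag_set_backup() above only allow a role change from UNSET or from the opposite role; anything else is logged and ignored. A compact user-space model of those transition rules (same guards, simplified local types, not the driver's enum):

#include <assert.h>
#include <stdio.h>

enum lag_role { LAG_NONE, LAG_PRIMARY, LAG_BACKUP, LAG_UNSET };

/* mirrors the guard in ice_lag_set_primary(): only UNSET or BACKUP
 * may become PRIMARY; the symmetric rule applies for BACKUP
 */
static int set_primary(enum lag_role *role)
{
	if (*role != LAG_UNSET && *role != LAG_BACKUP)
		return -1;
	*role = LAG_PRIMARY;
	return 0;
}

static int set_backup(enum lag_role *role)
{
	if (*role != LAG_UNSET && *role != LAG_PRIMARY)
		return -1;
	*role = LAG_BACKUP;
	return 0;
}

int main(void)
{
	enum lag_role role = LAG_UNSET;		/* ice_lag_link() starts here */

	assert(set_primary(&role) == 0);	/* UNSET -> PRIMARY ok */
	assert(set_backup(&role) == 0);		/* failover to BACKUP ok */
	role = LAG_NONE;			/* unbonded */
	assert(set_primary(&role) == -1);	/* rejected, as in the driver */
	printf("role transitions behave as expected\n");
	return 0;
}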
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h
new file mode 100644
index 000000000000..c2e3688dd8fd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_LAG_H_
+#define _ICE_LAG_H_
+
+#include <linux/netdevice.h>
+
+/* LAG roles for netdev */
+enum ice_lag_role {
+ ICE_LAG_NONE,
+ ICE_LAG_PRIMARY,
+ ICE_LAG_BACKUP,
+ ICE_LAG_UNSET
+};
+
+struct ice_pf;
+
+/* LAG info struct */
+struct ice_lag {
+ struct ice_pf *pf; /* backlink to PF struct */
+ struct net_device *netdev; /* this PF's netdev */
+ struct net_device *peer_netdev;
+ struct net_device *upper_netdev; /* upper bonding netdev */
+ struct notifier_block notif_block;
+ u8 bonded:1; /* currently bonded */
+ u8 master:1; /* this is a master */
+ u8 handler:1; /* did we register a rx_netdev_handler */
+ /* each thing blocking bonding will increment this value by one.
+ * If this value is zero, then bonding is allowed.
+ */
+ u16 dis_lag;
+ u8 role;
+};
+
+int ice_init_lag(struct ice_pf *pf);
+void ice_deinit_lag(struct ice_pf *pf);
+rx_handler_result_t ice_lag_nop_handler(struct sk_buff **pskb);
+
+/**
+ * ice_disable_lag - increment LAG disable count
+ * @lag: LAG struct
+ */
+static inline void ice_disable_lag(struct ice_lag *lag)
+{
+ /* If LAG for this PF is not already disabled, disable it */
+ rtnl_lock();
+ if (!netdev_is_rx_handler_busy(lag->netdev)) {
+ if (!netdev_rx_handler_register(lag->netdev,
+ ice_lag_nop_handler,
+ NULL))
+ lag->handler = true;
+ }
+ rtnl_unlock();
+ lag->dis_lag++;
+}
+
+/**
+ * ice_enable_lag - decrement disable count for a PF
+ * @lag: LAG struct
+ *
+ * Decrement the disable counter for a port, and if that count reaches
+ * zero, then remove the no-op Rx handler from that netdev
+ */
+static inline void ice_enable_lag(struct ice_lag *lag)
+{
+ if (lag->dis_lag)
+ lag->dis_lag--;
+ if (!lag->dis_lag && lag->handler) {
+ rtnl_lock();
+ netdev_rx_handler_unregister(lag->netdev);
+ rtnl_unlock();
+ lag->handler = false;
+ }
+}
+
+/**
+ * ice_is_lag_dis - is LAG disabled
+ * @lag: LAG struct
+ *
+ * Return true if bonding is disabled
+ */
+static inline bool ice_is_lag_dis(struct ice_lag *lag)
+{
+ return !!(lag->dis_lag);
+}
+#endif /* _ICE_LAG_H_ */
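The dis_lag field above is a plain blocker count: each caller of ice_disable_lag() adds one, ice_enable_lag() removes one, and bonding is allowed again only when the count reaches zero. A user-space sketch of just the counting scheme (the rtnl-locked no-op Rx handler registration is omitted here):

#include <assert.h>
#include <stdio.h>

struct lag_state { unsigned int dis_lag; };

static void disable_lag(struct lag_state *s) { s->dis_lag++; }
static void enable_lag(struct lag_state *s) { if (s->dis_lag) s->dis_lag--; }
static int lag_is_dis(const struct lag_state *s) { return s->dis_lag != 0; }

int main(void)
{
	struct lag_state s = { 0 };

	disable_lag(&s);		/* first blocker */
	disable_lag(&s);		/* second, independent blocker */
	enable_lag(&s);
	assert(lag_is_dis(&s));		/* still one blocker left */
	enable_lag(&s);
	assert(!lag_is_dis(&s));	/* bonding allowed again */
	printf("dis_lag refcount ok\n");
	return 0;
}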
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index ad9c22a1b97a..8d4e2ad4328d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2078,7 +2078,7 @@ err_out:
static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
{
- struct ice_dcbx_cfg *cfg = &vsi->port_info->local_dcbx_cfg;
+ struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg);
vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg);
@@ -2145,11 +2145,18 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
dev = ice_pf_to_dev(pf);
eth_fltr = create ? ice_fltr_add_eth : ice_fltr_remove_eth;
- if (tx)
+ if (tx) {
status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX,
ICE_DROP_PACKET);
- else
- status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX, ICE_FWD_TO_VSI);
+ } else {
+ if (ice_fw_supports_lldp_fltr_ctrl(&pf->hw)) {
+ status = ice_lldp_fltr_add_remove(&pf->hw, vsi->vsi_num,
+ create);
+ } else {
+ status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX,
+ ICE_FWD_TO_VSI);
+ }
+ }
if (status)
dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n",
@@ -2158,6 +2165,126 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
}
/**
+ * ice_set_agg_vsi - sets up scheduler aggregator node and moves VSI into it
+ * @vsi: pointer to the VSI
+ *
+ * This function will allocate a new scheduler aggregator node if needed and
+ * will move the specified VSI into it.
+ */
+static void ice_set_agg_vsi(struct ice_vsi *vsi)
+{
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ struct ice_agg_node *agg_node_iter = NULL;
+ u32 agg_id = ICE_INVALID_AGG_NODE_ID;
+ struct ice_agg_node *agg_node = NULL;
+ int node_offset, max_agg_nodes = 0;
+ struct ice_port_info *port_info;
+ struct ice_pf *pf = vsi->back;
+ u32 agg_node_id_start = 0;
+ enum ice_status status;
+
+ /* create (as needed) scheduler aggregator node and move VSI into
+ * corresponding aggregator node
+ * - PF aggregator node contains VSIs of type _PF and _CTRL
+ * - VF aggregator nodes will contain VF VSIs
+ */
+ port_info = pf->hw.port_info;
+ if (!port_info)
+ return;
+
+ switch (vsi->type) {
+ case ICE_VSI_CTRL:
+ case ICE_VSI_LB:
+ case ICE_VSI_PF:
+ max_agg_nodes = ICE_MAX_PF_AGG_NODES;
+ agg_node_id_start = ICE_PF_AGG_NODE_ID_START;
+ agg_node_iter = &pf->pf_agg_node[0];
+ break;
+ case ICE_VSI_VF:
+ /* user can create 'n' VFs on a given PF, but an aggregator node
+ * can have at most 64 children. The following code handles the
+ * aggregator(s) for VF VSIs: it either selects an agg_node that
+ * was already created and still has room (num_vsis < 64), or it
+ * selects the next available node, which will then be created
+ */
+ max_agg_nodes = ICE_MAX_VF_AGG_NODES;
+ agg_node_id_start = ICE_VF_AGG_NODE_ID_START;
+ agg_node_iter = &pf->vf_agg_node[0];
+ break;
+ default:
+ /* other VSI type, handle later if needed */
+ dev_dbg(dev, "unexpected VSI type %s\n",
+ ice_vsi_type_str(vsi->type));
+ return;
+ }
+
+ /* find the appropriate aggregator node */
+ for (node_offset = 0; node_offset < max_agg_nodes; node_offset++) {
+ /* see if we can find space in previously created
+ * node if num_vsis < 64, otherwise skip
+ */
+ if (agg_node_iter->num_vsis &&
+ agg_node_iter->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) {
+ agg_node_iter++;
+ continue;
+ }
+
+ if (agg_node_iter->valid &&
+ agg_node_iter->agg_id != ICE_INVALID_AGG_NODE_ID) {
+ agg_id = agg_node_iter->agg_id;
+ agg_node = agg_node_iter;
+ break;
+ }
+
+ /* find unclaimed agg_id */
+ if (agg_node_iter->agg_id == ICE_INVALID_AGG_NODE_ID) {
+ agg_id = node_offset + agg_node_id_start;
+ agg_node = agg_node_iter;
+ break;
+ }
+ /* move to next agg_node */
+ agg_node_iter++;
+ }
+
+ if (!agg_node)
+ return;
+
+ /* if selected aggregator node was not created, create it */
+ if (!agg_node->valid) {
+ status = ice_cfg_agg(port_info, agg_id, ICE_AGG_TYPE_AGG,
+ (u8)vsi->tc_cfg.ena_tc);
+ if (status) {
+ dev_err(dev, "unable to create aggregator node with agg_id %u\n",
+ agg_id);
+ return;
+ }
+ /* aggregator node is created, store the needed info */
+ agg_node->valid = true;
+ agg_node->agg_id = agg_id;
+ }
+
+ /* move VSI to corresponding aggregator node */
+ status = ice_move_vsi_to_agg(port_info, agg_id, vsi->idx,
+ (u8)vsi->tc_cfg.ena_tc);
+ if (status) {
+ dev_err(dev, "unable to move VSI idx %u into aggregator %u node",
+ vsi->idx, agg_id);
+ return;
+ }
+
+ /* keep active children count for aggregator node */
+ agg_node->num_vsis++;
+
+ /* cache the aggregator node in the VSI so that after a reset the
+ * VSI will be moved back to its aggregator node
+ */
+ vsi->agg_node = agg_node;
+ dev_dbg(dev, "successfully moved VSI idx %u tc_bitmap 0x%x) into aggregator node %d which has num_vsis %u\n",
+ vsi->idx, vsi->tc_cfg.ena_tc, vsi->agg_node->agg_id,
+ vsi->agg_node->num_vsis);
+}
+
+/**
* ice_vsi_setup - Set up a VSI by a given type
* @pf: board private structure
* @pi: pointer to the port_info instance
@@ -2327,6 +2454,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
ice_cfg_sw_lldp(vsi, true, true);
}
+ if (!vsi->agg_node)
+ ice_set_agg_vsi(vsi);
return vsi;
unroll_clear_rings:
@@ -2342,6 +2471,8 @@ unroll_vsi_init:
unroll_get_qs:
ice_vsi_put_qs(vsi);
unroll_vsi_alloc:
+ if (vsi_type == ICE_VSI_VF)
+ ice_enable_lag(pf->lag);
ice_vsi_clear(vsi);
return NULL;
@@ -2669,6 +2800,9 @@ int ice_vsi_release(struct ice_vsi *vsi)
vsi->netdev = NULL;
}
+ if (vsi->type == ICE_VSI_VF &&
+ vsi->agg_node && vsi->agg_node->valid)
+ vsi->agg_node->num_vsis--;
ice_vsi_clear_rings(vsi);
ice_vsi_put_qs(vsi);
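The selection loop in ice_set_agg_vsi() above reuses a previously created aggregator node that still has room, and otherwise claims the next unused slot. A user-space sketch of that loop (MAX_VSIS_IN_NODE assumed to be 64 per the comment in the patch; the invalid-ID value is illustrative):

#include <stdio.h>

#define INVALID_AGG_ID		0xffffffffu	/* illustrative sentinel */
#define MAX_VSIS_IN_NODE	64		/* max children per node */

struct agg_node { unsigned int agg_id; unsigned int num_vsis; int valid; };

/* mirrors the "find the appropriate aggregator node" loop */
static struct agg_node *pick_node(struct agg_node *nodes, int n,
				  unsigned int id_start, unsigned int *agg_id)
{
	int i;

	for (i = 0; i < n; i++) {
		struct agg_node *cur = &nodes[i];

		if (cur->num_vsis == MAX_VSIS_IN_NODE)
			continue;	/* full, try the next one */
		if (cur->valid && cur->agg_id != INVALID_AGG_ID) {
			*agg_id = cur->agg_id;
			return cur;	/* existing node with room */
		}
		if (cur->agg_id == INVALID_AGG_ID) {
			*agg_id = id_start + i;
			return cur;	/* unclaimed slot */
		}
	}
	return NULL;
}

int main(void)
{
	struct agg_node nodes[4] = { { INVALID_AGG_ID }, { INVALID_AGG_ID },
				     { INVALID_AGG_ID }, { INVALID_AGG_ID } };
	unsigned int id;

	if (pick_node(nodes, 4, 100, &id))
		printf("picked agg_id %u\n", id);	/* prints 100 */
	return 0;
}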
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 98cd44a3ccf7..813ec6b8ac23 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -44,6 +44,11 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
static void ice_vsi_release_all(struct ice_pf *pf);
+bool netif_is_ice(struct net_device *dev)
+{
+ return dev && (dev->netdev_ops == &ice_netdev_ops);
+}
+
/**
* ice_get_tx_pending - returns number of Tx descriptors not processed
* @ring: the ring of descriptors
@@ -430,11 +435,19 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf)
*/
static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
{
+ int node;
int v;
ice_for_each_vsi(pf, v)
if (pf->vsi[v])
ice_dis_vsi(pf->vsi[v], locked);
+
+ for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
+ pf->pf_agg_node[node].num_vsis = 0;
+
+ for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
+ pf->vf_agg_node[node].num_vsis = 0;
}
/**
@@ -2476,6 +2489,22 @@ free_qmap:
}
/**
+ * ice_vsi_rx_napi_schedule - Schedule NAPI on Rx queues of a VSI
+ * @vsi: VSI to schedule NAPI on
+ */
+static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
+{
+ int i;
+
+ ice_for_each_rxq(vsi, i) {
+ struct ice_ring *rx_ring = vsi->rx_rings[i];
+
+ if (rx_ring->xsk_pool)
+ napi_schedule(&rx_ring->q_vector->napi);
+ }
+}
+
+/**
* ice_xdp_setup_prog - Add or remove XDP eBPF program
* @vsi: VSI to setup XDP for
* @prog: XDP program
@@ -2519,16 +2548,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
if (if_running)
ret = ice_up(vsi);
- if (!ret && prog && vsi->xsk_pools) {
- int i;
-
- ice_for_each_rxq(vsi, i) {
- struct ice_ring *rx_ring = vsi->rx_rings[i];
-
- if (rx_ring->xsk_pool)
- napi_schedule(&rx_ring->q_vector->napi);
- }
- }
+ if (!ret && prog)
+ ice_vsi_rx_napi_schedule(vsi);
return (ret || xdp_ring_err) ? -ENOMEM : 0;
}
@@ -3370,28 +3391,20 @@ static int ice_init_pf(struct ice_pf *pf)
*/
static int ice_ena_msix_range(struct ice_pf *pf)
{
+ int v_left, v_actual, v_other, v_budget = 0;
struct device *dev = ice_pf_to_dev(pf);
- int v_left, v_actual, v_budget = 0;
int needed, err, i;
v_left = pf->hw.func_caps.common_cap.num_msix_vectors;
- /* reserve one vector for miscellaneous handler */
- needed = 1;
- if (v_left < needed)
- goto no_hw_vecs_left_err;
- v_budget += needed;
- v_left -= needed;
-
- /* reserve vectors for LAN traffic */
- needed = min_t(int, num_online_cpus(), v_left);
+ /* reserve for LAN miscellaneous handler */
+ needed = ICE_MIN_LAN_OICR_MSIX;
if (v_left < needed)
goto no_hw_vecs_left_err;
- pf->num_lan_msix = needed;
v_budget += needed;
v_left -= needed;
- /* reserve one vector for flow director */
+ /* reserve for flow director */
if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
needed = ICE_FDIR_MSIX;
if (v_left < needed)
@@ -3400,9 +3413,19 @@ static int ice_ena_msix_range(struct ice_pf *pf)
v_left -= needed;
}
+ /* total used for non-traffic vectors */
+ v_other = v_budget;
+
+ /* reserve vectors for LAN traffic */
+ needed = min_t(int, num_online_cpus(), v_left);
+ if (v_left < needed)
+ goto no_hw_vecs_left_err;
+ pf->num_lan_msix = needed;
+ v_budget += needed;
+ v_left -= needed;
+
pf->msix_entries = devm_kcalloc(dev, v_budget,
sizeof(*pf->msix_entries), GFP_KERNEL);
-
if (!pf->msix_entries) {
err = -ENOMEM;
goto exit_err;
@@ -3414,7 +3437,6 @@ static int ice_ena_msix_range(struct ice_pf *pf)
/* actually reserve the vectors */
v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
ICE_MIN_MSIX, v_budget);
-
if (v_actual < 0) {
dev_err(dev, "unable to reserve MSI-X vectors\n");
err = v_actual;
@@ -3431,7 +3453,16 @@ static int ice_ena_msix_range(struct ice_pf *pf)
err = -ERANGE;
goto msix_err;
} else {
- pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
+ int v_traffic = v_actual - v_other;
+
+ if (v_actual == ICE_MIN_MSIX ||
+ v_traffic < ICE_MIN_LAN_TXRX_MSIX)
+ pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
+ else
+ pf->num_lan_msix = v_traffic;
+
+ dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
+ pf->num_lan_msix);
}
}
@@ -4227,6 +4258,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
ice_cfg_lldp_mib_change(&pf->hw, true);
}
+ if (ice_init_lag(pf))
+ dev_warn(dev, "Failed to init link aggregation support\n");
+
/* print PCI link speed and width */
pcie_print_link_status(pf->pdev);
@@ -4349,6 +4383,7 @@ static void ice_remove(struct pci_dev *pdev)
ice_aq_cancel_waiting_tasks(pf);
mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
+ ice_deinit_lag(pf);
if (!ice_is_safe_mode(pf))
ice_remove_arfs(pf);
ice_setup_mc_magic_wake(pf);
@@ -6152,7 +6187,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
err = ice_down(vsi);
if (err) {
- netdev_err(netdev, "change MTU if_up err %d\n", err);
+ netdev_err(netdev, "change MTU if_down err %d\n", err);
return err;
}
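The reworked ice_ena_msix_range() above first totals the non-traffic vectors (v_other) and only then budgets the LAN traffic vectors, so a short grant from the OS is charged against LAN traffic rather than the fixed reservations. A sketch of that post-enable fixup arithmetic (the ICE_* values here are illustrative, not the driver's real defines):

#include <stdio.h>

#define ICE_MIN_LAN_OICR_MSIX	1	/* illustrative: misc/OICR vector */
#define ICE_FDIR_MSIX		2	/* illustrative: flow director */
#define ICE_MIN_LAN_TXRX_MSIX	1	/* illustrative: LAN traffic floor */

/* mirrors the fixup after pci_enable_msix_range(): whatever was
 * granted beyond the non-traffic reservations goes to LAN traffic,
 * with a minimum floor
 */
static int lan_vectors(int v_actual, int v_other, int v_min_msix)
{
	int v_traffic = v_actual - v_other;

	if (v_actual == v_min_msix || v_traffic < ICE_MIN_LAN_TXRX_MSIX)
		return ICE_MIN_LAN_TXRX_MSIX;
	return v_traffic;
}

int main(void)
{
	int v_other = ICE_MIN_LAN_OICR_MSIX + ICE_FDIR_MSIX;

	/* asked for v_other + num_online_cpus(), granted only 8 total */
	printf("LAN gets %d vectors\n", lan_vectors(8, v_other, 2));
	return 0;
}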
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index f0912e44d4ad..2403cb38b93c 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -431,6 +431,27 @@ ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req,
}
/**
+ * ice_aq_move_sched_elems - move scheduler elements
+ * @hw: pointer to the HW struct
+ * @grps_req: number of groups to move
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_movd: returns total number of groups moved
+ * @cd: pointer to command details structure or NULL
+ *
+ * Move scheduling elements (0x0408)
+ */
+static enum ice_status
+ice_aq_move_sched_elems(struct ice_hw *hw, u16 grps_req,
+ struct ice_aqc_move_elem *buf, u16 buf_size,
+ u16 *grps_movd, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_move_sched_elems,
+ grps_req, (void *)buf, buf_size,
+ grps_movd, cd);
+}
+
+/**
* ice_aq_suspend_sched_elems - suspend scheduler elements
* @hw: pointer to the HW struct
* @elems_req: number of elements to suspend
@@ -1022,6 +1043,28 @@ static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
}
/**
+ * ice_sched_get_agg_layer - get the current aggregator layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current aggregator layer number
+ */
+static u8 ice_sched_get_agg_layer(struct ice_hw *hw)
+{
+ /* Num Layers aggregator layer
+ * 9 4
+ * 7 or less sw_entry_point_layer
+ */
+ /* calculate the aggregator layer based on number of layers. */
+ if (hw->num_tx_sched_layers > ICE_AGG_LAYER_OFFSET + 1) {
+ u8 layer = hw->num_tx_sched_layers - ICE_AGG_LAYER_OFFSET;
+
+ if (layer > hw->sw_entry_point_layer)
+ return layer;
+ }
+ return hw->sw_entry_point_layer;
+}
+
+/**
* ice_rm_dflt_leaf_node - remove the default leaf node in the tree
* @pi: port information structure
*
@@ -1239,6 +1282,46 @@ sched_query_out:
}
/**
+ * ice_sched_get_psm_clk_freq - determine the PSM clock frequency
+ * @hw: pointer to the HW struct
+ *
+ * Determine the PSM clock frequency and store it in the HW struct
+ */
+void ice_sched_get_psm_clk_freq(struct ice_hw *hw)
+{
+ u32 val, clk_src;
+
+ val = rd32(hw, GLGEN_CLKSTAT_SRC);
+ clk_src = (val & GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M) >>
+ GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S;
+
+#define PSM_CLK_SRC_367_MHZ 0x0
+#define PSM_CLK_SRC_416_MHZ 0x1
+#define PSM_CLK_SRC_446_MHZ 0x2
+#define PSM_CLK_SRC_390_MHZ 0x3
+
+ switch (clk_src) {
+ case PSM_CLK_SRC_367_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_367MHZ_IN_HZ;
+ break;
+ case PSM_CLK_SRC_416_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_416MHZ_IN_HZ;
+ break;
+ case PSM_CLK_SRC_446_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ;
+ break;
+ case PSM_CLK_SRC_390_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_390MHZ_IN_HZ;
+ break;
+ default:
+ ice_debug(hw, ICE_DBG_SCHED, "PSM clk_src unexpected %u\n",
+ clk_src);
+ /* fall back to a safe default */
+ hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ;
+ }
+}
+
+/**
* ice_sched_find_node_in_subtree - Find node in part of base node subtree
* @hw: pointer to the HW struct
* @base: pointer to the base node
@@ -1364,7 +1447,7 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
/**
* ice_sched_get_vsi_node - Get a VSI node based on VSI ID
- * @hw: pointer to the HW struct
+ * @pi: pointer to the port information structure
* @tc_node: pointer to the TC node
* @vsi_handle: software VSI handle
*
@@ -1372,14 +1455,14 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
* TC branch
*/
static struct ice_sched_node *
-ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
+ice_sched_get_vsi_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
u16 vsi_handle)
{
struct ice_sched_node *node;
u8 vsi_layer;
- vsi_layer = ice_sched_get_vsi_layer(hw);
- node = ice_sched_get_first_node(hw->port_info, tc_node, vsi_layer);
+ vsi_layer = ice_sched_get_vsi_layer(pi->hw);
+ node = ice_sched_get_first_node(pi, tc_node, vsi_layer);
/* Check whether it already exists */
while (node) {
@@ -1392,6 +1475,38 @@ ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
}
/**
+ * ice_sched_get_agg_node - Get an aggregator node based on aggregator ID
+ * @pi: pointer to the port information structure
+ * @tc_node: pointer to the TC node
+ * @agg_id: aggregator ID
+ *
+ * This function retrieves an aggregator node for a given aggregator ID from
+ * a given TC branch
+ */
+static struct ice_sched_node *
+ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+ u32 agg_id)
+{
+ struct ice_sched_node *node;
+ struct ice_hw *hw = pi->hw;
+ u8 agg_layer;
+
+ if (!hw)
+ return NULL;
+ agg_layer = ice_sched_get_agg_layer(hw);
+ node = ice_sched_get_first_node(pi, tc_node, agg_layer);
+
+ /* Check whether it already exists */
+ while (node) {
+ if (node->agg_id == agg_id)
+ return node;
+ node = node->sibling;
+ }
+
+ return node;
+}
+
+/**
* ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
* @hw: pointer to the HW struct
* @num_qs: number of queues
@@ -1444,7 +1559,7 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
qgl = ice_sched_get_qgrp_layer(hw);
vsil = ice_sched_get_vsi_layer(hw);
- parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+ parent = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
for (i = vsil + 1; i <= qgl; i++) {
if (!parent)
return ICE_ERR_CFG;
@@ -1477,7 +1592,7 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
/**
* ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
- * @hw: pointer to the HW struct
+ * @pi: pointer to the port info structure
* @tc_node: pointer to TC node
* @num_nodes: pointer to num nodes array
*
@@ -1486,15 +1601,15 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
* layers
*/
static void
-ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
+ice_sched_calc_vsi_support_nodes(struct ice_port_info *pi,
struct ice_sched_node *tc_node, u16 *num_nodes)
{
struct ice_sched_node *node;
u8 vsil;
int i;
- vsil = ice_sched_get_vsi_layer(hw);
- for (i = vsil; i >= hw->sw_entry_point_layer; i--)
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+ for (i = vsil; i >= pi->hw->sw_entry_point_layer; i--)
/* Add intermediate nodes if TC has no children and
* need at least one node for VSI
*/
@@ -1504,11 +1619,10 @@ ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
/* If intermediate nodes are reached max children
* then add a new one.
*/
- node = ice_sched_get_first_node(hw->port_info, tc_node,
- (u8)i);
+ node = ice_sched_get_first_node(pi, tc_node, (u8)i);
/* scan all the siblings */
while (node) {
- if (node->num_children < hw->max_children[i])
+ if (node->num_children < pi->hw->max_children[i])
break;
node = node->sibling;
}
@@ -1588,14 +1702,13 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
{
u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
struct ice_sched_node *tc_node;
- struct ice_hw *hw = pi->hw;
tc_node = ice_sched_get_tc_node(pi, tc);
if (!tc_node)
return ICE_ERR_PARAM;
/* calculate number of supported nodes needed for this VSI */
- ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
+ ice_sched_calc_vsi_support_nodes(pi, tc_node, num_nodes);
/* add VSI supported nodes to TC subtree */
return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
@@ -1628,7 +1741,7 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
if (!tc_node)
return ICE_ERR_CFG;
- vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
if (!vsi_node)
return ICE_ERR_CFG;
@@ -1691,7 +1804,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
if (!vsi_ctx)
return ICE_ERR_PARAM;
- vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
/* suspend the VSI if TC is not enabled */
if (!enable) {
@@ -1712,7 +1825,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
if (status)
return status;
- vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
if (!vsi_node)
return ICE_ERR_CFG;
@@ -1821,7 +1934,7 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
if (!tc_node)
continue;
- vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle);
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
if (!vsi_node)
continue;
@@ -1874,6 +1987,720 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
}
/**
+ * ice_get_agg_info - get the aggregator ID
+ * @hw: pointer to the hardware structure
+ * @agg_id: aggregator ID
+ *
+ * This function validates the aggregator ID. It returns the aggregator
+ * info if the ID is present in the list, otherwise it returns NULL.
+ */
+static struct ice_sched_agg_info *
+ice_get_agg_info(struct ice_hw *hw, u32 agg_id)
+{
+ struct ice_sched_agg_info *agg_info;
+
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry)
+ if (agg_info->agg_id == agg_id)
+ return agg_info;
+
+ return NULL;
+}
+
+/**
+ * ice_sched_get_free_vsi_parent - Find a free parent node in aggregator subtree
+ * @hw: pointer to the HW struct
+ * @node: pointer to a child node
+ * @num_nodes: num nodes count array
+ *
+ * This function walks through the aggregator subtree to find a free parent
+ * node
+ */
+static struct ice_sched_node *
+ice_sched_get_free_vsi_parent(struct ice_hw *hw, struct ice_sched_node *node,
+ u16 *num_nodes)
+{
+ u8 l = node->tx_sched_layer;
+ u8 vsil, i;
+
+ vsil = ice_sched_get_vsi_layer(hw);
+
+ /* Is it VSI parent layer ? */
+ if (l == vsil - 1)
+ return (node->num_children < hw->max_children[l]) ? node : NULL;
+
+ /* We have intermediate nodes. Let's walk through the subtree. If the
+ * intermediate node has space to add a new node then clear the count
+ */
+ if (node->num_children < hw->max_children[l])
+ num_nodes[l] = 0;
+ /* The recursive call below is intentional and won't go more than
+ * two or three levels deep.
+ */
+ for (i = 0; i < node->num_children; i++) {
+ struct ice_sched_node *parent;
+
+ parent = ice_sched_get_free_vsi_parent(hw, node->children[i],
+ num_nodes);
+ if (parent)
+ return parent;
+ }
+
+ return NULL;
+}
+
+/**
+ * ice_sched_update_parent - update the new parent in SW DB
+ * @new_parent: pointer to a new parent node
+ * @node: pointer to a child node
+ *
+ * This function removes the child from the old parent and adds it to a new
+ * parent
+ */
+static void
+ice_sched_update_parent(struct ice_sched_node *new_parent,
+ struct ice_sched_node *node)
+{
+ struct ice_sched_node *old_parent;
+ u8 i, j;
+
+ old_parent = node->parent;
+
+ /* update the old parent children */
+ for (i = 0; i < old_parent->num_children; i++)
+ if (old_parent->children[i] == node) {
+ for (j = i + 1; j < old_parent->num_children; j++)
+ old_parent->children[j - 1] =
+ old_parent->children[j];
+ old_parent->num_children--;
+ break;
+ }
+
+ /* now move the node to a new parent */
+ new_parent->children[new_parent->num_children++] = node;
+ node->parent = new_parent;
+ node->info.parent_teid = new_parent->info.node_teid;
+}
+
+/**
+ * ice_sched_move_nodes - move child nodes to a given parent
+ * @pi: port information structure
+ * @parent: pointer to parent node
+ * @num_items: number of child nodes to be moved
+ * @list: pointer to child node teids
+ *
+ * This function moves the child nodes to a given parent.
+ */
+static enum ice_status
+ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent,
+ u16 num_items, u32 *list)
+{
+ struct ice_aqc_move_elem *buf;
+ struct ice_sched_node *node;
+ enum ice_status status = 0;
+ u16 i, grps_movd = 0;
+ struct ice_hw *hw;
+ u16 buf_len;
+
+ hw = pi->hw;
+
+ if (!parent || !num_items)
+ return ICE_ERR_PARAM;
+
+ /* Does parent have enough space? */
+ if (parent->num_children + num_items >
+ hw->max_children[parent->tx_sched_layer])
+ return ICE_ERR_AQ_FULL;
+
+ buf_len = struct_size(buf, teid, 1);
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf)
+ return ICE_ERR_NO_MEMORY;
+
+ for (i = 0; i < num_items; i++) {
+ node = ice_sched_find_node_by_teid(pi->root, list[i]);
+ if (!node) {
+ status = ICE_ERR_PARAM;
+ goto move_err_exit;
+ }
+
+ buf->hdr.src_parent_teid = node->info.parent_teid;
+ buf->hdr.dest_parent_teid = parent->info.node_teid;
+ buf->teid[0] = node->info.node_teid;
+ buf->hdr.num_elems = cpu_to_le16(1);
+ status = ice_aq_move_sched_elems(hw, 1, buf, buf_len,
+ &grps_movd, NULL);
+ if (status && grps_movd != 1) {
+ status = ICE_ERR_CFG;
+ goto move_err_exit;
+ }
+
+ /* update the SW DB */
+ ice_sched_update_parent(parent, node);
+ }
+
+move_err_exit:
+ kfree(buf);
+ return status;
+}
+
+/**
+ * ice_sched_move_vsi_to_agg - move VSI to aggregator node
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function moves a VSI to an aggregator node or its subtree.
+ * Intermediate nodes may be created if required.
+ */
+static enum ice_status
+ice_sched_move_vsi_to_agg(struct ice_port_info *pi, u16 vsi_handle, u32 agg_id,
+ u8 tc)
+{
+ struct ice_sched_node *vsi_node, *agg_node, *tc_node, *parent;
+ u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ u32 first_node_teid, vsi_teid;
+ enum ice_status status;
+ u16 num_nodes_added;
+ u8 aggl, vsil, i;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return ICE_ERR_CFG;
+
+ agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+ if (!agg_node)
+ return ICE_ERR_DOES_NOT_EXIST;
+
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+ if (!vsi_node)
+ return ICE_ERR_DOES_NOT_EXIST;
+
+ /* Is this VSI already part of given aggregator? */
+ if (ice_sched_find_node_in_subtree(pi->hw, agg_node, vsi_node))
+ return 0;
+
+ aggl = ice_sched_get_agg_layer(pi->hw);
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+
+ /* set intermediate node count to 1 between aggregator and VSI layers */
+ for (i = aggl + 1; i < vsil; i++)
+ num_nodes[i] = 1;
+
+ /* Check if the aggregator subtree has any free node to add the VSI */
+ for (i = 0; i < agg_node->num_children; i++) {
+ parent = ice_sched_get_free_vsi_parent(pi->hw,
+ agg_node->children[i],
+ num_nodes);
+ if (parent)
+ goto move_nodes;
+ }
+
+ /* add new nodes */
+ parent = agg_node;
+ for (i = aggl + 1; i < vsil; i++) {
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+ num_nodes[i],
+ &first_node_teid,
+ &num_nodes_added);
+ if (status || num_nodes[i] != num_nodes_added)
+ return ICE_ERR_CFG;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_nodes_added)
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ else
+ parent = parent->children[0];
+
+ if (!parent)
+ return ICE_ERR_CFG;
+ }
+
+move_nodes:
+ vsi_teid = le32_to_cpu(vsi_node->info.node_teid);
+ return ice_sched_move_nodes(pi, parent, 1, &vsi_teid);
+}
+
+/**
+ * ice_move_all_vsi_to_dflt_agg - move all VSI(s) to default aggregator
+ * @pi: port information structure
+ * @agg_info: aggregator info
+ * @tc: traffic class number
+ * @rm_vsi_info: remove VSI info if true
+ *
+ * This function moves all the VSI(s) to the default aggregator and deletes
+ * the aggregator VSI info based on the passed-in boolean parameter
+ * rm_vsi_info. The caller must hold the scheduler lock.
+ */
+static enum ice_status
+ice_move_all_vsi_to_dflt_agg(struct ice_port_info *pi,
+ struct ice_sched_agg_info *agg_info, u8 tc,
+ bool rm_vsi_info)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_vsi_info *tmp;
+ enum ice_status status = 0;
+
+ list_for_each_entry_safe(agg_vsi_info, tmp, &agg_info->agg_vsi_list,
+ list_entry) {
+ u16 vsi_handle = agg_vsi_info->vsi_handle;
+
+ /* Move VSI to default aggregator */
+ if (!ice_is_tc_ena(agg_vsi_info->tc_bitmap[0], tc))
+ continue;
+
+ status = ice_sched_move_vsi_to_agg(pi, vsi_handle,
+ ICE_DFLT_AGG_ID, tc);
+ if (status)
+ break;
+
+ clear_bit(tc, agg_vsi_info->tc_bitmap);
+ if (rm_vsi_info && !agg_vsi_info->tc_bitmap[0]) {
+ list_del(&agg_vsi_info->list_entry);
+ devm_kfree(ice_hw_to_dev(pi->hw), agg_vsi_info);
+ }
+ }
+
+ return status;
+}
+
+/**
+ * ice_sched_is_agg_inuse - check whether the aggregator is in use or not
+ * @pi: port information structure
+ * @node: node pointer
+ *
+ * This function checks whether any VSI is attached to the aggregator node.
+ */
+static bool
+ice_sched_is_agg_inuse(struct ice_port_info *pi, struct ice_sched_node *node)
+{
+ u8 vsil, i;
+
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+ if (node->tx_sched_layer < vsil - 1) {
+ for (i = 0; i < node->num_children; i++)
+ if (ice_sched_is_agg_inuse(pi, node->children[i]))
+ return true;
+ return false;
+ } else {
+ return node->num_children ? true : false;
+ }
+}
+
+/**
+ * ice_sched_rm_agg_cfg - remove the aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function removes the aggregator node and intermediate nodes if any
+ * from the given TC
+ */
+static enum ice_status
+ice_sched_rm_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc)
+{
+ struct ice_sched_node *tc_node, *agg_node;
+ struct ice_hw *hw = pi->hw;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return ICE_ERR_CFG;
+
+ agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+ if (!agg_node)
+ return ICE_ERR_DOES_NOT_EXIST;
+
+ /* Can't remove the aggregator node if it has children */
+ if (ice_sched_is_agg_inuse(pi, agg_node))
+ return ICE_ERR_IN_USE;
+
+ /* need to remove the whole subtree if aggregator node is the
+ * only child.
+ */
+ while (agg_node->tx_sched_layer > hw->sw_entry_point_layer) {
+ struct ice_sched_node *parent = agg_node->parent;
+
+ if (!parent)
+ return ICE_ERR_CFG;
+
+ if (parent->num_children > 1)
+ break;
+
+ agg_node = parent;
+ }
+
+ ice_free_sched_node(pi, agg_node);
+ return 0;
+}
+
+/**
+ * ice_rm_agg_cfg_tc - remove aggregator configuration for TC
+ * @pi: port information structure
+ * @agg_info: aggregator info
+ * @tc: TC number
+ * @rm_vsi_info: whether to remove the VSI info
+ *
+ * This function removes the aggregator's references to VSIs for the given TC
+ * and removes the aggregator configuration completely for the requested TC.
+ * The caller needs to hold the scheduler lock.
+ */
+static enum ice_status
+ice_rm_agg_cfg_tc(struct ice_port_info *pi, struct ice_sched_agg_info *agg_info,
+ u8 tc, bool rm_vsi_info)
+{
+ enum ice_status status = 0;
+
+ /* If nothing to remove - return success */
+ if (!ice_is_tc_ena(agg_info->tc_bitmap[0], tc))
+ goto exit_rm_agg_cfg_tc;
+
+ status = ice_move_all_vsi_to_dflt_agg(pi, agg_info, tc, rm_vsi_info);
+ if (status)
+ goto exit_rm_agg_cfg_tc;
+
+ /* Delete aggregator node(s) */
+ status = ice_sched_rm_agg_cfg(pi, agg_info->agg_id, tc);
+ if (status)
+ goto exit_rm_agg_cfg_tc;
+
+ clear_bit(tc, agg_info->tc_bitmap);
+exit_rm_agg_cfg_tc:
+ return status;
+}
+
+/**
+ * ice_save_agg_tc_bitmap - save aggregator TC bitmap
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc_bitmap: 8 bits TC bitmap
+ *
+ * Save aggregator TC bitmap. This function needs to be called with scheduler
+ * lock held.
+ */
+static enum ice_status
+ice_save_agg_tc_bitmap(struct ice_port_info *pi, u32 agg_id,
+ unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_info *agg_info;
+
+ agg_info = ice_get_agg_info(pi->hw, agg_id);
+ if (!agg_info)
+ return ICE_ERR_PARAM;
+ bitmap_copy(agg_info->replay_tc_bitmap, tc_bitmap,
+ ICE_MAX_TRAFFIC_CLASS);
+ return 0;
+}
+
+/**
+ * ice_sched_add_agg_cfg - create an aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function creates an aggregator node and intermediate nodes if required
+ * for the given TC
+ */
+static enum ice_status
+ice_sched_add_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc)
+{
+ struct ice_sched_node *parent, *agg_node, *tc_node;
+ u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+ u32 first_node_teid;
+ u16 num_nodes_added;
+ u8 i, aggl;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return ICE_ERR_CFG;
+
+ agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+ /* Does Agg node already exist? */
+ if (agg_node)
+ return status;
+
+ aggl = ice_sched_get_agg_layer(hw);
+
+ /* need one node in Agg layer */
+ num_nodes[aggl] = 1;
+
+ /* Check whether the intermediate nodes have space to add the
+ * new aggregator. If they are full, then SW needs to allocate a new
+ * intermediate node on those layers
+ */
+ for (i = hw->sw_entry_point_layer; i < aggl; i++) {
+ parent = ice_sched_get_first_node(pi, tc_node, i);
+
+ /* scan all the siblings */
+ while (parent) {
+ if (parent->num_children < hw->max_children[i])
+ break;
+ parent = parent->sibling;
+ }
+
+ /* all the nodes are full, reserve one for this layer */
+ if (!parent)
+ num_nodes[i]++;
+ }
+
+ /* add the aggregator node */
+ parent = tc_node;
+ for (i = hw->sw_entry_point_layer; i <= aggl; i++) {
+ if (!parent)
+ return ICE_ERR_CFG;
+
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+ num_nodes[i],
+ &first_node_teid,
+ &num_nodes_added);
+ if (status || num_nodes[i] != num_nodes_added)
+ return ICE_ERR_CFG;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_nodes_added) {
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ /* register aggregator ID with the aggregator node */
+ if (parent && i == aggl)
+ parent->agg_id = agg_id;
+ } else {
+ parent = parent->children[0];
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_sched_cfg_agg - configure aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @agg_type: aggregator type: queue, VSI, or aggregator group
+ * @tc_bitmap: 8 bits TC bitmap
+ *
+ * It registers a unique aggregator node into scheduler services. It
+ * allows a user to register with a unique ID to track its resources.
+ * The aggregator type determines if this is a queue group, VSI group
+ * or aggregator group. It then creates the aggregator node(s) for requested
+ * TC(s) or removes an existing aggregator node including its configuration
+ * if indicated via tc_bitmap. Call ice_rm_agg_cfg to release aggregator
+ * resources and remove aggregator ID.
+ * This function needs to be called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_cfg_agg(struct ice_port_info *pi, u32 agg_id,
+ enum ice_agg_type agg_type, unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_info *agg_info;
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+ u8 tc;
+
+ agg_info = ice_get_agg_info(hw, agg_id);
+ if (!agg_info) {
+ /* Create new entry for new aggregator ID */
+ agg_info = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*agg_info),
+ GFP_KERNEL);
+ if (!agg_info)
+ return ICE_ERR_NO_MEMORY;
+
+ agg_info->agg_id = agg_id;
+ agg_info->agg_type = agg_type;
+ agg_info->tc_bitmap[0] = 0;
+
+ /* Initialize the aggregator VSI list head */
+ INIT_LIST_HEAD(&agg_info->agg_vsi_list);
+
+ /* Add new entry in aggregator list */
+ list_add(&agg_info->list_entry, &hw->agg_list);
+ }
+ /* Create aggregator node(s) for requested TC(s) */
+ ice_for_each_traffic_class(tc) {
+ if (!ice_is_tc_ena(*tc_bitmap, tc)) {
+ /* Delete aggregator cfg TC if it exists previously */
+ status = ice_rm_agg_cfg_tc(pi, agg_info, tc, false);
+ if (status)
+ break;
+ continue;
+ }
+
+ /* Check if aggregator node for TC already exists */
+ if (ice_is_tc_ena(agg_info->tc_bitmap[0], tc))
+ continue;
+
+ /* Create new aggregator node for TC */
+ status = ice_sched_add_agg_cfg(pi, agg_id, tc);
+ if (status)
+ break;
+
+ /* Save aggregator node's TC information */
+ set_bit(tc, agg_info->tc_bitmap);
+ }
+
+ return status;
+}
+
+/**
+ * ice_cfg_agg - configure aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @agg_type: aggregator type: queue, VSI, or aggregator group
+ * @tc_bitmap: 8 bits TC bitmap
+ *
+ * This function configures aggregator node(s).
+ */
+enum ice_status
+ice_cfg_agg(struct ice_port_info *pi, u32 agg_id, enum ice_agg_type agg_type,
+ u8 tc_bitmap)
+{
+ unsigned long bitmap = tc_bitmap;
+ enum ice_status status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_cfg_agg(pi, agg_id, agg_type,
+ (unsigned long *)&bitmap);
+ if (!status)
+ status = ice_save_agg_tc_bitmap(pi, agg_id,
+ (unsigned long *)&bitmap);
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
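The exported wrapper above illustrates a pattern repeated in this file: callers pass a compact u8 TC bitmap, which is widened into an unsigned long on the stack so the bitmap-style helpers can take its address. A minimal standalone sketch of the idiom (plain C stand-in, no kernel headers; ICE_MAX_TRAFFIC_CLASS is 8):

#include <stdio.h>

int main(void)
{
        unsigned char tc_bitmap = 0x05;        /* TCs 0 and 2 enabled */
        /* widen to unsigned long so &bitmap can be handed to
         * bitmap-style helpers, mirroring the cast in ice_cfg_agg()
         */
        unsigned long bitmap = tc_bitmap;
        int tc;

        for (tc = 0; tc < 8; tc++)
                if (bitmap & (1UL << tc))
                        printf("TC %d enabled\n", tc);
        return 0;
}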
+/**
+ * ice_get_agg_vsi_info - get the aggregator VSI info
+ * @agg_info: aggregator info
+ * @vsi_handle: software VSI handle
+ *
+ * The function returns aggregator VSI info based on VSI handle. This function
+ * needs to be called with scheduler lock held.
+ */
+static struct ice_sched_agg_vsi_info *
+ice_get_agg_vsi_info(struct ice_sched_agg_info *agg_info, u16 vsi_handle)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+ list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list, list_entry)
+ if (agg_vsi_info->vsi_handle == vsi_handle)
+ return agg_vsi_info;
+
+ return NULL;
+}
+
+/**
+ * ice_get_vsi_agg_info - get the aggregator info of VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ *
+ * The function returns the aggregator info of the VSI represented via
+ * vsi_handle. The VSI in this case has a different aggregator than the
+ * default one. This function needs to be called with scheduler lock held.
+ */
+static struct ice_sched_agg_info *
+ice_get_vsi_agg_info(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_sched_agg_info *agg_info;
+
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry) {
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (agg_vsi_info)
+ return agg_info;
+ }
+ return NULL;
+}
+
+/**
+ * ice_save_agg_vsi_tc_bitmap - save aggregator VSI TC bitmap
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * Save VSI to aggregator TC bitmap. This function needs to be called with
+ * the scheduler lock held.
+ */
+static enum ice_status
+ice_save_agg_vsi_tc_bitmap(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+ unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_info *agg_info;
+
+ agg_info = ice_get_agg_info(pi->hw, agg_id);
+ if (!agg_info)
+ return ICE_ERR_PARAM;
+ /* check if entry already exists */
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (!agg_vsi_info)
+ return ICE_ERR_PARAM;
+ bitmap_copy(agg_vsi_info->replay_tc_bitmap, tc_bitmap,
+ ICE_MAX_TRAFFIC_CLASS);
+ return 0;
+}
+
+/**
+ * ice_sched_assoc_vsi_to_agg - associate/move VSI to new/default aggregator
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * This function moves a VSI to a new or default aggregator node. If the VSI
+ * is already associated with the aggregator node, then no operation is
+ * performed on the tree. This function needs to be called with the scheduler
+ * lock held.
+ */
+static enum ice_status
+ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
+ u16 vsi_handle, unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_info *agg_info;
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+ u8 tc;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ return ICE_ERR_PARAM;
+ agg_info = ice_get_agg_info(hw, agg_id);
+ if (!agg_info)
+ return ICE_ERR_PARAM;
+ /* check if entry already exists */
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (!agg_vsi_info) {
+ /* Create new entry for VSI under aggregator list */
+ agg_vsi_info = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*agg_vsi_info), GFP_KERNEL);
+ if (!agg_vsi_info)
+ return ICE_ERR_PARAM;
+
+ /* add VSI ID into the aggregator list */
+ agg_vsi_info->vsi_handle = vsi_handle;
+ list_add(&agg_vsi_info->list_entry, &agg_info->agg_vsi_list);
+ }
+ /* Move VSI node to new aggregator node for requested TC(s) */
+ ice_for_each_traffic_class(tc) {
+ if (!ice_is_tc_ena(*tc_bitmap, tc))
+ continue;
+
+ /* Move VSI to new aggregator */
+ status = ice_sched_move_vsi_to_agg(pi, vsi_handle, agg_id, tc);
+ if (status)
+ break;
+
+ set_bit(tc, agg_vsi_info->tc_bitmap);
+ }
+ return status;
+}
+
+/**
* ice_sched_rm_unused_rl_prof - remove unused RL profile
* @pi: port information structure
*
@@ -1955,7 +2782,6 @@ ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
{
struct ice_aqc_txsched_elem_data buf;
struct ice_aqc_txsched_elem *data;
- enum ice_status status;
buf = node->info;
data = &buf.data;
@@ -1970,7 +2796,32 @@ ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
}
/* Configure element */
- status = ice_sched_update_elem(hw, node, &buf);
+ return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_move_vsi_to_agg - moves VSI to new or default aggregator
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * Move or associate VSI to a new or default aggregator node.
+ */
+enum ice_status
+ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+ u8 tc_bitmap)
+{
+ unsigned long bitmap = tc_bitmap;
+ enum ice_status status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_assoc_vsi_to_agg(pi, agg_id, vsi_handle,
+ (unsigned long *)&bitmap);
+ if (!status)
+ status = ice_save_agg_vsi_tc_bitmap(pi, agg_id, vsi_handle,
+ (unsigned long *)&bitmap);
+ mutex_unlock(&pi->sched_lock);
return status;
}
@@ -2045,11 +2896,12 @@ static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
/**
* ice_sched_calc_wakeup - calculate RL profile wakeup parameter
+ * @hw: pointer to the HW struct
* @bw: bandwidth in Kbps
*
* This function calculates the wakeup parameter of RL profile.
*/
-static u16 ice_sched_calc_wakeup(s32 bw)
+static u16 ice_sched_calc_wakeup(struct ice_hw *hw, s32 bw)
{
s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f;
s32 wakeup_f_int;
@@ -2057,7 +2909,7 @@ static u16 ice_sched_calc_wakeup(s32 bw)
/* Get the wakeup integer value */
bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
- wakeup_int = div64_long(ICE_RL_PROF_FREQUENCY, bytes_per_sec);
+ wakeup_int = div64_long(hw->psm_clk_freq, bytes_per_sec);
if (wakeup_int > 63) {
wakeup = (u16)((1 << 15) | wakeup_int);
} else {
@@ -2066,8 +2918,7 @@ static u16 ice_sched_calc_wakeup(s32 bw)
*/
wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int;
wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER *
- ICE_RL_PROF_FREQUENCY,
- bytes_per_sec);
+ hw->psm_clk_freq, bytes_per_sec);
/* Get Fraction value */
wakeup_f = wakeup_a - wakeup_b;
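To make the wakeup arithmetic above concrete, here is a standalone userspace sketch that plugs in the 446 MHz PSM clock and a 100 Mbps rate. The fraction rounding and the final bit packing are assumptions (those lines fall outside this hunk), so treat the result as illustrative only.

#include <stdio.h>

#define MULTIPLIER 10000      /* ICE_RL_PROF_MULTIPLIER */
#define FRACTION   512        /* ICE_RL_PROF_FRACTION */

int main(void)
{
        long long psm_clk_freq = 446428571;  /* ICE_PSM_CLK_446MHZ_IN_HZ */
        long long bw = 100000;               /* 100 Mbps, in Kbps */
        long long bytes_per_sec = bw * 1000 / 8;
        long long wakeup_int = psm_clk_freq / bytes_per_sec;
        unsigned short wakeup;

        if (wakeup_int > 63) {
                wakeup = (unsigned short)((1 << 15) | wakeup_int);
        } else {
                long long wakeup_b = MULTIPLIER * wakeup_int;
                long long wakeup_a = MULTIPLIER * psm_clk_freq /
                                     bytes_per_sec;
                long long wakeup_f = wakeup_a - wakeup_b;
                /* pack integer and fraction; this 9-bit fraction layout
                 * is an assumption, not copied from the patch
                 */
                long long wakeup_f_int = wakeup_f * FRACTION / MULTIPLIER;

                wakeup = (unsigned short)((wakeup_int << 9) |
                                          (wakeup_f_int & 0x1ff));
        }

        printf("bytes/s=%lld int=%lld wakeup=0x%04x\n",
               bytes_per_sec, wakeup_int, (unsigned)wakeup);
        return 0;
}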
@@ -2087,13 +2938,15 @@ static u16 ice_sched_calc_wakeup(s32 bw)
/**
* ice_sched_bw_to_rl_profile - convert BW to profile parameters
+ * @hw: pointer to the HW struct
* @bw: bandwidth in Kbps
* @profile: profile parameters to return
*
* This function converts the BW to profile structure format.
*/
static enum ice_status
-ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
+ice_sched_bw_to_rl_profile(struct ice_hw *hw, u32 bw,
+ struct ice_aqc_rl_profile_elem *profile)
{
enum ice_status status = ICE_ERR_PARAM;
s64 bytes_per_sec, ts_rate, mv_tmp;
@@ -2113,7 +2966,7 @@ ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
for (i = 0; i < 64; i++) {
u64 pow_result = BIT_ULL(i);
- ts_rate = div64_long((s64)ICE_RL_PROF_FREQUENCY,
+ ts_rate = div64_long((s64)hw->psm_clk_freq,
pow_result * ICE_RL_PROF_TS_MULTIPLIER);
if (ts_rate <= 0)
continue;
@@ -2137,7 +2990,7 @@ ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
if (found) {
u16 wm;
- wm = ice_sched_calc_wakeup(bw);
+ wm = ice_sched_calc_wakeup(hw, bw);
profile->rl_multiply = cpu_to_le16(mv);
profile->wake_up_calc = cpu_to_le16(wm);
profile->rl_encode = cpu_to_le16(encode);
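Similarly, the encode search above can be modeled in userspace: each candidate timeslice rate is psm_clk_freq / (2^i * ICE_RL_PROF_TS_MULTIPLIER), and the first index whose byte multiplier exceeds ICE_RL_PROF_ACCURACY_BYTES wins. A sketch under those assumptions, rounding up rather than to-nearest for simplicity:

#include <stdio.h>

#define TS_MULTIPLIER 32    /* ICE_RL_PROF_TS_MULTIPLIER */
#define MULTIPLIER    10000 /* ICE_RL_PROF_MULTIPLIER */
#define ACCURACY      128   /* ICE_RL_PROF_ACCURACY_BYTES */

int main(void)
{
        long long psm_clk_freq = 446428571;   /* 446 MHz PSM clock */
        long long bytes_per_sec = 12500000;   /* 100 Mbps */
        int i;

        for (i = 0; i < 64; i++) {
                long long ts_rate = psm_clk_freq /
                                    (long long)((1ULL << i) * TS_MULTIPLIER);
                long long mv_tmp, mv;

                if (ts_rate <= 0)
                        continue;

                mv_tmp = bytes_per_sec * MULTIPLIER / ts_rate;
                /* round up to the nearest MULTIPLIER, then scale down */
                mv = (mv_tmp + MULTIPLIER - 1) / MULTIPLIER;

                /* first multiplier larger than the accuracy in bytes */
                if (mv > ACCURACY) {
                        printf("encode=%d ts_rate=%lld multiplier=%lld\n",
                               i, ts_rate, mv);
                        break;
                }
        }
        return 0;
}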
@@ -2206,7 +3059,7 @@ ice_sched_add_rl_profile(struct ice_port_info *pi,
if (!rl_prof_elem)
return NULL;
- status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile);
+ status = ice_sched_bw_to_rl_profile(hw, bw, &rl_prof_elem->profile);
if (status)
goto exit_add_rl_prof;
@@ -2941,6 +3794,156 @@ ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node,
}
/**
+ * ice_sched_get_ena_tc_bitmap - get enabled TC bitmap
+ * @pi: port info struct
+ * @tc_bitmap: 8 bits TC bitmap to check
+ * @ena_tc_bitmap: 8 bits enabled TC bitmap to return
+ *
+ * This function returns the enabled TC bitmap in the variable ena_tc_bitmap.
+ * Some TCs may be missing after a reset, so only the TCs whose scheduler
+ * nodes exist are returned. This function needs to be called with the
+ * scheduler lock held.
+ */
+static void
+ice_sched_get_ena_tc_bitmap(struct ice_port_info *pi,
+ unsigned long *tc_bitmap,
+ unsigned long *ena_tc_bitmap)
+{
+ u8 tc;
+
+ /* Some TC(s) may be missing after reset, adjust for replay */
+ ice_for_each_traffic_class(tc)
+ if (ice_is_tc_ena(*tc_bitmap, tc) &&
+ (ice_sched_get_tc_node(pi, tc)))
+ set_bit(tc, ena_tc_bitmap);
+}
+
+/**
+ * ice_sched_replay_agg - recreate aggregator node(s)
+ * @hw: pointer to the HW struct
+ *
+ * This function recreates aggregator type nodes that were not replayed
+ * earlier. It also replays aggregator BW information. These aggregator
+ * nodes are not associated with VSI type nodes yet.
+ */
+void ice_sched_replay_agg(struct ice_hw *hw)
+{
+ struct ice_port_info *pi = hw->port_info;
+ struct ice_sched_agg_info *agg_info;
+
+ mutex_lock(&pi->sched_lock);
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry)
+ /* replay aggregator (re-create aggregator node) */
+ if (!bitmap_equal(agg_info->tc_bitmap, agg_info->replay_tc_bitmap,
+ ICE_MAX_TRAFFIC_CLASS)) {
+ DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ enum ice_status status;
+
+ bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ ice_sched_get_ena_tc_bitmap(pi,
+ agg_info->replay_tc_bitmap,
+ replay_bitmap);
+ status = ice_sched_cfg_agg(hw->port_info,
+ agg_info->agg_id,
+ ICE_AGG_TYPE_AGG,
+ replay_bitmap);
+ if (status) {
+ dev_info(ice_hw_to_dev(hw),
+ "Replay agg id[%d] failed\n",
+ agg_info->agg_id);
+ /* Move on to next one */
+ continue;
+ }
+ }
+ mutex_unlock(&pi->sched_lock);
+}
+
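The replay loop above only re-creates aggregators whose current TC bitmap has fallen out of sync with the bitmap saved before reset. A standalone model of that decision (hypothetical bitmap values; the driver additionally filters through ice_sched_get_ena_tc_bitmap()):

#include <stdio.h>

int main(void)
{
        unsigned long replay_tc_bitmap = 0x7; /* saved: TCs 0-2 */
        unsigned long tc_bitmap = 0x1;        /* rebuilt so far: TC 0 */

        if (tc_bitmap != replay_tc_bitmap) {
                unsigned long missing = replay_tc_bitmap & ~tc_bitmap;
                int tc;

                for (tc = 0; tc < 8; tc++)
                        if (missing & (1UL << tc))
                                printf("re-create aggregator node for TC %d\n",
                                       tc);
        }
        return 0;
}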
+/**
+ * ice_sched_replay_agg_vsi_preinit - Agg/VSI replay pre-initialization
+ * @hw: pointer to the HW struct
+ *
+ * This function initializes the aggregator(s) TC bitmap to zero, a required
+ * preinit step for replaying aggregators.
+ */
+void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw)
+{
+ struct ice_port_info *pi = hw->port_info;
+ struct ice_sched_agg_info *agg_info;
+
+ mutex_lock(&pi->sched_lock);
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry) {
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+ agg_info->tc_bitmap[0] = 0;
+ list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list,
+ list_entry)
+ agg_vsi_info->tc_bitmap[0] = 0;
+ }
+ mutex_unlock(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_replay_vsi_agg - replay aggregator & VSI to aggregator node(s)
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ *
+ * This function replays the aggregator node, VSI to aggregator type nodes,
+ * and their node bandwidth information. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle)
+{
+ DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_port_info *pi = hw->port_info;
+ struct ice_sched_agg_info *agg_info;
+ enum ice_status status;
+
+ bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return ICE_ERR_PARAM;
+ agg_info = ice_get_vsi_agg_info(hw, vsi_handle);
+ if (!agg_info)
+ return 0; /* Not present in list - default Agg case */
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (!agg_vsi_info)
+ return 0; /* Not present in list - default Agg case */
+ ice_sched_get_ena_tc_bitmap(pi, agg_info->replay_tc_bitmap,
+ replay_bitmap);
+ /* Replay aggregator node associated with vsi_handle */
+ status = ice_sched_cfg_agg(hw->port_info, agg_info->agg_id,
+ ICE_AGG_TYPE_AGG, replay_bitmap);
+ if (status)
+ return status;
+
+ bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ ice_sched_get_ena_tc_bitmap(pi, agg_vsi_info->replay_tc_bitmap,
+ replay_bitmap);
+ /* Move this VSI (vsi_handle) to above aggregator */
+ return ice_sched_assoc_vsi_to_agg(pi, agg_info->agg_id, vsi_handle,
+ replay_bitmap);
+}
+
+/**
+ * ice_replay_vsi_agg - replay VSI to aggregator node
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ *
+ * This function replays association of VSI to aggregator type nodes, and
+ * node bandwidth information.
+ */
+enum ice_status ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_port_info *pi = hw->port_info;
+ enum ice_status status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_replay_vsi_agg(hw, vsi_handle);
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
* ice_sched_replay_q_bw - replay queue type node BW
* @pi: port information structure
* @q_ctx: queue context structure
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index 0e55ae0d446f..9beef8f0ec76 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -8,6 +8,7 @@
#define ICE_QGRP_LAYER_OFFSET 2
#define ICE_VSI_LAYER_OFFSET 4
+#define ICE_AGG_LAYER_OFFSET 6
#define ICE_SCHED_INVAL_LAYER_NUM 0xFF
/* Burst size is a 12 bits register that is configured while creating the RL
* profile(s). MSB is a granularity bit and tells the granularity type
@@ -23,12 +24,16 @@
((BIT(11) - 1) * 64) /* In Bytes */
#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY ICE_MAX_BURST_SIZE_ALLOWED
-#define ICE_RL_PROF_FREQUENCY 446000000
#define ICE_RL_PROF_ACCURACY_BYTES 128
#define ICE_RL_PROF_MULTIPLIER 10000
#define ICE_RL_PROF_TS_MULTIPLIER 32
#define ICE_RL_PROF_FRACTION 512
+#define ICE_PSM_CLK_367MHZ_IN_HZ 367647059
+#define ICE_PSM_CLK_416MHZ_IN_HZ 416666667
+#define ICE_PSM_CLK_446MHZ_IN_HZ 446428571
+#define ICE_PSM_CLK_390MHZ_IN_HZ 390625000
+
/* BW rate limit profile parameters list entry along
* with bandwidth maintained per layer in port info
*/
@@ -43,6 +48,8 @@ struct ice_sched_agg_vsi_info {
struct list_head list_entry;
DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
u16 vsi_handle;
+ /* save aggregator VSI TC bitmap */
+ DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
};
struct ice_sched_agg_info {
@@ -51,6 +58,8 @@ struct ice_sched_agg_info {
DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
u32 agg_id;
enum ice_agg_type agg_type;
+ /* save aggregator TC bitmap */
+ DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
};
/* FW AQ command calls */
@@ -60,6 +69,8 @@ ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
u16 *elems_ret, struct ice_sq_cd *cd);
enum ice_status ice_sched_init_port(struct ice_port_info *pi);
enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw);
+void ice_sched_get_psm_clk_freq(struct ice_hw *hw);
+
void ice_sched_clear_port(struct ice_port_info *pi);
void ice_sched_cleanup_all(struct ice_hw *hw);
void ice_sched_clear_agg(struct ice_hw *hw);
@@ -78,6 +89,14 @@ enum ice_status
ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
u8 owner, bool enable);
enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
+
+/* Tx scheduler rate limiter functions */
+enum ice_status
+ice_cfg_agg(struct ice_port_info *pi, u32 agg_id,
+ enum ice_agg_type agg_type, u8 tc_bitmap);
+enum ice_status
+ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+ u8 tc_bitmap);
enum ice_status
ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
u16 q_handle, enum ice_rl_type rl_type, u32 bw);
@@ -85,6 +104,9 @@ enum ice_status
ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
u16 q_handle, enum ice_rl_type rl_type);
enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
+void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw);
+void ice_sched_replay_agg(struct ice_hw *hw);
+enum ice_status ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle);
enum ice_status
ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx);
#endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index c33612132ddf..67c965a3f5d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -603,7 +603,7 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw)
}
} while (req_desc && !status);
- devm_kfree(ice_hw_to_dev(hw), (void *)rbuf);
+ devm_kfree(ice_hw_to_dev(hw), rbuf);
return status;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 8ca63c6a6ba4..580419813bb2 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1497,22 +1497,11 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
struct ice_vsi *vsi = q_vector->vsi;
u32 itr_val;
- /* when exiting WB_ON_ITR lets set a low ITR value and trigger
- * interrupts to expire right away in case we have more work ready to go
- * already
+ /* when exiting WB_ON_ITR just reset the countdown and let ITR
+ * resume its normal "interrupts-enabled" path
*/
- if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) {
- itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS);
- wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
- /* set target back to last user set value */
- rx->target_itr = rx->itr_setting;
- /* set current to what we just wrote and dynamic if needed */
- rx->current_itr = ICE_WB_ON_ITR_USECS |
- (rx->itr_setting & ICE_ITR_DYNAMIC);
- /* allow normal interrupt flow to start */
+ if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
q_vector->itr_countdown = 0;
- return;
- }
/* This will do nothing if dynamic updates are not enabled */
ice_update_itr(q_vector, tx);
@@ -1552,10 +1541,8 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
q_vector->itr_countdown--;
}
- if (!test_bit(__ICE_DOWN, q_vector->vsi->state))
- wr32(&q_vector->vsi->back->hw,
- GLINT_DYN_CTL(q_vector->reg_idx),
- itr_val);
+ if (!test_bit(__ICE_DOWN, vsi->state))
+ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
}
/**
@@ -1565,30 +1552,29 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
* We need to tell hardware to write-back completed descriptors even when
* interrupts are disabled. Descriptors will be written back on cache line
* boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
- * descriptors may not be written back if they don't fill a cache line until the
- * next interrupt.
+ * descriptors may not be written back if they don't fill a cache line until
+ * the next interrupt.
*
- * This sets the write-back frequency to 2 microseconds as that is the minimum
- * value that's not 0 due to ITR granularity. Also, set the INTENA_MSK bit to
- * make sure hardware knows we aren't meddling with the INTENA_M bit.
+ * This sets the write-back frequency to whatever was set previously for the
+ * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we
+ * aren't meddling with the INTENA_M bit.
*/
static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
{
struct ice_vsi *vsi = q_vector->vsi;
- /* already in WB_ON_ITR mode no need to change it */
+ /* already in wb_on_itr mode, no need to change it */
if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
return;
- if (q_vector->num_ring_rx)
- wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
- ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
- ICE_RX_ITR));
-
- if (q_vector->num_ring_tx)
- wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
- ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
- ICE_TX_ITR));
+ /* use previously set ITR values for all of the ITR indices by
+ * specifying ICE_ITR_NONE, which will vary in adaptive (AIM) mode and
+ * be static in non-adaptive mode (user configured)
+ */
+ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+ ((ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) &
+ GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M |
+ GLINT_DYN_CTL_WB_ON_ITR_M);
q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE;
}
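The register write above composes a single GLINT_DYN_CTL value out of three pieces: the ICE_ITR_NONE index, INTENA_MSK, and WB_ON_ITR, while leaving the INTENA bit itself clear so interrupts stay disabled while write-backs continue. The standalone sketch below reproduces that composition; the bit positions are assumptions modeled on ice_hw_autogen.h, not copied from this patch.

#include <stdio.h>

/* assumed field layout, for illustration only */
#define ITR_INDX_S   3
#define ITR_INDX_M   (0x3u << ITR_INDX_S)
#define INTENA_MSK_M (1u << 31)
#define WB_ON_ITR_M  (1u << 30)
#define ITR_NONE     3   /* "no ITR update" index */

int main(void)
{
        unsigned int val = ((ITR_NONE << ITR_INDX_S) & ITR_INDX_M) |
                           INTENA_MSK_M | WB_ON_ITR_M;

        /* INTENA itself is deliberately left clear: interrupts remain
         * disabled while descriptor write-backs occur on ITR expiry
         */
        printf("GLINT_DYN_CTL value: 0x%08x\n", val);
        return 0;
}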
@@ -1655,8 +1641,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
}
/* If work not completed, return budget and polling will return */
- if (!clean_complete)
+ if (!clean_complete) {
+ /* Set the writeback on ITR so partial completions of
+ * cache-lines will still continue even if we're polling.
+ */
+ ice_set_wb_on_itr(q_vector);
return budget;
+ }
/* Exit the polling mode, but don't re-enable interrupts if stack might
* poll us due to busy-polling
@@ -2413,7 +2404,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
/* allow CONTROL frames egress from main VSI if FW LLDP disabled */
if (unlikely(skb->priority == TC_PRIO_CONTROL &&
vsi->type == ICE_VSI_PF &&
- vsi->port_info->is_sw_lldp))
+ vsi->port_info->qos_cfg.is_sw_lldp))
offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
ICE_TX_CTX_DESC_SWTCH_UPLINK <<
ICE_TXD_CTX_QW1_CMD_S);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index ff1a1cbd078e..db56a0c8bfe1 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -240,7 +240,6 @@ enum ice_rx_dtype {
#define ICE_DFLT_INTRL 0
#define ICE_MAX_INTRL 236
-#define ICE_WB_ON_ITR_USECS 2
#define ICE_IN_WB_ON_ITR_MODE 255
/* Sets WB_ON_ITR and assumes INTENA bit is already cleared, which allows
* setting the MSK_M bit to tell hardware to ignore the INTENA_M bit. Also,
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index a98800a91045..a6cb0c35748c 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -403,7 +403,11 @@ struct ice_link_default_override_tlv {
#define ice_for_each_traffic_class(_i) \
for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++)
+/* ICE_DFLT_AGG_ID means that all new VM(s)/VSI nodes connect
+ * to the driver-defined policy for the default aggregator
+ */
#define ICE_INVAL_TEID 0xFFFFFFFF
+#define ICE_DFLT_AGG_ID 0
struct ice_sched_node {
struct ice_sched_node *parent;
@@ -552,6 +556,14 @@ struct ice_dcbx_cfg {
#define ICE_DCBX_APPS_NON_WILLING 0x1
};
+struct ice_qos_cfg {
+ struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */
+ struct ice_dcbx_cfg desired_dcbx_cfg; /* CEE Desired Cfg */
+ struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */
+ u8 dcbx_status : 3; /* see ICE_DCBX_STATUS_DIS */
+ u8 is_sw_lldp : 1;
+};
+
struct ice_port_info {
struct ice_sched_node *root; /* Root Node per Port */
struct ice_hw *hw; /* back pointer to HW instance */
@@ -575,13 +587,7 @@ struct ice_port_info {
sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
/* List contain profile ID(s) and other params per layer */
struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM];
- struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */
- /* DCBX info */
- struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */
- struct ice_dcbx_cfg desired_dcbx_cfg; /* CEE Desired Cfg */
- /* LLDP/DCBX Status */
- u8 dcbx_status:3; /* see ICE_DCBX_STATUS_DIS */
- u8 is_sw_lldp:1;
+ struct ice_qos_cfg qos_cfg;
u8 is_vf:1;
};
@@ -614,6 +620,8 @@ struct ice_hw {
void *back;
struct ice_aqc_layer_props *layer_info;
struct ice_port_info *port_info;
+ /* PSM clock frequency for calculating RL profile params */
+ u32 psm_clk_freq;
u64 debug_mask; /* bitmap for debug mask */
enum ice_mac_type mac_type;
@@ -902,4 +910,9 @@ struct ice_hw_port_stats {
/* Hash redirection LUT for VSI - maximum array size */
#define ICE_VSIQF_HLUT_ARRAY_SIZE ((VSIQF_HLUT_MAX_INDEX + 1) * 4)
+/* AQ API version for LLDP_FILTER_CONTROL */
+#define ICE_FW_API_LLDP_FLTR_MAJ 1
+#define ICE_FW_API_LLDP_FLTR_MIN 7
+#define ICE_FW_API_LLDP_FLTR_PATCH 1
+
#endif /* _ICE_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index ec7f6c64132e..bf5fd812ea0e 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1057,11 +1057,45 @@ static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf)
}
/**
+ * ice_vf_rebuild_aggregator_node_cfg - rebuild aggregator node config
+ * @vsi: Pointer to VSI
+ *
+ * This function moves the VSI into the corresponding scheduler aggregator
+ * node based on the cached per-VSI value of "aggregator node info"
+ */
+static void ice_vf_rebuild_aggregator_node_cfg(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ enum ice_status status;
+ struct device *dev;
+
+ if (!vsi->agg_node)
+ return;
+
+ dev = ice_pf_to_dev(pf);
+ if (vsi->agg_node->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) {
+ dev_dbg(dev,
+ "agg_id %u already has reached max_num_vsis %u\n",
+ vsi->agg_node->agg_id, vsi->agg_node->num_vsis);
+ return;
+ }
+
+ status = ice_move_vsi_to_agg(pf->hw.port_info, vsi->agg_node->agg_id,
+ vsi->idx, vsi->tc_cfg.ena_tc);
+ if (status)
+ dev_dbg(dev, "unable to move VSI idx %u into aggregator %u node",
+ vsi->idx, vsi->agg_node->agg_id);
+ else
+ vsi->agg_node->num_vsis++;
+}
+
+/**
* ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset
* @vf: VF to rebuild host configuration on
*/
static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
{
+ struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
struct device *dev = ice_pf_to_dev(vf->pf);
ice_vf_set_host_trust_cfg(vf);
@@ -1073,6 +1107,8 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
if (ice_vf_rebuild_host_vlan_cfg(vf))
dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
vf->vf_id);
+ /* rebuild aggregator node config for main VF VSI */
+ ice_vf_rebuild_aggregator_node_cfg(vsi);
}
/**
@@ -1677,6 +1713,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!num_vfs) {
if (!pci_vfs_assigned(pdev)) {
ice_free_vfs(pf);
+ if (pf->lag)
+ ice_enable_lag(pf->lag);
return 0;
}
@@ -1688,6 +1726,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (err)
return err;
+ if (pf->lag)
+ ice_disable_lag(pf->lag);
return num_vfs;
}
@@ -2312,12 +2352,12 @@ bool ice_is_any_vf_in_promisc(struct ice_pf *pf)
static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
{
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ bool rm_promisc, alluni = false, allmulti = false;
struct virtchnl_promisc_info *info =
(struct virtchnl_promisc_info *)msg;
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
struct device *dev;
- bool rm_promisc;
int ret = 0;
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -2344,8 +2384,13 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
goto error_param;
}
- rm_promisc = !(info->flags & FLAG_VF_UNICAST_PROMISC) &&
- !(info->flags & FLAG_VF_MULTICAST_PROMISC);
+ if (info->flags & FLAG_VF_UNICAST_PROMISC)
+ alluni = true;
+
+ if (info->flags & FLAG_VF_MULTICAST_PROMISC)
+ allmulti = true;
+
+ rm_promisc = !allmulti && !alluni;
if (vsi->num_vlan || vf->port_vlan_info) {
struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
@@ -2399,12 +2444,12 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
enum ice_status status;
u8 promisc_m;
- if (info->flags & FLAG_VF_UNICAST_PROMISC) {
+ if (alluni) {
if (vf->port_vlan_info || vsi->num_vlan)
promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
else
promisc_m = ICE_UCAST_PROMISC_BITS;
- } else if (info->flags & FLAG_VF_MULTICAST_PROMISC) {
+ } else if (allmulti) {
if (vf->port_vlan_info || vsi->num_vlan)
promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
else
@@ -2432,15 +2477,16 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
}
}
- if (info->flags & FLAG_VF_MULTICAST_PROMISC)
- set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
- else
- clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+ if (allmulti &&
+ !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully set multicast promiscuous mode\n", vf->vf_id);
+ else if (!allmulti && test_and_clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n", vf->vf_id);
- if (info->flags & FLAG_VF_UNICAST_PROMISC)
- set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
- else
- clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+ if (alluni && !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully set unicast promiscuous mode\n", vf->vf_id);
+ else if (!alluni && test_and_clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n", vf->vf_id);
error_param:
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
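The promiscuous-mode rework above leans on a small idiom worth spelling out: test_and_set_bit()/test_and_clear_bit() return the bit's previous value, so each dev_info() fires only when the VF state actually changes. A userspace sketch with non-atomic stand-ins for the kernel helpers:

#include <stdio.h>

/* non-atomic stand-ins for the kernel's test_and_set_bit() and
 * test_and_clear_bit(); both return the bit's previous value
 */
static int test_and_set_bit(int nr, unsigned long *addr)
{
        int old = (*addr >> nr) & 1;

        *addr |= 1UL << nr;
        return old;
}

static int test_and_clear_bit(int nr, unsigned long *addr)
{
        int old = (*addr >> nr) & 1;

        *addr &= ~(1UL << nr);
        return old;
}

int main(void)
{
        unsigned long vf_states = 0;

        /* first enable: bit was clear, so the message fires once */
        if (!test_and_set_bit(0, &vf_states))
                printf("VF 0 successfully set multicast promiscuous mode\n");

        /* repeated enable: bit already set, no duplicate log */
        if (!test_and_set_bit(0, &vf_states))
                printf("never printed\n");

        /* disable: bit was set, so the "unset" message fires once */
        if (test_and_clear_bit(0, &vf_states))
                printf("VF 0 successfully unset multicast promiscuous mode\n");
        return 0;
}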
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 1782146db644..875fa0cbef56 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -260,45 +260,6 @@ free_buf:
}
/**
- * ice_xsk_alloc_pools - allocate a buffer pool for an XDP socket
- * @vsi: VSI to allocate the buffer pool on
- *
- * Returns 0 on success, negative on error
- */
-static int ice_xsk_alloc_pools(struct ice_vsi *vsi)
-{
- if (vsi->xsk_pools)
- return 0;
-
- vsi->xsk_pools = kcalloc(vsi->num_xsk_pools, sizeof(*vsi->xsk_pools),
- GFP_KERNEL);
-
- if (!vsi->xsk_pools) {
- vsi->num_xsk_pools = 0;
- return -ENOMEM;
- }
-
- return 0;
-}
-
-/**
- * ice_xsk_remove_pool - Remove an buffer pool for a certain ring/qid
- * @vsi: VSI from which the VSI will be removed
- * @qid: Ring/qid associated with the buffer pool
- */
-static void ice_xsk_remove_pool(struct ice_vsi *vsi, u16 qid)
-{
- vsi->xsk_pools[qid] = NULL;
- vsi->num_xsk_pools_used--;
-
- if (vsi->num_xsk_pools_used == 0) {
- kfree(vsi->xsk_pools);
- vsi->xsk_pools = NULL;
- vsi->num_xsk_pools = 0;
- }
-}
-
-/**
* ice_xsk_pool_disable - disable a buffer pool region
* @vsi: Current VSI
* @qid: queue ID
@@ -307,12 +268,12 @@ static void ice_xsk_remove_pool(struct ice_vsi *vsi, u16 qid)
*/
static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
{
- if (!vsi->xsk_pools || qid >= vsi->num_xsk_pools ||
- !vsi->xsk_pools[qid])
+ struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+
+ if (!pool)
return -EINVAL;
- xsk_pool_dma_unmap(vsi->xsk_pools[qid], ICE_RX_DMA_ATTR);
- ice_xsk_remove_pool(vsi, qid);
+ xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
return 0;
}
@@ -333,22 +294,11 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
if (vsi->type != ICE_VSI_PF)
return -EINVAL;
- if (!vsi->num_xsk_pools)
- vsi->num_xsk_pools = min_t(u16, vsi->num_rxq, vsi->num_txq);
- if (qid >= vsi->num_xsk_pools)
+ if (qid >= vsi->netdev->real_num_rx_queues ||
+ qid >= vsi->netdev->real_num_tx_queues)
return -EINVAL;
- err = ice_xsk_alloc_pools(vsi);
- if (err)
- return err;
-
- if (vsi->xsk_pools && vsi->xsk_pools[qid])
- return -EBUSY;
-
- vsi->xsk_pools[qid] = pool;
- vsi->num_xsk_pools_used++;
-
- err = xsk_pool_dma_map(vsi->xsk_pools[qid], ice_pf_to_dev(vsi->back),
+ err = xsk_pool_dma_map(pool, ice_pf_to_dev(vsi->back),
ICE_RX_DMA_ATTR);
if (err)
return err;
@@ -842,11 +792,8 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
{
int i;
- if (!vsi->xsk_pools)
- return false;
-
- for (i = 0; i < vsi->num_xsk_pools; i++) {
- if (vsi->xsk_pools[i])
+ ice_for_each_rxq(vsi, i) {
+ if (xsk_get_pool_from_qid(vsi->netdev, i))
return true;
}