linux/drivers/net/ethernet/ti/cpsw_switchdev.c

545 lines
13 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Texas Instruments switchdev Driver
*
* Copyright (C) 2019 Texas Instruments
*
*/
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>
#include <net/switchdev.h>
#include "cpsw.h"
#include "cpsw_ale.h"
#include "cpsw_priv.h"
#include "cpsw_switchdev.h"
struct cpsw_switchdev_event_work {
struct work_struct work;
struct switchdev_notifier_fdb_info fdb_info;
struct cpsw_priv *priv;
unsigned long event;
};
net: switchdev: remove the transaction structure from port attributes Since the introduction of the switchdev API, port attributes were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port attribute notifier structures, and converts drivers to not look at this member. In part, this patch contains a revert of my previous commit 2e554a7a5d8a ("net: dsa: propagate switchdev vlan_filtering prepare phase to drivers"). For the most part, the conversion was trivial except for: - Rocker's world implementation based on Broadcom OF-DPA had an odd implementation of ofdpa_port_attr_bridge_flags_set. The conversion was done mechanically, by pasting the implementation twice, then only keeping the code that would get executed during prepare phase on top, then only keeping the code that gets executed during the commit phase on bottom, then simplifying the resulting code until this was obtained. - DSA's offloading of STP state, bridge flags, VLAN filtering and multicast router could be converted right away. But the ageing time could not, so a shim was introduced and this was left for a further commit. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB Reviewed-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:50 +02:00
static int cpsw_port_stp_state_set(struct cpsw_priv *priv, u8 state)
{
struct cpsw_common *cpsw = priv->cpsw;
u8 cpsw_state;
int ret = 0;
switch (state) {
case BR_STATE_FORWARDING:
cpsw_state = ALE_PORT_STATE_FORWARD;
break;
case BR_STATE_LEARNING:
cpsw_state = ALE_PORT_STATE_LEARN;
break;
case BR_STATE_DISABLED:
cpsw_state = ALE_PORT_STATE_DISABLE;
break;
case BR_STATE_LISTENING:
case BR_STATE_BLOCKING:
cpsw_state = ALE_PORT_STATE_BLOCK;
break;
default:
return -EOPNOTSUPP;
}
ret = cpsw_ale_control_set(cpsw->ale, priv->emac_port,
ALE_PORT_STATE, cpsw_state);
dev_dbg(priv->dev, "ale state: %u\n", cpsw_state);
return ret;
}
static int cpsw_port_attr_br_flags_set(struct cpsw_priv *priv,
struct net_device *orig_dev,
struct switchdev_brport_flags flags)
{
struct cpsw_common *cpsw = priv->cpsw;
if (flags.mask & BR_MCAST_FLOOD) {
bool unreg_mcast_add = false;
if (flags.val & BR_MCAST_FLOOD)
unreg_mcast_add = true;
dev_dbg(priv->dev, "BR_MCAST_FLOOD: %d port %u\n",
unreg_mcast_add, priv->emac_port);
cpsw_ale_set_unreg_mcast(cpsw->ale, BIT(priv->emac_port),
unreg_mcast_add);
}
return 0;
}
static int cpsw_port_attr_br_flags_pre_set(struct net_device *netdev,
struct switchdev_brport_flags flags)
{
if (flags.mask & ~(BR_LEARNING | BR_MCAST_FLOOD))
return -EINVAL;
return 0;
}
static int cpsw_port_attr_set(struct net_device *ndev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack)
{
struct cpsw_priv *priv = netdev_priv(ndev);
int ret;
dev_dbg(priv->dev, "attr: id %u port: %u\n", attr->id, priv->emac_port);
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
net: switchdev: remove the transaction structure from port attributes Since the introduction of the switchdev API, port attributes were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port attribute notifier structures, and converts drivers to not look at this member. In part, this patch contains a revert of my previous commit 2e554a7a5d8a ("net: dsa: propagate switchdev vlan_filtering prepare phase to drivers"). For the most part, the conversion was trivial except for: - Rocker's world implementation based on Broadcom OF-DPA had an odd implementation of ofdpa_port_attr_bridge_flags_set. The conversion was done mechanically, by pasting the implementation twice, then only keeping the code that would get executed during prepare phase on top, then only keeping the code that gets executed during the commit phase on bottom, then simplifying the resulting code until this was obtained. - DSA's offloading of STP state, bridge flags, VLAN filtering and multicast router could be converted right away. But the ageing time could not, so a shim was introduced and this was left for a further commit. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB Reviewed-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:50 +02:00
ret = cpsw_port_attr_br_flags_pre_set(ndev,
attr->u.brport_flags);
break;
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
net: switchdev: remove the transaction structure from port attributes Since the introduction of the switchdev API, port attributes were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port attribute notifier structures, and converts drivers to not look at this member. In part, this patch contains a revert of my previous commit 2e554a7a5d8a ("net: dsa: propagate switchdev vlan_filtering prepare phase to drivers"). For the most part, the conversion was trivial except for: - Rocker's world implementation based on Broadcom OF-DPA had an odd implementation of ofdpa_port_attr_bridge_flags_set. The conversion was done mechanically, by pasting the implementation twice, then only keeping the code that would get executed during prepare phase on top, then only keeping the code that gets executed during the commit phase on bottom, then simplifying the resulting code until this was obtained. - DSA's offloading of STP state, bridge flags, VLAN filtering and multicast router could be converted right away. But the ageing time could not, so a shim was introduced and this was left for a further commit. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB Reviewed-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:50 +02:00
ret = cpsw_port_stp_state_set(priv, attr->u.stp_state);
dev_dbg(priv->dev, "stp state: %u\n", attr->u.stp_state);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
net: switchdev: remove the transaction structure from port attributes Since the introduction of the switchdev API, port attributes were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port attribute notifier structures, and converts drivers to not look at this member. In part, this patch contains a revert of my previous commit 2e554a7a5d8a ("net: dsa: propagate switchdev vlan_filtering prepare phase to drivers"). For the most part, the conversion was trivial except for: - Rocker's world implementation based on Broadcom OF-DPA had an odd implementation of ofdpa_port_attr_bridge_flags_set. The conversion was done mechanically, by pasting the implementation twice, then only keeping the code that would get executed during prepare phase on top, then only keeping the code that gets executed during the commit phase on bottom, then simplifying the resulting code until this was obtained. - DSA's offloading of STP state, bridge flags, VLAN filtering and multicast router could be converted right away. But the ageing time could not, so a shim was introduced and this was left for a further commit. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB Reviewed-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:50 +02:00
ret = cpsw_port_attr_br_flags_set(priv, attr->orig_dev,
attr->u.brport_flags);
break;
default:
ret = -EOPNOTSUPP;
break;
}
return ret;
}
static u16 cpsw_get_pvid(struct cpsw_priv *priv)
{
struct cpsw_common *cpsw = priv->cpsw;
u32 __iomem *port_vlan_reg;
u32 pvid;
if (priv->emac_port) {
int reg = CPSW2_PORT_VLAN;
if (cpsw->version == CPSW_VERSION_1)
reg = CPSW1_PORT_VLAN;
pvid = slave_read(cpsw->slaves + (priv->emac_port - 1), reg);
} else {
port_vlan_reg = &cpsw->host_port_regs->port_vlan;
pvid = readl(port_vlan_reg);
}
pvid = pvid & 0xfff;
return pvid;
}
static void cpsw_set_pvid(struct cpsw_priv *priv, u16 vid, bool cfi, u32 cos)
{
struct cpsw_common *cpsw = priv->cpsw;
void __iomem *port_vlan_reg;
u32 pvid;
pvid = vid;
pvid |= cfi ? BIT(12) : 0;
pvid |= (cos & 0x7) << 13;
if (priv->emac_port) {
int reg = CPSW2_PORT_VLAN;
if (cpsw->version == CPSW_VERSION_1)
reg = CPSW1_PORT_VLAN;
/* no barrier */
slave_write(cpsw->slaves + (priv->emac_port - 1), pvid, reg);
} else {
/* CPU port */
port_vlan_reg = &cpsw->host_port_regs->port_vlan;
writel(pvid, port_vlan_reg);
}
}
static int cpsw_port_vlan_add(struct cpsw_priv *priv, bool untag, bool pvid,
u16 vid, struct net_device *orig_dev)
{
bool cpu_port = netif_is_bridge_master(orig_dev);
struct cpsw_common *cpsw = priv->cpsw;
int unreg_mcast_mask = 0;
int reg_mcast_mask = 0;
int untag_mask = 0;
int port_mask;
int ret = 0;
u32 flags;
if (cpu_port) {
port_mask = BIT(HOST_PORT_NUM);
flags = orig_dev->flags;
unreg_mcast_mask = port_mask;
} else {
port_mask = BIT(priv->emac_port);
flags = priv->ndev->flags;
}
if (flags & IFF_MULTICAST)
reg_mcast_mask = port_mask;
if (untag)
untag_mask = port_mask;
ret = cpsw_ale_vlan_add_modify(cpsw->ale, vid, port_mask, untag_mask,
reg_mcast_mask, unreg_mcast_mask);
if (ret) {
dev_err(priv->dev, "Unable to add vlan\n");
return ret;
}
if (cpu_port)
cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
HOST_PORT_NUM, ALE_VLAN, vid);
if (!pvid)
return ret;
cpsw_set_pvid(priv, vid, 0, 0);
dev_dbg(priv->dev, "VID add: %s: vid:%u ports:%X\n",
priv->ndev->name, vid, port_mask);
return ret;
}
static int cpsw_port_vlan_del(struct cpsw_priv *priv, u16 vid,
struct net_device *orig_dev)
{
bool cpu_port = netif_is_bridge_master(orig_dev);
struct cpsw_common *cpsw = priv->cpsw;
int port_mask;
int ret = 0;
if (cpu_port)
port_mask = BIT(HOST_PORT_NUM);
else
port_mask = BIT(priv->emac_port);
ret = cpsw_ale_vlan_del_modify(cpsw->ale, vid, port_mask);
if (ret != 0)
return ret;
/* We don't care for the return value here, error is returned only if
* the unicast entry is not present
*/
if (cpu_port)
cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
HOST_PORT_NUM, ALE_VLAN, vid);
if (vid == cpsw_get_pvid(priv))
cpsw_set_pvid(priv, 0, 0, 0);
/* We don't care for the return value here, error is returned only if
* the multicast entry is not present
*/
cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
port_mask, ALE_VLAN, vid);
dev_dbg(priv->dev, "VID del: %s: vid:%u ports:%X\n",
priv->ndev->name, vid, port_mask);
return ret;
}
static int cpsw_port_vlans_add(struct cpsw_priv *priv,
net: switchdev: remove the transaction structure from port object notifiers Since the introduction of the switchdev API, port objects were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port object notifier structures, and converts drivers to not look at this member. Where driver conversion is trivial (like in the case of the Marvell Prestera driver, NXP DPAA2 switch, TI CPSW, and Rocker drivers), it is done in this patch. Where driver conversion needs more attention (DSA, Mellanox Spectrum), the conversion is left for subsequent patches and here we only fake the prepare/commit phases at a lower level, just not in the switchdev notifier itself. Where the code has a natural structure that is best left alone as a preparation and a commit phase (as in the case of the Ocelot switch), that structure is left in place, just made to not depend upon the switchdev transactional model. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:48 +02:00
const struct switchdev_obj_port_vlan *vlan)
{
bool untag = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
struct net_device *orig_dev = vlan->obj.orig_dev;
bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
dev_dbg(priv->dev, "VID add: %s: vid:%u flags:%X\n",
net: switchdev: remove vid_begin -> vid_end range from VLAN objects The call path of a switchdev VLAN addition to the bridge looks something like this today: nbp_vlan_init | __br_vlan_set_default_pvid | | | | | br_afspec | | | | | | | v | | | br_process_vlan_info | | | | | | | v | | | br_vlan_info | | | / \ / | | / \ / | | / \ / | | / \ / v v v v v nbp_vlan_add br_vlan_add ------+ | ^ ^ | | | / | | | | / / / | \ br_vlan_get_master/ / v \ ^ / / br_vlan_add_existing \ | / / | \ | / / / \ | / / / \ | / / / \ | / / / v | | v / __vlan_add / / | / / | / v | / __vlan_vid_add | / \ | / v v v br_switchdev_port_vlan_add The ranges UAPI was introduced to the bridge in commit bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") (Jan 10 2015). But the VLAN ranges (parsed in br_afspec) have always been passed one by one, through struct bridge_vlan_info tmp_vinfo, to br_vlan_info. So the range never went too far in depth. Then Scott Feldman introduced the switchdev_port_bridge_setlink function in commit 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink"). That marked the introduction of the SWITCHDEV_OBJ_PORT_VLAN, which made full use of the range. But switchdev_port_bridge_setlink was called like this: br_setlink -> br_afspec -> switchdev_port_bridge_setlink Basically, the switchdev and the bridge code were not tightly integrated. Then commit 41c498b9359e ("bridge: restore br_setlink back to original") came, and switchdev drivers were required to implement .ndo_bridge_setlink = switchdev_port_bridge_setlink for a while. In the meantime, commits such as 0944d6b5a2fa ("bridge: try switchdev op first in __vlan_vid_add/del") finally made switchdev penetrate the br_vlan_info() barrier and start to develop the call path we have today. But remember, br_vlan_info() still receives VLANs one by one. Then Arkadi Sharshevsky refactored the switchdev API in 2017 in commit 29ab586c3d83 ("net: switchdev: Remove bridge bypass support from switchdev") so that drivers would not implement .ndo_bridge_setlink any longer. The switchdev_port_bridge_setlink also got deleted. This refactoring removed the parallel bridge_setlink implementation from switchdev, and left the only switchdev VLAN objects to be the ones offloaded from __vlan_vid_add (basically RX filtering) and __vlan_add (the latter coming from commit 9c86ce2c1ae3 ("net: bridge: Notify about bridge VLANs")). That is to say, today the switchdev VLAN object ranges are not used in the kernel. Refactoring the above call path is a bit complicated, when the bridge VLAN call path is already a bit complicated. Let's go off and finish the job of commit 29ab586c3d83 by deleting the bogus iteration through the VLAN ranges from the drivers. Some aspects of this feature never made too much sense in the first place. For example, what is a range of VLANs all having the BRIDGE_VLAN_INFO_PVID flag supposed to mean, when a port can obviously have a single pvid? This particular configuration _is_ denied as of commit 6623c60dc28e ("bridge: vlan: enforce no pvid flag in vlan ranges"), but from an API perspective, the driver still has to play pretend, and only offload the vlan->vid_end as pvid. And the addition of a switchdev VLAN object can modify the flags of another, completely unrelated, switchdev VLAN object! (a VLAN that is PVID will invalidate the PVID flag from whatever other VLAN had previously been offloaded with switchdev and had that flag. Yet switchdev never notifies about that change, drivers are supposed to guess). Nonetheless, having a VLAN range in the API makes error handling look scarier than it really is - unwinding on errors and all of that. When in reality, no one really calls this API with more than one VLAN. It is all unnecessary complexity. And despite appearing pretentious (two-phase transactional model and all), the switchdev API is really sloppy because the VLAN addition and removal operations are not paired with one another (you can add a VLAN 100 times and delete it just once). The bridge notifies through switchdev of a VLAN addition not only when the flags of an existing VLAN change, but also when nothing changes. There are switchdev drivers out there who don't like adding a VLAN that has already been added, and those checks don't really belong at driver level. But the fact that the API contains ranges is yet another factor that prevents this from being addressed in the future. Of the existing switchdev pieces of hardware, it appears that only Mellanox Spectrum supports offloading more than one VLAN at a time, through mlxsw_sp_port_vlan_set. I have kept that code internal to the driver, because there is some more bookkeeping that makes use of it, but I deleted it from the switchdev API. But since the switchdev support for ranges has already been de facto deleted by a Mellanox employee and nobody noticed for 4 years, I'm going to assume it's not a biggie. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> # switchdev and mlxsw Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:46 +02:00
priv->ndev->name, vlan->vid, vlan->flags);
net: switchdev: remove vid_begin -> vid_end range from VLAN objects The call path of a switchdev VLAN addition to the bridge looks something like this today: nbp_vlan_init | __br_vlan_set_default_pvid | | | | | br_afspec | | | | | | | v | | | br_process_vlan_info | | | | | | | v | | | br_vlan_info | | | / \ / | | / \ / | | / \ / | | / \ / v v v v v nbp_vlan_add br_vlan_add ------+ | ^ ^ | | | / | | | | / / / | \ br_vlan_get_master/ / v \ ^ / / br_vlan_add_existing \ | / / | \ | / / / \ | / / / \ | / / / \ | / / / v | | v / __vlan_add / / | / / | / v | / __vlan_vid_add | / \ | / v v v br_switchdev_port_vlan_add The ranges UAPI was introduced to the bridge in commit bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") (Jan 10 2015). But the VLAN ranges (parsed in br_afspec) have always been passed one by one, through struct bridge_vlan_info tmp_vinfo, to br_vlan_info. So the range never went too far in depth. Then Scott Feldman introduced the switchdev_port_bridge_setlink function in commit 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink"). That marked the introduction of the SWITCHDEV_OBJ_PORT_VLAN, which made full use of the range. But switchdev_port_bridge_setlink was called like this: br_setlink -> br_afspec -> switchdev_port_bridge_setlink Basically, the switchdev and the bridge code were not tightly integrated. Then commit 41c498b9359e ("bridge: restore br_setlink back to original") came, and switchdev drivers were required to implement .ndo_bridge_setlink = switchdev_port_bridge_setlink for a while. In the meantime, commits such as 0944d6b5a2fa ("bridge: try switchdev op first in __vlan_vid_add/del") finally made switchdev penetrate the br_vlan_info() barrier and start to develop the call path we have today. But remember, br_vlan_info() still receives VLANs one by one. Then Arkadi Sharshevsky refactored the switchdev API in 2017 in commit 29ab586c3d83 ("net: switchdev: Remove bridge bypass support from switchdev") so that drivers would not implement .ndo_bridge_setlink any longer. The switchdev_port_bridge_setlink also got deleted. This refactoring removed the parallel bridge_setlink implementation from switchdev, and left the only switchdev VLAN objects to be the ones offloaded from __vlan_vid_add (basically RX filtering) and __vlan_add (the latter coming from commit 9c86ce2c1ae3 ("net: bridge: Notify about bridge VLANs")). That is to say, today the switchdev VLAN object ranges are not used in the kernel. Refactoring the above call path is a bit complicated, when the bridge VLAN call path is already a bit complicated. Let's go off and finish the job of commit 29ab586c3d83 by deleting the bogus iteration through the VLAN ranges from the drivers. Some aspects of this feature never made too much sense in the first place. For example, what is a range of VLANs all having the BRIDGE_VLAN_INFO_PVID flag supposed to mean, when a port can obviously have a single pvid? This particular configuration _is_ denied as of commit 6623c60dc28e ("bridge: vlan: enforce no pvid flag in vlan ranges"), but from an API perspective, the driver still has to play pretend, and only offload the vlan->vid_end as pvid. And the addition of a switchdev VLAN object can modify the flags of another, completely unrelated, switchdev VLAN object! (a VLAN that is PVID will invalidate the PVID flag from whatever other VLAN had previously been offloaded with switchdev and had that flag. Yet switchdev never notifies about that change, drivers are supposed to guess). Nonetheless, having a VLAN range in the API makes error handling look scarier than it really is - unwinding on errors and all of that. When in reality, no one really calls this API with more than one VLAN. It is all unnecessary complexity. And despite appearing pretentious (two-phase transactional model and all), the switchdev API is really sloppy because the VLAN addition and removal operations are not paired with one another (you can add a VLAN 100 times and delete it just once). The bridge notifies through switchdev of a VLAN addition not only when the flags of an existing VLAN change, but also when nothing changes. There are switchdev drivers out there who don't like adding a VLAN that has already been added, and those checks don't really belong at driver level. But the fact that the API contains ranges is yet another factor that prevents this from being addressed in the future. Of the existing switchdev pieces of hardware, it appears that only Mellanox Spectrum supports offloading more than one VLAN at a time, through mlxsw_sp_port_vlan_set. I have kept that code internal to the driver, because there is some more bookkeeping that makes use of it, but I deleted it from the switchdev API. But since the switchdev support for ranges has already been de facto deleted by a Mellanox employee and nobody noticed for 4 years, I'm going to assume it's not a biggie. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> # switchdev and mlxsw Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:46 +02:00
return cpsw_port_vlan_add(priv, untag, pvid, vlan->vid, orig_dev);
}
static int cpsw_port_mdb_add(struct cpsw_priv *priv,
net: switchdev: remove the transaction structure from port object notifiers Since the introduction of the switchdev API, port objects were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port object notifier structures, and converts drivers to not look at this member. Where driver conversion is trivial (like in the case of the Marvell Prestera driver, NXP DPAA2 switch, TI CPSW, and Rocker drivers), it is done in this patch. Where driver conversion needs more attention (DSA, Mellanox Spectrum), the conversion is left for subsequent patches and here we only fake the prepare/commit phases at a lower level, just not in the switchdev notifier itself. Where the code has a natural structure that is best left alone as a preparation and a commit phase (as in the case of the Ocelot switch), that structure is left in place, just made to not depend upon the switchdev transactional model. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:48 +02:00
struct switchdev_obj_port_mdb *mdb)
{
struct net_device *orig_dev = mdb->obj.orig_dev;
bool cpu_port = netif_is_bridge_master(orig_dev);
struct cpsw_common *cpsw = priv->cpsw;
int port_mask;
int err;
if (cpu_port)
port_mask = BIT(HOST_PORT_NUM);
else
port_mask = BIT(priv->emac_port);
err = cpsw_ale_add_mcast(cpsw->ale, mdb->addr, port_mask,
ALE_VLAN, mdb->vid, 0);
dev_dbg(priv->dev, "MDB add: %s: vid %u:%pM ports: %X\n",
priv->ndev->name, mdb->vid, mdb->addr, port_mask);
return err;
}
static int cpsw_port_mdb_del(struct cpsw_priv *priv,
struct switchdev_obj_port_mdb *mdb)
{
struct net_device *orig_dev = mdb->obj.orig_dev;
bool cpu_port = netif_is_bridge_master(orig_dev);
struct cpsw_common *cpsw = priv->cpsw;
int del_mask;
int err;
if (cpu_port)
del_mask = BIT(HOST_PORT_NUM);
else
del_mask = BIT(priv->emac_port);
err = cpsw_ale_del_mcast(cpsw->ale, mdb->addr, del_mask,
ALE_VLAN, mdb->vid);
dev_dbg(priv->dev, "MDB del: %s: vid %u:%pM ports: %X\n",
priv->ndev->name, mdb->vid, mdb->addr, del_mask);
return err;
}
static int cpsw_port_obj_add(struct net_device *ndev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack)
{
struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
struct switchdev_obj_port_mdb *mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
struct cpsw_priv *priv = netdev_priv(ndev);
int err = 0;
dev_dbg(priv->dev, "obj_add: id %u port: %u\n",
obj->id, priv->emac_port);
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
net: switchdev: remove the transaction structure from port object notifiers Since the introduction of the switchdev API, port objects were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port object notifier structures, and converts drivers to not look at this member. Where driver conversion is trivial (like in the case of the Marvell Prestera driver, NXP DPAA2 switch, TI CPSW, and Rocker drivers), it is done in this patch. Where driver conversion needs more attention (DSA, Mellanox Spectrum), the conversion is left for subsequent patches and here we only fake the prepare/commit phases at a lower level, just not in the switchdev notifier itself. Where the code has a natural structure that is best left alone as a preparation and a commit phase (as in the case of the Ocelot switch), that structure is left in place, just made to not depend upon the switchdev transactional model. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:48 +02:00
err = cpsw_port_vlans_add(priv, vlan);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
case SWITCHDEV_OBJ_ID_HOST_MDB:
net: switchdev: remove the transaction structure from port object notifiers Since the introduction of the switchdev API, port objects were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port object notifier structures, and converts drivers to not look at this member. Where driver conversion is trivial (like in the case of the Marvell Prestera driver, NXP DPAA2 switch, TI CPSW, and Rocker drivers), it is done in this patch. Where driver conversion needs more attention (DSA, Mellanox Spectrum), the conversion is left for subsequent patches and here we only fake the prepare/commit phases at a lower level, just not in the switchdev notifier itself. Where the code has a natural structure that is best left alone as a preparation and a commit phase (as in the case of the Ocelot switch), that structure is left in place, just made to not depend upon the switchdev transactional model. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:48 +02:00
err = cpsw_port_mdb_add(priv, mdb);
break;
default:
err = -EOPNOTSUPP;
break;
}
return err;
}
static int cpsw_port_obj_del(struct net_device *ndev, const void *ctx,
const struct switchdev_obj *obj)
{
struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
struct switchdev_obj_port_mdb *mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
struct cpsw_priv *priv = netdev_priv(ndev);
int err = 0;
dev_dbg(priv->dev, "obj_del: id %u port: %u\n",
obj->id, priv->emac_port);
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
net: switchdev: remove vid_begin -> vid_end range from VLAN objects The call path of a switchdev VLAN addition to the bridge looks something like this today: nbp_vlan_init | __br_vlan_set_default_pvid | | | | | br_afspec | | | | | | | v | | | br_process_vlan_info | | | | | | | v | | | br_vlan_info | | | / \ / | | / \ / | | / \ / | | / \ / v v v v v nbp_vlan_add br_vlan_add ------+ | ^ ^ | | | / | | | | / / / | \ br_vlan_get_master/ / v \ ^ / / br_vlan_add_existing \ | / / | \ | / / / \ | / / / \ | / / / \ | / / / v | | v / __vlan_add / / | / / | / v | / __vlan_vid_add | / \ | / v v v br_switchdev_port_vlan_add The ranges UAPI was introduced to the bridge in commit bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") (Jan 10 2015). But the VLAN ranges (parsed in br_afspec) have always been passed one by one, through struct bridge_vlan_info tmp_vinfo, to br_vlan_info. So the range never went too far in depth. Then Scott Feldman introduced the switchdev_port_bridge_setlink function in commit 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink"). That marked the introduction of the SWITCHDEV_OBJ_PORT_VLAN, which made full use of the range. But switchdev_port_bridge_setlink was called like this: br_setlink -> br_afspec -> switchdev_port_bridge_setlink Basically, the switchdev and the bridge code were not tightly integrated. Then commit 41c498b9359e ("bridge: restore br_setlink back to original") came, and switchdev drivers were required to implement .ndo_bridge_setlink = switchdev_port_bridge_setlink for a while. In the meantime, commits such as 0944d6b5a2fa ("bridge: try switchdev op first in __vlan_vid_add/del") finally made switchdev penetrate the br_vlan_info() barrier and start to develop the call path we have today. But remember, br_vlan_info() still receives VLANs one by one. Then Arkadi Sharshevsky refactored the switchdev API in 2017 in commit 29ab586c3d83 ("net: switchdev: Remove bridge bypass support from switchdev") so that drivers would not implement .ndo_bridge_setlink any longer. The switchdev_port_bridge_setlink also got deleted. This refactoring removed the parallel bridge_setlink implementation from switchdev, and left the only switchdev VLAN objects to be the ones offloaded from __vlan_vid_add (basically RX filtering) and __vlan_add (the latter coming from commit 9c86ce2c1ae3 ("net: bridge: Notify about bridge VLANs")). That is to say, today the switchdev VLAN object ranges are not used in the kernel. Refactoring the above call path is a bit complicated, when the bridge VLAN call path is already a bit complicated. Let's go off and finish the job of commit 29ab586c3d83 by deleting the bogus iteration through the VLAN ranges from the drivers. Some aspects of this feature never made too much sense in the first place. For example, what is a range of VLANs all having the BRIDGE_VLAN_INFO_PVID flag supposed to mean, when a port can obviously have a single pvid? This particular configuration _is_ denied as of commit 6623c60dc28e ("bridge: vlan: enforce no pvid flag in vlan ranges"), but from an API perspective, the driver still has to play pretend, and only offload the vlan->vid_end as pvid. And the addition of a switchdev VLAN object can modify the flags of another, completely unrelated, switchdev VLAN object! (a VLAN that is PVID will invalidate the PVID flag from whatever other VLAN had previously been offloaded with switchdev and had that flag. Yet switchdev never notifies about that change, drivers are supposed to guess). Nonetheless, having a VLAN range in the API makes error handling look scarier than it really is - unwinding on errors and all of that. When in reality, no one really calls this API with more than one VLAN. It is all unnecessary complexity. And despite appearing pretentious (two-phase transactional model and all), the switchdev API is really sloppy because the VLAN addition and removal operations are not paired with one another (you can add a VLAN 100 times and delete it just once). The bridge notifies through switchdev of a VLAN addition not only when the flags of an existing VLAN change, but also when nothing changes. There are switchdev drivers out there who don't like adding a VLAN that has already been added, and those checks don't really belong at driver level. But the fact that the API contains ranges is yet another factor that prevents this from being addressed in the future. Of the existing switchdev pieces of hardware, it appears that only Mellanox Spectrum supports offloading more than one VLAN at a time, through mlxsw_sp_port_vlan_set. I have kept that code internal to the driver, because there is some more bookkeeping that makes use of it, but I deleted it from the switchdev API. But since the switchdev support for ranges has already been de facto deleted by a Mellanox employee and nobody noticed for 4 years, I'm going to assume it's not a biggie. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> # switchdev and mlxsw Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:46 +02:00
err = cpsw_port_vlan_del(priv, vlan->vid, vlan->obj.orig_dev);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
case SWITCHDEV_OBJ_ID_HOST_MDB:
err = cpsw_port_mdb_del(priv, mdb);
break;
default:
err = -EOPNOTSUPP;
break;
}
return err;
}
static void cpsw_fdb_offload_notify(struct net_device *ndev,
struct switchdev_notifier_fdb_info *rcv)
{
struct switchdev_notifier_fdb_info info = {};
info.addr = rcv->addr;
info.vid = rcv->vid;
info.offloaded = true;
call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
ndev, &info.info, NULL);
}
static void cpsw_switchdev_event_work(struct work_struct *work)
{
struct cpsw_switchdev_event_work *switchdev_work =
container_of(work, struct cpsw_switchdev_event_work, work);
struct cpsw_priv *priv = switchdev_work->priv;
struct switchdev_notifier_fdb_info *fdb;
struct cpsw_common *cpsw = priv->cpsw;
int port = priv->emac_port;
rtnl_lock();
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
fdb = &switchdev_work->fdb_info;
dev_dbg(cpsw->dev, "cpsw_fdb_add: MACID = %pM vid = %u flags = %u %u -- port %d\n",
fdb->addr, fdb->vid, fdb->added_by_user,
fdb->offloaded, port);
if (!fdb->added_by_user || fdb->is_local)
break;
if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0)
port = HOST_PORT_NUM;
cpsw_ale_add_ucast(cpsw->ale, (u8 *)fdb->addr, port,
fdb->vid ? ALE_VLAN : 0, fdb->vid);
cpsw_fdb_offload_notify(priv->ndev, fdb);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
fdb = &switchdev_work->fdb_info;
dev_dbg(cpsw->dev, "cpsw_fdb_del: MACID = %pM vid = %u flags = %u %u -- port %d\n",
fdb->addr, fdb->vid, fdb->added_by_user,
fdb->offloaded, port);
if (!fdb->added_by_user || fdb->is_local)
break;
if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0)
port = HOST_PORT_NUM;
cpsw_ale_del_ucast(cpsw->ale, (u8 *)fdb->addr, port,
fdb->vid ? ALE_VLAN : 0, fdb->vid);
break;
default:
break;
}
rtnl_unlock();
kfree(switchdev_work->fdb_info.addr);
kfree(switchdev_work);
dev_put(priv->ndev);
}
/* called under rcu_read_lock() */
static int cpsw_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *ndev = switchdev_notifier_info_to_dev(ptr);
struct switchdev_notifier_fdb_info *fdb_info = ptr;
struct cpsw_switchdev_event_work *switchdev_work;
struct cpsw_priv *priv = netdev_priv(ndev);
int err;
if (event == SWITCHDEV_PORT_ATTR_SET) {
err = switchdev_handle_port_attr_set(ndev, ptr,
cpsw_port_dev_check,
cpsw_port_attr_set);
return notifier_from_errno(err);
}
if (!cpsw_port_dev_check(ndev))
return NOTIFY_DONE;
switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
if (WARN_ON(!switchdev_work))
return NOTIFY_BAD;
INIT_WORK(&switchdev_work->work, cpsw_switchdev_event_work);
switchdev_work->priv = priv;
switchdev_work->event = event;
switch (event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
case SWITCHDEV_FDB_DEL_TO_DEVICE:
memcpy(&switchdev_work->fdb_info, ptr,
sizeof(switchdev_work->fdb_info));
switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
if (!switchdev_work->fdb_info.addr)
goto err_addr_alloc;
ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
fdb_info->addr);
dev_hold(ndev);
break;
default:
kfree(switchdev_work);
return NOTIFY_DONE;
}
queue_work(system_long_wq, &switchdev_work->work);
return NOTIFY_DONE;
err_addr_alloc:
kfree(switchdev_work);
return NOTIFY_BAD;
}
static struct notifier_block cpsw_switchdev_notifier = {
.notifier_call = cpsw_switchdev_event,
};
static int cpsw_switchdev_blocking_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
int err;
switch (event) {
case SWITCHDEV_PORT_OBJ_ADD:
err = switchdev_handle_port_obj_add(dev, ptr,
cpsw_port_dev_check,
cpsw_port_obj_add);
return notifier_from_errno(err);
case SWITCHDEV_PORT_OBJ_DEL:
err = switchdev_handle_port_obj_del(dev, ptr,
cpsw_port_dev_check,
cpsw_port_obj_del);
return notifier_from_errno(err);
case SWITCHDEV_PORT_ATTR_SET:
err = switchdev_handle_port_attr_set(dev, ptr,
cpsw_port_dev_check,
cpsw_port_attr_set);
return notifier_from_errno(err);
default:
break;
}
return NOTIFY_DONE;
}
static struct notifier_block cpsw_switchdev_bl_notifier = {
.notifier_call = cpsw_switchdev_blocking_event,
};
int cpsw_switchdev_register_notifiers(struct cpsw_common *cpsw)
{
int ret = 0;
ret = register_switchdev_notifier(&cpsw_switchdev_notifier);
if (ret) {
dev_err(cpsw->dev, "register switchdev notifier fail ret:%d\n",
ret);
return ret;
}
ret = register_switchdev_blocking_notifier(&cpsw_switchdev_bl_notifier);
if (ret) {
dev_err(cpsw->dev, "register switchdev blocking notifier ret:%d\n",
ret);
unregister_switchdev_notifier(&cpsw_switchdev_notifier);
}
return ret;
}
void cpsw_switchdev_unregister_notifiers(struct cpsw_common *cpsw)
{
unregister_switchdev_blocking_notifier(&cpsw_switchdev_bl_notifier);
unregister_switchdev_notifier(&cpsw_switchdev_notifier);
}