2019-05-02 23:23:30 +03:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/* Copyright (c) 2018, Sensor-Technik Wiedemann GmbH
|
|
|
|
* Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
|
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/printk.h>
|
|
|
|
#include <linux/spi/spi.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/gpio/consumer.h>
|
2019-05-02 23:23:38 +03:00
|
|
|
#include <linux/phylink.h>
|
2019-05-02 23:23:30 +03:00
|
|
|
#include <linux/of.h>
|
|
|
|
#include <linux/of_net.h>
|
|
|
|
#include <linux/of_mdio.h>
|
2021-06-11 23:05:28 +03:00
|
|
|
#include <linux/pcs/pcs-xpcs.h>
|
2019-05-02 23:23:30 +03:00
|
|
|
#include <linux/netdev_features.h>
|
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/if_bridge.h>
|
|
|
|
#include <linux/if_ether.h>
|
2019-05-05 13:19:27 +03:00
|
|
|
#include <linux/dsa/8021q.h>
|
2023-11-28 19:50:27 +02:00
|
|
|
#include <linux/units.h>
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
#include "sja1105.h"
|
2019-09-15 05:00:02 +03:00
|
|
|
#include "sja1105_tas.h"
|
2019-05-02 23:23:30 +03:00
|
|
|
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
#define SJA1105_UNKNOWN_MULTICAST 0x010000000000ull
|
|
|
|
|
2021-09-22 18:10:29 +03:00
|
|
|
/* Configure the optional reset pin and bring up switch */
|
|
|
|
static int sja1105_hw_reset(struct device *dev, unsigned int pulse_len,
|
|
|
|
unsigned int startup_delay)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
2021-09-22 18:10:29 +03:00
|
|
|
struct gpio_desc *gpio;
|
|
|
|
|
|
|
|
gpio = gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
|
|
|
|
if (IS_ERR(gpio))
|
|
|
|
return PTR_ERR(gpio);
|
|
|
|
|
|
|
|
if (!gpio)
|
|
|
|
return 0;
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
gpiod_set_value_cansleep(gpio, 1);
|
|
|
|
/* Wait for minimum reset pulse length */
|
|
|
|
msleep(pulse_len);
|
|
|
|
gpiod_set_value_cansleep(gpio, 0);
|
|
|
|
/* Wait until chip is ready after reset */
|
|
|
|
msleep(startup_delay);
|
2021-09-22 18:10:29 +03:00
|
|
|
|
|
|
|
gpiod_put(gpio);
|
|
|
|
|
|
|
|
return 0;
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
sja1105_port_allow_traffic(struct sja1105_l2_forwarding_entry *l2_fwd,
|
|
|
|
int from, int to, bool allow)
|
|
|
|
{
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
if (allow)
|
2019-05-02 23:23:30 +03:00
|
|
|
l2_fwd[from].reach_port |= BIT(to);
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
else
|
2019-05-02 23:23:30 +03:00
|
|
|
l2_fwd[from].reach_port &= ~BIT(to);
|
|
|
|
}
|
|
|
|
|
2021-02-16 13:41:19 +02:00
|
|
|
static bool sja1105_can_forward(struct sja1105_l2_forwarding_entry *l2_fwd,
|
|
|
|
int from, int to)
|
|
|
|
{
|
|
|
|
return !!(l2_fwd[from].reach_port & BIT(to));
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: make sure untagged packets are dropped on ingress ports with no pvid
Surprisingly, this configuration:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
still has the sja1105 switch sending untagged packets to the CPU (and
failing to decode them, since dsa_find_designated_bridge_port_by_vid
searches by VID 1 and rightfully finds no bridge VLAN 1 on a port).
Dumping the switch configuration, the VLANs are managed properly:
- the pvid of swp2 is 1 in the MAC Configuration Table, but
- only the CPU port is in the port membership of VLANID 1 in the VLAN
Lookup Table
When the ingress packets are tagged with VID 1, they are properly
dropped. But when they are untagged, they are able to reach the CPU
port. Also, when the pvid in the MAC Configuration Table is changed to
e.g. 55 (an unused VLAN), the untagged packets are also dropped.
So it looks like:
- the switch bypasses ingress VLAN membership checks for untagged traffic
- the reason why the untagged traffic is dropped when I make the pvid 55
is due to the lack of valid destination ports in VLAN 55, rather than
an ingress membership violation
- the ingress VLAN membership checks are only done for VLAN-tagged traffic
Interesting. It looks like there is an explicit bit to drop untagged
traffic, so we should probably be using that to preserve user expectations.
Note that only VLAN-aware ports should drop untagged packets due to no
pvid - when VLAN-unaware, the software bridge doesn't do this even if
there is no pvid on any bridge port and on the bridge itself. So the new
sja1105_drop_untagged() function cannot simply be called with "false"
from sja1105_bridge_vlan_add() and with "true" from sja1105_bridge_vlan_del.
Instead, we need to also consider the VLAN awareness state. That means
we need to hook the "drop untagged" setting in all the same places where
the "commit pvid" logic is, and it needs to factor in all the state when
flipping the "drop untagged" bit: is our current pvid in the VLAN Lookup
Table, and is the current port in that VLAN's port membership list?
VLAN-unaware ports will never drop untagged frames because these checks
always succeed by construction, and the tag_8021q VLANs cannot be changed
by the user.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-29 00:54:28 +03:00
|
|
|
/* Look up @vid in the VLAN Lookup table of the static config.
 *
 * Returns the index of the first entry whose vlanid equals @vid, or -1
 * (an invalid entry index) if no entry matches.
 */
static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid)
{
	struct sja1105_vlan_lookup_entry *entries;
	int num_entries, idx = 0;

	entries = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
	num_entries = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entry_count;

	while (idx < num_entries) {
		if (entries[idx].vlanid == vid)
			return idx;
		idx++;
	}

	/* Not found */
	return -1;
}
|
|
|
|
|
|
|
|
static int sja1105_drop_untagged(struct dsa_switch *ds, int port, bool drop)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
struct sja1105_mac_config_entry *mac;
|
|
|
|
|
|
|
|
mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
|
|
|
|
|
|
|
|
if (mac[port].drpuntag == drop)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
mac[port].drpuntag = drop;
|
|
|
|
|
|
|
|
return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
|
|
|
|
&mac[port], true);
|
|
|
|
}
|
|
|
|
|
2021-07-29 00:54:27 +03:00
|
|
|
/* Update the vlanid field of @port's MAC Configuration entry to @pvid.
 *
 * When the cached entry already holds @pvid nothing is written and 0 is
 * returned. Otherwise the cached entry is updated first and the result of
 * sja1105_dynamic_config_write() for that entry is returned.
 */
static int sja1105_pvid_apply(struct sja1105_private *priv, int port, u16 pvid)
{
	struct sja1105_mac_config_entry *mac_table;
	struct sja1105_mac_config_entry *entry;

	mac_table = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
	entry = &mac_table[port];

	if (entry->vlanid != pvid) {
		entry->vlanid = pvid;

		return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG,
						    port, entry, true);
	}

	return 0;
}
|
|
|
|
|
|
|
|
static int sja1105_commit_pvid(struct dsa_switch *ds, int port)
|
|
|
|
{
|
|
|
|
struct dsa_port *dp = dsa_to_port(ds, port);
|
2021-12-06 18:57:53 +02:00
|
|
|
struct net_device *br = dsa_port_bridge_dev_get(dp);
|
2021-07-29 00:54:27 +03:00
|
|
|
struct sja1105_private *priv = ds->priv;
|
net: dsa: sja1105: make sure untagged packets are dropped on ingress ports with no pvid
Surprisingly, this configuration:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
still has the sja1105 switch sending untagged packets to the CPU (and
failing to decode them, since dsa_find_designated_bridge_port_by_vid
searches by VID 1 and rightfully finds no bridge VLAN 1 on a port).
Dumping the switch configuration, the VLANs are managed properly:
- the pvid of swp2 is 1 in the MAC Configuration Table, but
- only the CPU port is in the port membership of VLANID 1 in the VLAN
Lookup Table
When the ingress packets are tagged with VID 1, they are properly
dropped. But when they are untagged, they are able to reach the CPU
port. Also, when the pvid in the MAC Configuration Table is changed to
e.g. 55 (an unused VLAN), the untagged packets are also dropped.
So it looks like:
- the switch bypasses ingress VLAN membership checks for untagged traffic
- the reason why the untagged traffic is dropped when I make the pvid 55
is due to the lack of valid destination ports in VLAN 55, rather than
an ingress membership violation
- the ingress VLAN membership checks are only done for VLAN-tagged traffic
Interesting. It looks like there is an explicit bit to drop untagged
traffic, so we should probably be using that to preserve user expectations.
Note that only VLAN-aware ports should drop untagged packets due to no
pvid - when VLAN-unaware, the software bridge doesn't do this even if
there is no pvid on any bridge port and on the bridge itself. So the new
sja1105_drop_untagged() function cannot simply be called with "false"
from sja1105_bridge_vlan_add() and with "true" from sja1105_bridge_vlan_del.
Instead, we need to also consider the VLAN awareness state. That means
we need to hook the "drop untagged" setting in all the same places where
the "commit pvid" logic is, and it needs to factor in all the state when
flipping the "drop untagged" bit: is our current pvid in the VLAN Lookup
Table, and is the current port in that VLAN's port membership list?
VLAN-unaware ports will never drop untagged frames because these checks
always succeed by construction, and the tag_8021q VLANs cannot be changed
by the user.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-29 00:54:28 +03:00
|
|
|
struct sja1105_vlan_lookup_entry *vlan;
|
|
|
|
bool drop_untagged = false;
|
|
|
|
int match, rc;
|
2021-07-29 00:54:27 +03:00
|
|
|
u16 pvid;
|
|
|
|
|
2021-12-06 18:57:53 +02:00
|
|
|
if (br && br_vlan_enabled(br))
|
2021-07-29 00:54:27 +03:00
|
|
|
pvid = priv->bridge_pvid[port];
|
|
|
|
else
|
|
|
|
pvid = priv->tag_8021q_pvid[port];
|
|
|
|
|
net: dsa: sja1105: make sure untagged packets are dropped on ingress ports with no pvid
Surprisingly, this configuration:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
still has the sja1105 switch sending untagged packets to the CPU (and
failing to decode them, since dsa_find_designated_bridge_port_by_vid
searches by VID 1 and rightfully finds no bridge VLAN 1 on a port).
Dumping the switch configuration, the VLANs are managed properly:
- the pvid of swp2 is 1 in the MAC Configuration Table, but
- only the CPU port is in the port membership of VLANID 1 in the VLAN
Lookup Table
When the ingress packets are tagged with VID 1, they are properly
dropped. But when they are untagged, they are able to reach the CPU
port. Also, when the pvid in the MAC Configuration Table is changed to
e.g. 55 (an unused VLAN), the untagged packets are also dropped.
So it looks like:
- the switch bypasses ingress VLAN membership checks for untagged traffic
- the reason why the untagged traffic is dropped when I make the pvid 55
is due to the lack of valid destination ports in VLAN 55, rather than
an ingress membership violation
- the ingress VLAN membership checks are only done for VLAN-tagged traffic
Interesting. It looks like there is an explicit bit to drop untagged
traffic, so we should probably be using that to preserve user expectations.
Note that only VLAN-aware ports should drop untagged packets due to no
pvid - when VLAN-unaware, the software bridge doesn't do this even if
there is no pvid on any bridge port and on the bridge itself. So the new
sja1105_drop_untagged() function cannot simply be called with "false"
from sja1105_bridge_vlan_add() and with "true" from sja1105_bridge_vlan_del.
Instead, we need to also consider the VLAN awareness state. That means
we need to hook the "drop untagged" setting in all the same places where
the "commit pvid" logic is, and it needs to factor in all the state when
flipping the "drop untagged" bit: is our current pvid in the VLAN Lookup
Table, and is the current port in that VLAN's port membership list?
VLAN-unaware ports will never drop untagged frames because these checks
always succeed by construction, and the tag_8021q VLANs cannot be changed
by the user.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-29 00:54:28 +03:00
|
|
|
rc = sja1105_pvid_apply(priv, port, pvid);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
net: dsa: sja1105: prevent tag_8021q VLANs from being received on user ports
Currently it is possible for an attacker to craft packets with a fake
DSA tag and send them to us, and our user ports will accept them and
preserve that VLAN when transmitting towards the CPU. Then the tagger
will be misled into thinking that the packets came on a different port
than they really came on.
Up until recently there wasn't a good option to prevent this from
happening. In SJA1105P and later, the MAC Configuration Table introduced
two options called:
- DRPSITAG: Drop Single Inner Tagged Frames
- DRPSOTAG: Drop Single Outer Tagged Frames
Because the sja1105 driver classifies all VLANs as "outer VLANs" (S-Tags),
it would be in principle possible to enable the DRPSOTAG bit on ports
using tag_8021q, and drop on ingress all packets which have a VLAN tag.
When the switch is VLAN-unaware, this works, because it uses a custom
TPID of 0xdadb, so any "tagged" packets received on a user port are
probably a spoofing attempt. But when the switch overall is VLAN-aware,
and some ports are standalone (therefore they use tag_8021q), the TPID
is 0x8100, and the port can receive a mix of untagged and VLAN-tagged
packets. The untagged ones will be classified to the tag_8021q pvid, and
the tagged ones to the VLAN ID from the packet header. Yes, it is true
that since commit 4fbc08bd3665 ("net: dsa: sja1105: deny 8021q uppers on
ports") we no longer support this mixed mode, but that is a temporary
limitation which will eventually be lifted. It would be nice to not
introduce one more restriction via DRPSOTAG, which would make the
standalone ports of a VLAN-aware switch drop genuinely VLAN-tagged
packets.
Also, the DRPSOTAG bit is not available on the first generation of
switches (SJA1105E, SJA1105T). So since one of the key features of this
driver is compatibility across switch generations, this makes it an even
less desirable approach.
The breakthrough comes from commit bef0746cf4cc ("net: dsa: sja1105:
make sure untagged packets are dropped on ingress ports with no pvid"),
where it became obvious that untagged packets are not dropped even if
the ingress port is not in the VMEMB_PORT vector of that port's pvid.
However, VLAN-tagged packets are subject to VLAN ingress
checking/dropping. This means that instead of using the catch-all
DRPSOTAG bit introduced in SJA1105P, we can drop tagged packets on a
per-VLAN basis, and this is already compatible with SJA1105E/T.
This patch adds an "allowed_ingress" argument to sja1105_vlan_add(), and
we call it with "false" for tag_8021q VLANs on user ports. The tag_8021q
VLANs still need to be allowed, of course, on ingress to DSA ports and
CPU ports.
We also need to refine the drop_untagged check in sja1105_commit_pvid to
make it not freak out about this new configuration. Currently it will
try to keep the configuration consistent between untagged and pvid-tagged
packets, so if the pvid of a port is 1 but VLAN 1 is not in VMEMB_PORT,
packets tagged with VID 1 will behave the same as untagged packets, and
be dropped. This behavior is what we want for ports under a VLAN-aware
bridge, but for the ports with a tag_8021q pvid, we want untagged
packets to be accepted, but packets tagged with a header recognized by
the switch as a tag_8021q VLAN to be dropped. So only restrict the
drop_untagged check to apply to the bridge_pvid, not to the tag_8021q_pvid.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:00 +03:00
|
|
|
/* Only force dropping of untagged packets when the port is under a
|
|
|
|
* VLAN-aware bridge. When the tag_8021q pvid is used, we are
|
|
|
|
* deliberately removing the RX VLAN from the port's VMEMB_PORT list,
|
|
|
|
* to prevent DSA tag spoofing from the link partner. Untagged packets
|
|
|
|
* are the only ones that should be received with tag_8021q, so
|
|
|
|
* definitely don't drop them.
|
|
|
|
*/
|
|
|
|
if (pvid == priv->bridge_pvid[port]) {
|
|
|
|
vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
|
net: dsa: sja1105: make sure untagged packets are dropped on ingress ports with no pvid
Surprisingly, this configuration:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
still has the sja1105 switch sending untagged packets to the CPU (and
failing to decode them, since dsa_find_designated_bridge_port_by_vid
searches by VID 1 and rightfully finds no bridge VLAN 1 on a port).
Dumping the switch configuration, the VLANs are managed properly:
- the pvid of swp2 is 1 in the MAC Configuration Table, but
- only the CPU port is in the port membership of VLANID 1 in the VLAN
Lookup Table
When the ingress packets are tagged with VID 1, they are properly
dropped. But when they are untagged, they are able to reach the CPU
port. Also, when the pvid in the MAC Configuration Table is changed to
e.g. 55 (an unused VLAN), the untagged packets are also dropped.
So it looks like:
- the switch bypasses ingress VLAN membership checks for untagged traffic
- the reason why the untagged traffic is dropped when I make the pvid 55
is due to the lack of valid destination ports in VLAN 55, rather than
an ingress membership violation
- the ingress VLAN membership checks are only done for VLAN-tagged traffic
Interesting. It looks like there is an explicit bit to drop untagged
traffic, so we should probably be using that to preserve user expectations.
Note that only VLAN-aware ports should drop untagged packets due to no
pvid - when VLAN-unaware, the software bridge doesn't do this even if
there is no pvid on any bridge port and on the bridge itself. So the new
sja1105_drop_untagged() function cannot simply be called with "false"
from sja1105_bridge_vlan_add() and with "true" from sja1105_bridge_vlan_del.
Instead, we need to also consider the VLAN awareness state. That means
we need to hook the "drop untagged" setting in all the same places where
the "commit pvid" logic is, and it needs to factor in all the state when
flipping the "drop untagged" bit: is our current pvid in the VLAN Lookup
Table, and is the current port in that VLAN's port membership list?
VLAN-unaware ports will never drop untagged frames because these checks
always succeed by construction, and the tag_8021q VLANs cannot be changed
by the user.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-29 00:54:28 +03:00
|
|
|
|
net: dsa: sja1105: prevent tag_8021q VLANs from being received on user ports
Currently it is possible for an attacker to craft packets with a fake
DSA tag and send them to us, and our user ports will accept them and
preserve that VLAN when transmitting towards the CPU. Then the tagger
will be misled into thinking that the packets came on a different port
than they really came on.
Up until recently there wasn't a good option to prevent this from
happening. In SJA1105P and later, the MAC Configuration Table introduced
two options called:
- DRPSITAG: Drop Single Inner Tagged Frames
- DRPSOTAG: Drop Single Outer Tagged Frames
Because the sja1105 driver classifies all VLANs as "outer VLANs" (S-Tags),
it would be in principle possible to enable the DRPSOTAG bit on ports
using tag_8021q, and drop on ingress all packets which have a VLAN tag.
When the switch is VLAN-unaware, this works, because it uses a custom
TPID of 0xdadb, so any "tagged" packets received on a user port are
probably a spoofing attempt. But when the switch overall is VLAN-aware,
and some ports are standalone (therefore they use tag_8021q), the TPID
is 0x8100, and the port can receive a mix of untagged and VLAN-tagged
packets. The untagged ones will be classified to the tag_8021q pvid, and
the tagged ones to the VLAN ID from the packet header. Yes, it is true
that since commit 4fbc08bd3665 ("net: dsa: sja1105: deny 8021q uppers on
ports") we no longer support this mixed mode, but that is a temporary
limitation which will eventually be lifted. It would be nice to not
introduce one more restriction via DRPSOTAG, which would make the
standalone ports of a VLAN-aware switch drop genuinely VLAN-tagged
packets.
Also, the DRPSOTAG bit is not available on the first generation of
switches (SJA1105E, SJA1105T). So since one of the key features of this
driver is compatibility across switch generations, this makes it an even
less desirable approach.
The breakthrough comes from commit bef0746cf4cc ("net: dsa: sja1105:
make sure untagged packets are dropped on ingress ports with no pvid"),
where it became obvious that untagged packets are not dropped even if
the ingress port is not in the VMEMB_PORT vector of that port's pvid.
However, VLAN-tagged packets are subject to VLAN ingress
checking/dropping. This means that instead of using the catch-all
DRPSOTAG bit introduced in SJA1105P, we can drop tagged packets on a
per-VLAN basis, and this is already compatible with SJA1105E/T.
This patch adds an "allowed_ingress" argument to sja1105_vlan_add(), and
we call it with "false" for tag_8021q VLANs on user ports. The tag_8021q
VLANs still need to be allowed, of course, on ingress to DSA ports and
CPU ports.
We also need to refine the drop_untagged check in sja1105_commit_pvid to
make it not freak out about this new configuration. Currently it will
try to keep the configuration consistent between untagged and pvid-tagged
packets, so if the pvid of a port is 1 but VLAN 1 is not in VMEMB_PORT,
packets tagged with VID 1 will behave the same as untagged packets, and
be dropped. This behavior is what we want for ports under a VLAN-aware
bridge, but for the ports with a tag_8021q pvid, we want untagged
packets to be accepted, but packets tagged with a header recognized by
the switch as a tag_8021q VLAN to be dropped. So only restrict the
drop_untagged check to apply to the bridge_pvid, not to the tag_8021q_pvid.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:00 +03:00
|
|
|
match = sja1105_is_vlan_configured(priv, pvid);
|
net: dsa: sja1105: make sure untagged packets are dropped on ingress ports with no pvid
Surprisingly, this configuration:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
still has the sja1105 switch sending untagged packets to the CPU (and
failing to decode them, since dsa_find_designated_bridge_port_by_vid
searches by VID 1 and rightfully finds no bridge VLAN 1 on a port).
Dumping the switch configuration, the VLANs are managed properly:
- the pvid of swp2 is 1 in the MAC Configuration Table, but
- only the CPU port is in the port membership of VLANID 1 in the VLAN
Lookup Table
When the ingress packets are tagged with VID 1, they are properly
dropped. But when they are untagged, they are able to reach the CPU
port. Also, when the pvid in the MAC Configuration Table is changed to
e.g. 55 (an unused VLAN), the untagged packets are also dropped.
So it looks like:
- the switch bypasses ingress VLAN membership checks for untagged traffic
- the reason why the untagged traffic is dropped when I make the pvid 55
is due to the lack of valid destination ports in VLAN 55, rather than
an ingress membership violation
- the ingress VLAN membership checks are only done for VLAN-tagged traffic
Interesting. It looks like there is an explicit bit to drop untagged
traffic, so we should probably be using that to preserve user expectations.
Note that only VLAN-aware ports should drop untagged packets due to no
pvid - when VLAN-unaware, the software bridge doesn't do this even if
there is no pvid on any bridge port and on the bridge itself. So the new
sja1105_drop_untagged() function cannot simply be called with "false"
from sja1105_bridge_vlan_add() and with "true" from sja1105_bridge_vlan_del.
Instead, we need to also consider the VLAN awareness state. That means
we need to hook the "drop untagged" setting in all the same places where
the "commit pvid" logic is, and it needs to factor in all the state when
flipping the "drop untagged" bit: is our current pvid in the VLAN Lookup
Table, and is the current port in that VLAN's port membership list?
VLAN-unaware ports will never drop untagged frames because these checks
always succeed by construction, and the tag_8021q VLANs cannot be changed
by the user.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-29 00:54:28 +03:00
|
|
|
|
net: dsa: sja1105: prevent tag_8021q VLANs from being received on user ports
Currently it is possible for an attacker to craft packets with a fake
DSA tag and send them to us, and our user ports will accept them and
preserve that VLAN when transmitting towards the CPU. Then the tagger
will be misled into thinking that the packets came on a different port
than they really came on.
Up until recently there wasn't a good option to prevent this from
happening. In SJA1105P and later, the MAC Configuration Table introduced
two options called:
- DRPSITAG: Drop Single Inner Tagged Frames
- DRPSOTAG: Drop Single Outer Tagged Frames
Because the sja1105 driver classifies all VLANs as "outer VLANs" (S-Tags),
it would be in principle possible to enable the DRPSOTAG bit on ports
using tag_8021q, and drop on ingress all packets which have a VLAN tag.
When the switch is VLAN-unaware, this works, because it uses a custom
TPID of 0xdadb, so any "tagged" packets received on a user port are
probably a spoofing attempt. But when the switch overall is VLAN-aware,
and some ports are standalone (therefore they use tag_8021q), the TPID
is 0x8100, and the port can receive a mix of untagged and VLAN-tagged
packets. The untagged ones will be classified to the tag_8021q pvid, and
the tagged ones to the VLAN ID from the packet header. Yes, it is true
that since commit 4fbc08bd3665 ("net: dsa: sja1105: deny 8021q uppers on
ports") we no longer support this mixed mode, but that is a temporary
limitation which will eventually be lifted. It would be nice to not
introduce one more restriction via DRPSOTAG, which would make the
standalone ports of a VLAN-aware switch drop genuinely VLAN-tagged
packets.
Also, the DRPSOTAG bit is not available on the first generation of
switches (SJA1105E, SJA1105T). So since one of the key features of this
driver is compatibility across switch generations, this makes it an even
less desirable approach.
The breakthrough comes from commit bef0746cf4cc ("net: dsa: sja1105:
make sure untagged packets are dropped on ingress ports with no pvid"),
where it became obvious that untagged packets are not dropped even if
the ingress port is not in the VMEMB_PORT vector of that port's pvid.
However, VLAN-tagged packets are subject to VLAN ingress
checking/dropping. This means that instead of using the catch-all
DRPSOTAG bit introduced in SJA1105P, we can drop tagged packets on a
per-VLAN basis, and this is already compatible with SJA1105E/T.
This patch adds an "allowed_ingress" argument to sja1105_vlan_add(), and
we call it with "false" for tag_8021q VLANs on user ports. The tag_8021q
VLANs still need to be allowed, of course, on ingress to DSA ports and
CPU ports.
We also need to refine the drop_untagged check in sja1105_commit_pvid to
make it not freak out about this new configuration. Currently it will
try to keep the configuration consistent between untagged and pvid-tagged
packets, so if the pvid of a port is 1 but VLAN 1 is not in VMEMB_PORT,
packets tagged with VID 1 will behave the same as untagged packets, and
be dropped. This behavior is what we want for ports under a VLAN-aware
bridge, but for the ports with a tag_8021q pvid, we want untagged
packets to be accepted, but packets tagged with a header recognized by
the switch as a tag_8021q VLAN to be dropped. So only restrict the
drop_untagged check to apply to the bridge_pvid, not to the tag_8021q_pvid.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:00 +03:00
|
|
|
if (match < 0 || !(vlan[match].vmemb_port & BIT(port)))
|
|
|
|
drop_untagged = true;
|
|
|
|
}
|
net: dsa: sja1105: make sure untagged packets are dropped on ingress ports with no pvid
Surprisingly, this configuration:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
still has the sja1105 switch sending untagged packets to the CPU (and
failing to decode them, since dsa_find_designated_bridge_port_by_vid
searches by VID 1 and rightfully finds no bridge VLAN 1 on a port).
Dumping the switch configuration, the VLANs are managed properly:
- the pvid of swp2 is 1 in the MAC Configuration Table, but
- only the CPU port is in the port membership of VLANID 1 in the VLAN
Lookup Table
When the ingress packets are tagged with VID 1, they are properly
dropped. But when they are untagged, they are able to reach the CPU
port. Also, when the pvid in the MAC Configuration Table is changed to
e.g. 55 (an unused VLAN), the untagged packets are also dropped.
So it looks like:
- the switch bypasses ingress VLAN membership checks for untagged traffic
- the reason why the untagged traffic is dropped when I make the pvid 55
is due to the lack of valid destination ports in VLAN 55, rather than
an ingress membership violation
- the ingress VLAN membership checks are only done for VLAN-tagged traffic
Interesting. It looks like there is an explicit bit to drop untagged
traffic, so we should probably be using that to preserve user expectations.
Note that only VLAN-aware ports should drop untagged packets due to no
pvid - when VLAN-unaware, the software bridge doesn't do this even if
there is no pvid on any bridge port and on the bridge itself. So the new
sja1105_drop_untagged() function cannot simply be called with "false"
from sja1105_bridge_vlan_add() and with "true" from sja1105_bridge_vlan_del.
Instead, we need to also consider the VLAN awareness state. That means
we need to hook the "drop untagged" setting in all the same places where
the "commit pvid" logic is, and it needs to factor in all the state when
flipping the "drop untagged" bit: is our current pvid in the VLAN Lookup
Table, and is the current port in that VLAN's port membership list?
VLAN-unaware ports will never drop untagged frames because these checks
always succeed by construction, and the tag_8021q VLANs cannot be changed
by the user.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-29 00:54:28 +03:00
|
|
|
|
net: dsa: sja1105: drop untagged packets on the CPU and DSA ports
The sja1105 driver is a bit special in its use of VLAN headers as DSA
tags. This is because in VLAN-aware mode, the VLAN headers use an actual
TPID of 0x8100, which is understood even by the DSA master as an actual
VLAN header.
Furthermore, control packets such as PTP and STP are transmitted with no
VLAN header as a DSA tag, because, depending on switch generation, there
are ways to steer these control packets towards a precise egress port
other than VLAN tags. Transmitting control packets as untagged means
leaving a door open for traffic in general to be transmitted as untagged
from the DSA master, and for it to traverse the switch and exit a random
switch port according to the FDB lookup.
This behavior is a bit out of line with other DSA drivers which have
native support for DSA tagging. There, it is to be expected that the
switch only accepts DSA-tagged packets on its CPU port, dropping
everything that does not match this pattern.
We perhaps rely a bit too much on the switches' hardware dropping on the
CPU port, and place no other restrictions in the kernel data path to
avoid that. For example, sja1105 is also a bit special in that STP/PTP
packets are transmitted using "management routes"
(sja1105_port_deferred_xmit): when sending a link-local packet from the
CPU, we must first write a SPI message to the switch to tell it to
expect a packet towards multicast MAC DA 01-80-c2-00-00-0e, and to route
it towards port 3 when it gets it. This entry expires as soon as it
matches a packet received by the switch, and it needs to be reinstalled
for the next packet etc. All in all quite a ghetto mechanism, but it is
all that the sja1105 switches offer for injecting a control packet.
The driver takes a mutex for serializing control packets and making the
pairs of SPI writes of a management route and its associated skb atomic,
but to be honest, a mutex is only relevant as long as all parties agree
to take it. With the DSA design, it is possible to open an AF_PACKET
socket on the DSA master net device, and blast packets towards
01-80-c2-00-00-0e, and whatever locking the DSA switch driver might use,
it all goes kaput because management routes installed by the driver will
match skbs sent by the DSA master, and not skbs generated by the driver
itself. So they will end up being routed on the wrong port.
So through the lens of that, maybe it would make sense to avoid that
from happening by doing something in the network stack, like: introduce
a new bit in struct sk_buff, like xmit_from_dsa. Then, somewhere around
dev_hard_start_xmit(), introduce the following check:
if (netdev_uses_dsa(dev) && !skb->xmit_from_dsa)
kfree_skb(skb);
Ok, maybe that is a bit drastic, but that would at least prevent a bunch
of problems. For example, right now, even though the majority of DSA
switches drop packets without DSA tags sent by the DSA master (and
therefore the majority of garbage that user space daemons like avahi and
udhcpcd and friends create), it is still conceivable that an aggressive
user space program can open an AF_PACKET socket and inject a spoofed DSA
tag directly on the DSA master. We have no protection against that; the
packet will be understood by the switch and be routed wherever user
space says. Furthermore: there are some DSA switches where we even have
register access over Ethernet, using DSA tags. So even user space
drivers are possible in this way. This is a huge hole.
However, the biggest thing that bothers me is that udhcpcd attempts to
ask for an IP address on all interfaces by default, and with sja1105, it
will attempt to get a valid IP address on both the DSA master as well as
on sja1105 switch ports themselves. So with IP addresses in the same
subnet on multiple interfaces, the routing table will be messed up and
the system will be unusable for traffic until it is configured manually
to not ask for an IP address on the DSA master itself.
It turns out that it is possible to avoid that in the sja1105 driver, at
least very superficially, by requesting the switch to drop VLAN-untagged
packets on the CPU port. With the exception of control packets, all
traffic originated from tag_sja1105.c is already VLAN-tagged, so only
STP and PTP packets need to be converted. For that, we need to uphold
the equivalence between an untagged and a pvid-tagged packet, and to
remember that the CPU port of sja1105 uses a pvid of 4095.
Now that we drop untagged traffic on the CPU port, non-aggressive user
space applications like udhcpcd stop bothering us, and sja1105 effectively
becomes just as vulnerable to the aggressive kind of user space programs
as other DSA switches are (ok, users can also create 8021q uppers on top
of the DSA master in the case of sja1105, but in future patches we can
easily deny that, but it still doesn't change the fact that VLAN-tagged
packets can still be injected over raw sockets).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:01 +03:00
|
|
|
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
|
|
|
|
drop_untagged = true;
|
|
|
|
|
net: dsa: sja1105: make sure untagged packets are dropped on ingress ports with no pvid
Surprisingly, this configuration:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
still has the sja1105 switch sending untagged packets to the CPU (and
failing to decode them, since dsa_find_designated_bridge_port_by_vid
searches by VID 1 and rightfully finds no bridge VLAN 1 on a port).
Dumping the switch configuration, the VLANs are managed properly:
- the pvid of swp2 is 1 in the MAC Configuration Table, but
- only the CPU port is in the port membership of VLANID 1 in the VLAN
Lookup Table
When the ingress packets are tagged with VID 1, they are properly
dropped. But when they are untagged, they are able to reach the CPU
port. Also, when the pvid in the MAC Configuration Table is changed to
e.g. 55 (an unused VLAN), the untagged packets are also dropped.
So it looks like:
- the switch bypasses ingress VLAN membership checks for untagged traffic
- the reason why the untagged traffic is dropped when I make the pvid 55
is due to the lack of valid destination ports in VLAN 55, rather than
an ingress membership violation
- the ingress VLAN membership checks are only done for VLAN-tagged traffic
Interesting. It looks like there is an explicit bit to drop untagged
traffic, so we should probably be using that to preserve user expectations.
Note that only VLAN-aware ports should drop untagged packets due to no
pvid - when VLAN-unaware, the software bridge doesn't do this even if
there is no pvid on any bridge port and on the bridge itself. So the new
sja1105_drop_untagged() function cannot simply be called with "false"
from sja1105_bridge_vlan_add() and with "true" from sja1105_bridge_vlan_del.
Instead, we need to also consider the VLAN awareness state. That means
we need to hook the "drop untagged" setting in all the same places where
the "commit pvid" logic is, and it needs to factor in all the state when
flipping the "drop untagged" bit: is our current pvid in the VLAN Lookup
Table, and is the current port in that VLAN's port membership list?
VLAN-unaware ports will never drop untagged frames because these checks
always succeed by construction, and the tag_8021q VLANs cannot be changed
by the user.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-29 00:54:28 +03:00
|
|
|
return sja1105_drop_untagged(ds, port, drop_untagged);
|
2021-07-29 00:54:27 +03:00
|
|
|
}
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
static int sja1105_init_mac_settings(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_mac_config_entry default_mac = {
|
|
|
|
/* Enable all 8 priority queues on egress.
|
|
|
|
* Every queue i holds top[i] - base[i] frames.
|
|
|
|
* Sum of top[i] - base[i] is 511 (max hardware limit).
|
|
|
|
*/
|
|
|
|
.top = {0x3F, 0x7F, 0xBF, 0xFF, 0x13F, 0x17F, 0x1BF, 0x1FF},
|
|
|
|
.base = {0x0, 0x40, 0x80, 0xC0, 0x100, 0x140, 0x180, 0x1C0},
|
|
|
|
.enabled = {true, true, true, true, true, true, true, true},
|
|
|
|
/* Keep standard IFG of 12 bytes on egress. */
|
|
|
|
.ifg = 0,
|
|
|
|
/* Always put the MAC speed in automatic mode, where it can be
|
2019-06-08 16:03:42 +03:00
|
|
|
* adjusted at runtime by PHYLINK.
|
2019-05-02 23:23:30 +03:00
|
|
|
*/
|
2021-05-31 01:59:37 +03:00
|
|
|
.speed = priv->info->port_speed[SJA1105_SPEED_AUTO],
|
2019-05-02 23:23:30 +03:00
|
|
|
/* No static correction for 1-step 1588 events */
|
|
|
|
.tp_delin = 0,
|
|
|
|
.tp_delout = 0,
|
|
|
|
/* Disable aging for critical TTEthernet traffic */
|
|
|
|
.maxage = 0xFF,
|
|
|
|
/* Internal VLAN (pvid) to apply to untagged ingress */
|
|
|
|
.vlanprio = 0,
|
2019-06-26 02:39:35 +03:00
|
|
|
.vlanid = 1,
|
2019-05-02 23:23:30 +03:00
|
|
|
.ing_mirr = false,
|
|
|
|
.egr_mirr = false,
|
|
|
|
/* Don't drop traffic with other EtherType than ETH_P_IP */
|
|
|
|
.drpnona664 = false,
|
|
|
|
/* Don't drop double-tagged traffic */
|
|
|
|
.drpdtag = false,
|
|
|
|
/* Don't drop untagged traffic */
|
|
|
|
.drpuntag = false,
|
|
|
|
/* Don't retag 802.1p (VID 0) traffic with the pvid */
|
|
|
|
.retag = false,
|
2019-05-05 13:19:28 +03:00
|
|
|
/* Disable learning and I/O on user ports by default -
|
|
|
|
* STP will enable it.
|
|
|
|
*/
|
|
|
|
.dyn_learn = false,
|
2019-05-02 23:23:30 +03:00
|
|
|
.egress = false,
|
|
|
|
.ingress = false,
|
|
|
|
};
|
|
|
|
struct sja1105_mac_config_entry *mac;
|
2021-05-24 16:14:13 +03:00
|
|
|
struct dsa_switch *ds = priv->ds;
|
2019-05-02 23:23:30 +03:00
|
|
|
struct sja1105_table *table;
|
2021-08-08 17:35:26 +03:00
|
|
|
struct dsa_port *dp;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG];
|
|
|
|
|
|
|
|
/* Discard previous MAC Configuration Table */
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entries = kcalloc(table->ops->max_entry_count,
|
2019-05-02 23:23:30 +03:00
|
|
|
table->ops->unpacked_entry_size, GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entry_count = table->ops->max_entry_count;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
mac = table->entries;
|
|
|
|
|
2021-08-08 17:35:26 +03:00
|
|
|
list_for_each_entry(dp, &ds->dst->ports, list) {
|
|
|
|
if (dp->ds != ds)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
mac[dp->index] = default_mac;
|
2021-07-13 12:37:19 +03:00
|
|
|
|
|
|
|
/* Let sja1105_bridge_stp_state_set() keep address learning
|
net: dsa: sja1105: enable address learning on cascade ports
Right now, address learning is disabled on DSA ports, which means that a
packet received over a DSA port from a cross-chip switch will be flooded
to unrelated ports.
It is desirable to eliminate that, but for that we need a breakdown of
the possibilities for the sja1105 driver. A DSA port can be:
- a downstream-facing cascade port. This is simple because it will
always receive packets from a downstream switch, and there should be
no other route to reach that downstream switch in the first place,
which means it should be safe to learn that MAC address towards that
switch.
- an upstream-facing cascade port. This receives packets either:
* autonomously forwarded by an upstream switch (and therefore these
packets belong to the data plane of a bridge, so address learning
should be ok), or
* injected from the CPU. This deserves further discussion, as normally,
an upstream-facing cascade port is no different than the CPU port
itself. But with "H" topologies (a DSA link towards a switch that
has its own CPU port), these are more "laterally-facing" cascade
ports than they are "upstream-facing". Here, there is a risk that
the port might learn the host addresses on the wrong port (on the
DSA port instead of on its own CPU port), but this is solved by
DSA's RX filtering infrastructure, which installs the host addresses
as static FDB entries on the CPU port of all switches in a "H" tree.
So even if there will be an attempt from the switch to migrate the
FDB entry from the CPU port to the laterally-facing cascade port, it
will fail to do that, because the FDB entry that already exists is
static and cannot migrate. So address learning should be safe for
this configuration too.
Ok, so what about other MAC addresses coming from the host, not
necessarily the bridge local FDB entries? What about MAC addresses
dynamically learned on foreign interfaces, isn't there a risk that
cascade ports will learn these entries dynamically when they are
supposed to be delivered towards the CPU port? Well, that is correct,
and this is why we also need to enable the assisted learning feature, to
snoop for these addresses and write them to hardware as static FDB
entries towards the CPU, to make the switch's learning process on the
cascade ports ineffective for them. With assisted learning enabled, the
hardware learning on the CPU port must be disabled.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:36 +03:00
|
|
|
* enabled for the DSA ports. CPU ports use software-assisted
|
|
|
|
* learning to ensure that only FDB entries belonging to the
|
|
|
|
* bridge are learned, and that they are learned towards all
|
|
|
|
* CPU ports in a cross-chip topology if multiple CPU ports
|
|
|
|
* exist.
|
2021-07-13 12:37:19 +03:00
|
|
|
*/
|
2021-08-08 17:35:26 +03:00
|
|
|
if (dsa_port_is_dsa(dp))
|
|
|
|
dp->learning = true;
|
net: dsa: sja1105: drop untagged packets on the CPU and DSA ports
The sja1105 driver is a bit special in its use of VLAN headers as DSA
tags. This is because in VLAN-aware mode, the VLAN headers use an actual
TPID of 0x8100, which is understood even by the DSA master as an actual
VLAN header.
Furthermore, control packets such as PTP and STP are transmitted with no
VLAN header as a DSA tag, because, depending on switch generation, there
are ways to steer these control packets towards a precise egress port
other than VLAN tags. Transmitting control packets as untagged means
leaving a door open for traffic in general to be transmitted as untagged
from the DSA master, and for it to traverse the switch and exit a random
switch port according to the FDB lookup.
This behavior is a bit out of line with other DSA drivers which have
native support for DSA tagging. There, it is to be expected that the
switch only accepts DSA-tagged packets on its CPU port, dropping
everything that does not match this pattern.
We perhaps rely a bit too much on the switches' hardware dropping on the
CPU port, and place no other restrictions in the kernel data path to
avoid that. For example, sja1105 is also a bit special in that STP/PTP
packets are transmitted using "management routes"
(sja1105_port_deferred_xmit): when sending a link-local packet from the
CPU, we must first write a SPI message to the switch to tell it to
expect a packet towards multicast MAC DA 01-80-c2-00-00-0e, and to route
it towards port 3 when it gets it. This entry expires as soon as it
matches a packet received by the switch, and it needs to be reinstalled
for the next packet etc. All in all quite a ghetto mechanism, but it is
all that the sja1105 switches offer for injecting a control packet.
The driver takes a mutex for serializing control packets and making the
pairs of SPI writes of a management route and its associated skb atomic,
but to be honest, a mutex is only relevant as long as all parties agree
to take it. With the DSA design, it is possible to open an AF_PACKET
socket on the DSA master net device, and blast packets towards
01-80-c2-00-00-0e, and whatever locking the DSA switch driver might use,
it all goes kaput because management routes installed by the driver will
match skbs sent by the DSA master, and not skbs generated by the driver
itself. So they will end up being routed on the wrong port.
So through the lens of that, maybe it would make sense to avoid that
from happening by doing something in the network stack, like: introduce
a new bit in struct sk_buff, like xmit_from_dsa. Then, somewhere around
dev_hard_start_xmit(), introduce the following check:
if (netdev_uses_dsa(dev) && !skb->xmit_from_dsa)
kfree_skb(skb);
Ok, maybe that is a bit drastic, but that would at least prevent a bunch
of problems. For example, right now, even though the majority of DSA
switches drop packets without DSA tags sent by the DSA master (and
therefore the majority of garbage that user space daemons like avahi and
udhcpcd and friends create), it is still conceivable that an aggressive
user space program can open an AF_PACKET socket and inject a spoofed DSA
tag directly on the DSA master. We have no protection against that; the
packet will be understood by the switch and be routed wherever user
space says. Furthermore: there are some DSA switches where we even have
register access over Ethernet, using DSA tags. So even user space
drivers are possible in this way. This is a huge hole.
However, the biggest thing that bothers me is that udhcpcd attempts to
ask for an IP address on all interfaces by default, and with sja1105, it
will attempt to get a valid IP address on both the DSA master as well as
on sja1105 switch ports themselves. So with IP addresses in the same
subnet on multiple interfaces, the routing table will be messed up and
the system will be unusable for traffic until it is configured manually
to not ask for an IP address on the DSA master itself.
It turns out that it is possible to avoid that in the sja1105 driver, at
least very superficially, by requesting the switch to drop VLAN-untagged
packets on the CPU port. With the exception of control packets, all
traffic originated from tag_sja1105.c is already VLAN-tagged, so only
STP and PTP packets need to be converted. For that, we need to uphold
the equivalence between an untagged and a pvid-tagged packet, and to
remember that the CPU port of sja1105 uses a pvid of 4095.
Now that we drop untagged traffic on the CPU port, non-aggressive user
space applications like udhcpcd stop bothering us, and sja1105 effectively
becomes just as vulnerable to the aggressive kind of user space programs
as other DSA switches are (ok, users can also create 8021q uppers on top
of the DSA master in the case of sja1105, but in future patches we can
easily deny that, but it still doesn't change the fact that VLAN-tagged
packets can still be injected over raw sockets).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:01 +03:00
|
|
|
|
|
|
|
/* Disallow untagged packets from being received on the
|
|
|
|
* CPU and DSA ports.
|
|
|
|
*/
|
|
|
|
if (dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))
|
|
|
|
mac[dp->index].drpuntag = true;
|
2019-05-05 13:19:28 +03:00
|
|
|
}
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-06-04 17:01:50 +03:00
|
|
|
static int sja1105_init_mii_settings(struct sja1105_private *priv)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
|
|
|
struct device *dev = &priv->spidev->dev;
|
|
|
|
struct sja1105_xmii_params_entry *mii;
|
2021-05-24 16:14:13 +03:00
|
|
|
struct dsa_switch *ds = priv->ds;
|
2019-05-02 23:23:30 +03:00
|
|
|
struct sja1105_table *table;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_XMII_PARAMS];
|
|
|
|
|
|
|
|
/* Discard previous xMII Mode Parameters Table */
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entries = kcalloc(table->ops->max_entry_count,
|
2019-05-02 23:23:30 +03:00
|
|
|
table->ops->unpacked_entry_size, GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2019-06-08 16:03:42 +03:00
|
|
|
/* Override table based on PHYLINK DT bindings */
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entry_count = table->ops->max_entry_count;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
mii = table->entries;
|
|
|
|
|
2021-05-24 16:14:13 +03:00
|
|
|
for (i = 0; i < ds->num_ports; i++) {
|
2021-06-04 17:01:50 +03:00
|
|
|
sja1105_mii_role_t role = XMII_MAC;
|
|
|
|
|
2020-03-19 22:12:10 +02:00
|
|
|
if (dsa_is_unused_port(priv->ds, i))
|
|
|
|
continue;
|
|
|
|
|
2021-06-04 17:01:50 +03:00
|
|
|
switch (priv->phy_mode[i]) {
|
2021-06-08 12:25:38 +03:00
|
|
|
case PHY_INTERFACE_MODE_INTERNAL:
|
|
|
|
if (priv->info->internal_phy[i] == SJA1105_NO_PHY)
|
|
|
|
goto unsupported;
|
|
|
|
|
|
|
|
mii->xmii_mode[i] = XMII_MODE_MII;
|
|
|
|
if (priv->info->internal_phy[i] == SJA1105_PHY_BASE_TX)
|
|
|
|
mii->special[i] = true;
|
|
|
|
|
|
|
|
break;
|
2021-06-04 17:01:50 +03:00
|
|
|
case PHY_INTERFACE_MODE_REVMII:
|
|
|
|
role = XMII_PHY;
|
|
|
|
fallthrough;
|
2019-05-02 23:23:30 +03:00
|
|
|
case PHY_INTERFACE_MODE_MII:
|
net: dsa: sja1105: add a PHY interface type compatibility matrix
On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.
On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.
We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.
The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.
The 2500base-x support added in this patch is not complete.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-05-31 01:59:36 +03:00
|
|
|
if (!priv->info->supports_mii[i])
|
|
|
|
goto unsupported;
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
mii->xmii_mode[i] = XMII_MODE_MII;
|
|
|
|
break;
|
2021-06-04 17:01:50 +03:00
|
|
|
case PHY_INTERFACE_MODE_REVRMII:
|
|
|
|
role = XMII_PHY;
|
|
|
|
fallthrough;
|
2019-05-02 23:23:30 +03:00
|
|
|
case PHY_INTERFACE_MODE_RMII:
|
net: dsa: sja1105: add a PHY interface type compatibility matrix
On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.
On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.
We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.
The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.
The 2500base-x support added in this patch is not complete.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-05-31 01:59:36 +03:00
|
|
|
if (!priv->info->supports_rmii[i])
|
|
|
|
goto unsupported;
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
mii->xmii_mode[i] = XMII_MODE_RMII;
|
|
|
|
break;
|
|
|
|
case PHY_INTERFACE_MODE_RGMII:
|
|
|
|
case PHY_INTERFACE_MODE_RGMII_ID:
|
|
|
|
case PHY_INTERFACE_MODE_RGMII_RXID:
|
|
|
|
case PHY_INTERFACE_MODE_RGMII_TXID:
|
net: dsa: sja1105: add a PHY interface type compatibility matrix
On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.
On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.
We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.
The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.
The 2500base-x support added in this patch is not complete.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-05-31 01:59:36 +03:00
|
|
|
if (!priv->info->supports_rgmii[i])
|
|
|
|
goto unsupported;
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
mii->xmii_mode[i] = XMII_MODE_RGMII;
|
|
|
|
break;
|
2020-03-20 13:29:37 +02:00
|
|
|
case PHY_INTERFACE_MODE_SGMII:
|
net: dsa: sja1105: add a PHY interface type compatibility matrix
On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.
On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.
We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.
The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.
The 2500base-x support added in this patch is not complete.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-05-31 01:59:36 +03:00
|
|
|
if (!priv->info->supports_sgmii[i])
|
|
|
|
goto unsupported;
|
|
|
|
|
|
|
|
mii->xmii_mode[i] = XMII_MODE_SGMII;
|
2021-06-11 23:05:30 +03:00
|
|
|
mii->special[i] = true;
|
net: dsa: sja1105: add a PHY interface type compatibility matrix
On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.
On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.
We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.
The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.
The 2500base-x support added in this patch is not complete.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-05-31 01:59:36 +03:00
|
|
|
break;
|
|
|
|
case PHY_INTERFACE_MODE_2500BASEX:
|
|
|
|
if (!priv->info->supports_2500basex[i])
|
|
|
|
goto unsupported;
|
|
|
|
|
2020-03-20 13:29:37 +02:00
|
|
|
mii->xmii_mode[i] = XMII_MODE_SGMII;
|
2021-06-11 23:05:30 +03:00
|
|
|
mii->special[i] = true;
|
2020-03-20 13:29:37 +02:00
|
|
|
break;
|
net: dsa: sja1105: add a PHY interface type compatibility matrix
On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.
On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.
We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.
The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.
The 2500base-x support added in this patch is not complete.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-05-31 01:59:36 +03:00
|
|
|
unsupported:
|
2019-05-02 23:23:30 +03:00
|
|
|
default:
|
net: dsa: sja1105: add a PHY interface type compatibility matrix
On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.
On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.
We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.
The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.
The 2500base-x support added in this patch is not complete.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-05-31 01:59:36 +03:00
|
|
|
dev_err(dev, "Unsupported PHY mode %s on port %d!\n",
|
2021-06-04 17:01:50 +03:00
|
|
|
phy_modes(priv->phy_mode[i]), i);
|
2021-05-24 12:25:25 +03:00
|
|
|
return -EINVAL;
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
2021-06-04 17:01:50 +03:00
|
|
|
mii->phy_mac[i] = role;
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_init_static_fdb(struct sja1105_private *priv)
|
|
|
|
{
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
struct sja1105_l2_lookup_entry *l2_lookup;
|
2019-05-02 23:23:30 +03:00
|
|
|
struct sja1105_table *table;
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
int port;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP];
|
|
|
|
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
/* We only populate the FDB table through dynamic L2 Address Lookup
|
|
|
|
* entries, except for a special entry at the end which is a catch-all
|
|
|
|
* for unknown multicast and will be used to control flooding domain.
|
2019-05-02 23:23:31 +03:00
|
|
|
*/
|
2019-05-02 23:23:30 +03:00
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
|
|
|
|
if (!priv->info->can_limit_mcast_flood)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
table->entries = kcalloc(1, table->ops->unpacked_entry_size,
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
table->entry_count = 1;
|
|
|
|
l2_lookup = table->entries;
|
|
|
|
|
|
|
|
/* All L2 multicast addresses have an odd first octet */
|
|
|
|
l2_lookup[0].macaddr = SJA1105_UNKNOWN_MULTICAST;
|
|
|
|
l2_lookup[0].mask_macaddr = SJA1105_UNKNOWN_MULTICAST;
|
|
|
|
l2_lookup[0].lockeds = true;
|
|
|
|
l2_lookup[0].index = SJA1105_MAX_L2_LOOKUP_COUNT - 1;
|
|
|
|
|
|
|
|
/* Flood multicast to every port by default */
|
|
|
|
for (port = 0; port < priv->ds->num_ports; port++)
|
|
|
|
if (!dsa_is_unused_port(priv->ds, port))
|
|
|
|
l2_lookup[0].destports |= BIT(port);
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_lookup_params_entry default_l2_lookup_params = {
|
2019-05-02 23:23:36 +03:00
|
|
|
/* Learned FDB entries are forgotten after 300 seconds */
|
|
|
|
.maxage = SJA1105_AGEING_TIME_MS(300000),
|
2019-05-02 23:23:30 +03:00
|
|
|
/* All entries within a FDB bin are available for learning */
|
|
|
|
.dyn_tbsz = SJA1105ET_FDB_BIN_SIZE,
|
2019-06-03 00:15:45 +03:00
|
|
|
/* And the P/Q/R/S equivalent setting: */
|
|
|
|
.start_dynspc = 0,
|
2019-05-02 23:23:30 +03:00
|
|
|
/* 2^8 + 2^5 + 2^3 + 2^2 + 2^1 + 1 in Koopman notation */
|
|
|
|
.poly = 0x97,
|
2022-02-25 11:22:24 +02:00
|
|
|
/* Always use Independent VLAN Learning (IVL) */
|
|
|
|
.shared_learn = false,
|
2019-05-02 23:23:30 +03:00
|
|
|
/* Don't discard management traffic based on ENFPORT -
|
|
|
|
* we don't perform SMAC port enforcement anyway, so
|
|
|
|
* what we are setting here doesn't matter.
|
|
|
|
*/
|
|
|
|
.no_enf_hostprt = false,
|
|
|
|
/* Don't learn SMAC for mac_fltres1 and mac_fltres0.
|
|
|
|
* Maybe correlate with no_linklocal_learn from bridge driver?
|
|
|
|
*/
|
|
|
|
.no_mgmt_learn = true,
|
2019-06-03 00:15:45 +03:00
|
|
|
/* P/Q/R/S only */
|
|
|
|
.use_static = true,
|
|
|
|
/* Dynamically learned FDB entries can overwrite other (older)
|
|
|
|
* dynamic FDB entries
|
|
|
|
*/
|
|
|
|
.owr_dyn = true,
|
|
|
|
.drpnolearn = true,
|
2019-05-02 23:23:30 +03:00
|
|
|
};
|
2021-05-24 16:14:13 +03:00
|
|
|
struct dsa_switch *ds = priv->ds;
|
2021-05-24 16:14:14 +03:00
|
|
|
int port, num_used_ports = 0;
|
2021-05-24 16:14:13 +03:00
|
|
|
struct sja1105_table *table;
|
|
|
|
u64 max_fdb_entries;
|
|
|
|
|
|
|
|
for (port = 0; port < ds->num_ports; port++)
|
2021-05-24 16:14:14 +03:00
|
|
|
if (!dsa_is_unused_port(ds, port))
|
|
|
|
num_used_ports++;
|
|
|
|
|
|
|
|
max_fdb_entries = SJA1105_MAX_L2_LOOKUP_COUNT / num_used_ports;
|
|
|
|
|
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
|
|
|
if (dsa_is_unused_port(ds, port))
|
|
|
|
continue;
|
|
|
|
|
2021-05-24 16:14:13 +03:00
|
|
|
default_l2_lookup_params.maxaddrp[port] = max_fdb_entries;
|
2021-05-24 16:14:14 +03:00
|
|
|
}
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS];
|
|
|
|
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entries = kcalloc(table->ops->max_entry_count,
|
2019-05-02 23:23:30 +03:00
|
|
|
table->ops->unpacked_entry_size, GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entry_count = table->ops->max_entry_count;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
/* This table only has a single entry */
|
|
|
|
((struct sja1105_l2_lookup_params_entry *)table->entries)[0] =
|
|
|
|
default_l2_lookup_params;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: use 4095 as the private VLAN for untagged traffic
One thing became visible when writing the blamed commit, and that was
that STP and PTP frames injected by net/dsa/tag_sja1105.c using the
deferred xmit mechanism are always classified to the pvid of the CPU
port, regardless of whatever VLAN there might be in these packets.
So a decision needed to be taken regarding the mechanism through which
we should ensure that delivery of STP and PTP traffic is possible when
we are in a VLAN awareness mode that involves tag_8021q. This is because
tag_8021q is not concerned with managing the pvid of the CPU port, since
as far as tag_8021q is concerned, no traffic should be sent as untagged
from the CPU port. So we end up not actually having a pvid on the CPU
port if we only listen to tag_8021q, and unless we do something about it.
The decision taken at the time was to keep VLAN 1 in the list of
priv->dsa_8021q_vlans, and make it a pvid of the CPU port. This ensures
that STP and PTP frames can always be sent to the outside world.
However there is a problem. If we do the following while we are in
the best_effort_vlan_filtering=true mode:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
Then untagged and pvid-tagged frames should be dropped. But we observe
that they aren't, and this is because of the precaution we took that VID
1 is always installed on all ports.
So clearly VLAN 1 is not good for this purpose. What about VLAN 0?
Well, VLAN 0 is managed by the 8021q module, and that module wants to
ensure that 802.1p tagged frames are always received by a port, and are
always transmitted as VLAN-tagged (with VLAN ID 0). Whereas we want our
STP and PTP frames to be untagged if the stack sent them as untagged -
we don't want the driver to just decide out of the blue that it adds
VID 0 to some packets.
So what to do?
Well, there is one other VLAN that is reserved, and that is 4095:
$ ip link add link swp2 name swp2.4095 type vlan id 4095
Error: 8021q: Invalid VLAN id.
$ bridge vlan add dev swp2 vid 4095
Error: bridge: Vlan id is invalid.
After we made this change, VLAN 1 is indeed forwarded and/or dropped
according to the bridge VLAN table, there are no further alterations
done by the sja1105 driver.
Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 12:25:26 +03:00
|
|
|
/* Set up a default VLAN for untagged traffic injected from the CPU
|
|
|
|
* using management routes (e.g. STP, PTP) as opposed to tag_8021q.
|
|
|
|
* All DT-defined ports are members of this VLAN, and there are no
|
|
|
|
* restrictions on forwarding (since the CPU selects the destination).
|
|
|
|
* Frames from this VLAN will always be transmitted as untagged, and
|
|
|
|
* neither the bridge nor the 8021q module can create this VLAN ID.
|
|
|
|
*/
|
2019-05-02 23:23:30 +03:00
|
|
|
static int sja1105_init_static_vlan(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_table *table;
|
|
|
|
struct sja1105_vlan_lookup_entry pvid = {
|
net: dsa: sja1105: add support for the SJA1110 switch family
The SJA1110 is basically an SJA1105 with more ports, some integrated
PHYs (100base-T1 and 100base-TX) and an embedded microcontroller which
can be disabled, and the switch core can be controlled by a host running
Linux, over SPI.
This patch contains:
- the static and dynamic config packing functions, for the tables that
are common with SJA1105
- one more static config tables which is "unique" to the SJA1110
(actually it is a rehash of stuff that was placed somewhere else in
SJA1105): the PCP Remapping Table
- a reset and clock configuration procedure for the SJA1110 switch.
This resets just the switch subsystem, and gates off the clock which
powers on the embedded microcontroller.
- an RGMII delay configuration procedure for SJA1110, which is very
similar to SJA1105, but different enough for us to be unable to reuse
it (this is a pattern that repeats itself)
- some adaptations to dynamic config table entries which are no longer
programmed in the same way. For example, to delete a VLAN, you used to
write an entry through the dynamic reconfiguration interface with the
desired VLAN ID, and with the VALIDENT bit set to false. Now, the VLAN
table entries contain a TYPE_ENTRY field, which must be set to zero
(in a backwards-incompatible way) in order for the entry to be deleted,
or to some other entry for the VLAN to match "inner tagged" or "outer
tagged" packets.
- a similar thing for the static config: the xMII Mode Parameters Table
encoding for SGMII and MII (the latter just when attached to a
100base-TX PHY) just isn't what it used to be in SJA1105. They are
identical, except there is an extra "special" bit which needs to be
set. Set it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-08 12:25:36 +03:00
|
|
|
.type_entry = SJA1110_VLAN_D_TAG,
|
2019-05-02 23:23:30 +03:00
|
|
|
.ving_mirr = 0,
|
|
|
|
.vegr_mirr = 0,
|
|
|
|
.vmemb_port = 0,
|
|
|
|
.vlan_bc = 0,
|
|
|
|
.tag_port = 0,
|
net: dsa: sja1105: use 4095 as the private VLAN for untagged traffic
One thing became visible when writing the blamed commit, and that was
that STP and PTP frames injected by net/dsa/tag_sja1105.c using the
deferred xmit mechanism are always classified to the pvid of the CPU
port, regardless of whatever VLAN there might be in these packets.
So a decision needed to be taken regarding the mechanism through which
we should ensure that delivery of STP and PTP traffic is possible when
we are in a VLAN awareness mode that involves tag_8021q. This is because
tag_8021q is not concerned with managing the pvid of the CPU port, since
as far as tag_8021q is concerned, no traffic should be sent as untagged
from the CPU port. So we end up not actually having a pvid on the CPU
port if we only listen to tag_8021q, and unless we do something about it.
The decision taken at the time was to keep VLAN 1 in the list of
priv->dsa_8021q_vlans, and make it a pvid of the CPU port. This ensures
that STP and PTP frames can always be sent to the outside world.
However there is a problem. If we do the following while we are in
the best_effort_vlan_filtering=true mode:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
Then untagged and pvid-tagged frames should be dropped. But we observe
that they aren't, and this is because of the precaution we took that VID
1 is always installed on all ports.
So clearly VLAN 1 is not good for this purpose. What about VLAN 0?
Well, VLAN 0 is managed by the 8021q module, and that module wants to
ensure that 802.1p tagged frames are always received by a port, and are
always transmitted as VLAN-tagged (with VLAN ID 0). Whereas we want our
STP and PTP frames to be untagged if the stack sent them as untagged -
we don't want the driver to just decide out of the blue that it adds
VID 0 to some packets.
So what to do?
Well, there is one other VLAN that is reserved, and that is 4095:
$ ip link add link swp2 name swp2.4095 type vlan id 4095
Error: 8021q: Invalid VLAN id.
$ bridge vlan add dev swp2 vid 4095
Error: bridge: Vlan id is invalid.
After we made this change, VLAN 1 is indeed forwarded and/or dropped
according to the bridge VLAN table, there are no further alterations
done by the sja1105 driver.
Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 12:25:26 +03:00
|
|
|
.vlanid = SJA1105_DEFAULT_VLAN,
|
2019-05-02 23:23:30 +03:00
|
|
|
};
|
net: dsa: sja1105: save/restore VLANs using a delta commit method
Managing the VLAN table that is present in hardware will become very
difficult once we add a third operating state
(best_effort_vlan_filtering). That is because correct cleanup (not too
little, not too much) becomes virtually impossible, when VLANs can be
added from the bridge layer, from dsa_8021q for basic tagging, for
cross-chip bridging, as well as retagging rules for sub-VLANs and
cross-chip sub-VLANs. So we need to rethink VLAN interaction with the
switch in a more scalable way.
In preparation for that, use the priv->expect_dsa_8021q boolean to
classify any VLAN request received through .port_vlan_add or
.port_vlan_del towards either one of 2 internal lists: bridge VLANs and
dsa_8021q VLANs.
Then, implement a central sja1105_build_vlan_table method that creates a
VLAN configuration from scratch based on the 2 lists of VLANs kept by
the driver, and based on the VLAN awareness state. Currently, if we are
VLAN-unaware, install the dsa_8021q VLANs, otherwise the bridge VLANs.
Then, implement a delta commit procedure that identifies which VLANs
from this new configuration are actually different from the config
previously committed to hardware. We apply the delta through the dynamic
configuration interface (we don't reset the switch). The result is that
the hardware should see the exact sequence of operations as before this
patch.
This also helps remove the "br" argument passed to
dsa_8021q_crosschip_bridge_join, which it was only using to figure out
whether it should commit the configuration back to us or not, based on
the VLAN awareness state of the bridge. We can simplify that, by always
allowing those VLANs inside of our dsa_8021q_vlans list, and committing
those to hardware when necessary.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-12 20:20:29 +03:00
|
|
|
struct dsa_switch *ds = priv->ds;
|
|
|
|
int port;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
|
|
|
|
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
2020-12-29 21:52:38 +08:00
|
|
|
table->entries = kzalloc(table->ops->unpacked_entry_size,
|
2019-05-02 23:23:30 +03:00
|
|
|
GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
table->entry_count = 1;
|
|
|
|
|
net: dsa: sja1105: save/restore VLANs using a delta commit method
Managing the VLAN table that is present in hardware will become very
difficult once we add a third operating state
(best_effort_vlan_filtering). That is because correct cleanup (not too
little, not too much) becomes virtually impossible, when VLANs can be
added from the bridge layer, from dsa_8021q for basic tagging, for
cross-chip bridging, as well as retagging rules for sub-VLANs and
cross-chip sub-VLANs. So we need to rethink VLAN interaction with the
switch in a more scalable way.
In preparation for that, use the priv->expect_dsa_8021q boolean to
classify any VLAN request received through .port_vlan_add or
.port_vlan_del towards either one of 2 internal lists: bridge VLANs and
dsa_8021q VLANs.
Then, implement a central sja1105_build_vlan_table method that creates a
VLAN configuration from scratch based on the 2 lists of VLANs kept by
the driver, and based on the VLAN awareness state. Currently, if we are
VLAN-unaware, install the dsa_8021q VLANs, otherwise the bridge VLANs.
Then, implement a delta commit procedure that identifies which VLANs
from this new configuration are actually different from the config
previously committed to hardware. We apply the delta through the dynamic
configuration interface (we don't reset the switch). The result is that
the hardware should see the exact sequence of operations as before this
patch.
This also helps remove the "br" argument passed to
dsa_8021q_crosschip_bridge_join, which it was only using to figure out
whether it should commit the configuration back to us or not, based on
the VLAN awareness state of the bridge. We can simplify that, by always
allowing those VLANs inside of our dsa_8021q_vlans list, and committing
those to hardware when necessary.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-12 20:20:29 +03:00
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
|
|
|
if (dsa_is_unused_port(ds, port))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
pvid.vmemb_port |= BIT(port);
|
|
|
|
pvid.vlan_bc |= BIT(port);
|
|
|
|
pvid.tag_port &= ~BIT(port);
|
|
|
|
|
2021-08-04 16:54:33 +03:00
|
|
|
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN;
|
|
|
|
priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN;
|
|
|
|
}
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_forwarding_entry *l2fwd;
|
2021-05-24 16:14:13 +03:00
|
|
|
struct dsa_switch *ds = priv->ds;
|
net: dsa: sja1105: suppress TX packets from looping back in "H" topologies
H topologies like this one have a problem:
eth0 eth1
| |
CPU port CPU port
| DSA link |
sw0p0 sw0p1 sw0p2 sw0p3 sw0p4 -------- sw1p4 sw1p3 sw1p2 sw1p1 sw1p0
| | | | | |
user user user user user user
port port port port port port
Basically any packet sent by the eth0 DSA master can be flooded on the
interconnecting DSA link sw0p4 <-> sw1p4 and it will be received by the
eth1 DSA master too. Basically we are talking to ourselves.
In VLAN-unaware mode, these packets are encoded using a tag_8021q TX
VLAN, which dsa_8021q_rcv() rightfully cannot decode and complains.
Whereas in VLAN-aware mode, the packets are encoded with a bridge VLAN
which _can_ be decoded by the tagger running on eth1, so it will attempt
to reinject that packet into the network stack (the bridge, if there is
any port under eth1 that is under a bridge). In the case where the ports
under eth1 are under the same cross-chip bridge as the ports under eth0,
the TX packets will even be learned as RX packets. The only thing that
will prevent loops with the software bridging path, and therefore
disaster, is that the source port and the destination port are in the
same hardware domain, and the bridge will receive packets from the
driver with skb->offload_fwd_mark = true and will not forward between
the two.
The proper solution to this problem is to detect H topologies and
enforce that all packets are received through the local switch and we do
not attempt to receive packets on our CPU port from switches that have
their own. This is a viable solution which works thanks to the fact that
MAC addresses which should be filtered towards the host are installed by
DSA as static MAC addresses towards the CPU port of each switch.
TX from a CPU port towards the DSA port continues to be allowed, this is
because sja1105 supports bridge TX forwarding offload, and the skb->dev
used initially for xmit does not have any direct correlation with where
the station that will respond to that packet is connected. It may very
well happen that when we send a ping through a br0 interface that spans
all switch ports, the xmit packet will exit the system through a DSA
switch interface under eth1 (say sw1p2), but the destination station is
connected to a switch port under eth0, like sw0p0. So the switch under
eth1 needs to communicate on TX with the switch under eth0. The
response, however, will not follow the same path, but instead, this
patch enforces that the response is sent by the first switch directly to
its DSA master which is eth0.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:35 +03:00
|
|
|
struct dsa_switch_tree *dst;
|
2019-05-02 23:23:30 +03:00
|
|
|
struct sja1105_table *table;
|
net: dsa: sja1105: suppress TX packets from looping back in "H" topologies
H topologies like this one have a problem:
eth0 eth1
| |
CPU port CPU port
| DSA link |
sw0p0 sw0p1 sw0p2 sw0p3 sw0p4 -------- sw1p4 sw1p3 sw1p2 sw1p1 sw1p0
| | | | | |
user user user user user user
port port port port port port
Basically any packet sent by the eth0 DSA master can be flooded on the
interconnecting DSA link sw0p4 <-> sw1p4 and it will be received by the
eth1 DSA master too. Basically we are talking to ourselves.
In VLAN-unaware mode, these packets are encoded using a tag_8021q TX
VLAN, which dsa_8021q_rcv() rightfully cannot decode and complains.
Whereas in VLAN-aware mode, the packets are encoded with a bridge VLAN
which _can_ be decoded by the tagger running on eth1, so it will attempt
to reinject that packet into the network stack (the bridge, if there is
any port under eth1 that is under a bridge). In the case where the ports
under eth1 are under the same cross-chip bridge as the ports under eth0,
the TX packets will even be learned as RX packets. The only thing that
will prevent loops with the software bridging path, and therefore
disaster, is that the source port and the destination port are in the
same hardware domain, and the bridge will receive packets from the
driver with skb->offload_fwd_mark = true and will not forward between
the two.
The proper solution to this problem is to detect H topologies and
enforce that all packets are received through the local switch and we do
not attempt to receive packets on our CPU port from switches that have
their own. This is a viable solution which works thanks to the fact that
MAC addresses which should be filtered towards the host are installed by
DSA as static MAC addresses towards the CPU port of each switch.
TX from a CPU port towards the DSA port continues to be allowed, this is
because sja1105 supports bridge TX forwarding offload, and the skb->dev
used initially for xmit does not have any direct correlation with where
the station that will respond to that packet is connected. It may very
well happen that when we send a ping through a br0 interface that spans
all switch ports, the xmit packet will exit the system through a DSA
switch interface under eth1 (say sw1p2), but the destination station is
connected to a switch port under eth0, like sw0p0. So the switch under
eth1 needs to communicate on TX with the switch under eth0. The
response, however, will not follow the same path, but instead, this
patch enforces that the response is sent by the first switch directly to
its DSA master which is eth0.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:35 +03:00
|
|
|
struct dsa_link *dl;
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
int port, tc;
|
|
|
|
int from, to;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING];
|
|
|
|
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entries = kcalloc(table->ops->max_entry_count,
|
2019-05-02 23:23:30 +03:00
|
|
|
table->ops->unpacked_entry_size, GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entry_count = table->ops->max_entry_count;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
l2fwd = table->entries;
|
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
/* First 5 entries in the L2 Forwarding Table define the forwarding
|
|
|
|
* rules and the VLAN PCP to ingress queue mapping.
|
|
|
|
* Set up the ingress queue mapping first.
|
|
|
|
*/
|
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
|
|
|
if (dsa_is_unused_port(ds, port))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for (tc = 0; tc < SJA1105_NUM_TC; tc++)
|
|
|
|
l2fwd[port].vlan_pmap[tc] = tc;
|
|
|
|
}
|
2019-05-02 23:23:30 +03:00
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
/* Then manage the forwarding domain for user ports. These can forward
|
|
|
|
* only to the always-on domain (CPU port and DSA links)
|
|
|
|
*/
|
|
|
|
for (from = 0; from < ds->num_ports; from++) {
|
|
|
|
if (!dsa_is_user_port(ds, from))
|
2021-05-24 16:14:14 +03:00
|
|
|
continue;
|
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
for (to = 0; to < ds->num_ports; to++) {
|
|
|
|
if (!dsa_is_cpu_port(ds, to) &&
|
|
|
|
!dsa_is_dsa_port(ds, to))
|
|
|
|
continue;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
l2fwd[from].bc_domain |= BIT(to);
|
|
|
|
l2fwd[from].fl_domain |= BIT(to);
|
2021-02-16 13:41:19 +02:00
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
sja1105_port_allow_traffic(l2fwd, from, to, true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Then manage the forwarding domain for DSA links and CPU ports (the
|
|
|
|
* always-on domain). These can send packets to any enabled port except
|
|
|
|
* themselves.
|
|
|
|
*/
|
|
|
|
for (from = 0; from < ds->num_ports; from++) {
|
|
|
|
if (!dsa_is_cpu_port(ds, from) && !dsa_is_dsa_port(ds, from))
|
2019-05-02 23:23:30 +03:00
|
|
|
continue;
|
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
for (to = 0; to < ds->num_ports; to++) {
|
|
|
|
if (dsa_is_unused_port(ds, to))
|
|
|
|
continue;
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
if (from == to)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
l2fwd[from].bc_domain |= BIT(to);
|
|
|
|
l2fwd[from].fl_domain |= BIT(to);
|
|
|
|
|
|
|
|
sja1105_port_allow_traffic(l2fwd, from, to, true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: suppress TX packets from looping back in "H" topologies
H topologies like this one have a problem:
eth0 eth1
| |
CPU port CPU port
| DSA link |
sw0p0 sw0p1 sw0p2 sw0p3 sw0p4 -------- sw1p4 sw1p3 sw1p2 sw1p1 sw1p0
| | | | | |
user user user user user user
port port port port port port
Basically any packet sent by the eth0 DSA master can be flooded on the
interconnecting DSA link sw0p4 <-> sw1p4 and it will be received by the
eth1 DSA master too. Basically we are talking to ourselves.
In VLAN-unaware mode, these packets are encoded using a tag_8021q TX
VLAN, which dsa_8021q_rcv() rightfully cannot decode and complains.
Whereas in VLAN-aware mode, the packets are encoded with a bridge VLAN
which _can_ be decoded by the tagger running on eth1, so it will attempt
to reinject that packet into the network stack (the bridge, if there is
any port under eth1 that is under a bridge). In the case where the ports
under eth1 are under the same cross-chip bridge as the ports under eth0,
the TX packets will even be learned as RX packets. The only thing that
will prevent loops with the software bridging path, and therefore
disaster, is that the source port and the destination port are in the
same hardware domain, and the bridge will receive packets from the
driver with skb->offload_fwd_mark = true and will not forward between
the two.
The proper solution to this problem is to detect H topologies and
enforce that all packets are received through the local switch and we do
not attempt to receive packets on our CPU port from switches that have
their own. This is a viable solution which works thanks to the fact that
MAC addresses which should be filtered towards the host are installed by
DSA as static MAC addresses towards the CPU port of each switch.
TX from a CPU port towards the DSA port continues to be allowed, this is
because sja1105 supports bridge TX forwarding offload, and the skb->dev
used initially for xmit does not have any direct correlation with where
the station that will respond to that packet is connected. It may very
well happen that when we send a ping through a br0 interface that spans
all switch ports, the xmit packet will exit the system through a DSA
switch interface under eth1 (say sw1p2), but the destination station is
connected to a switch port under eth0, like sw0p0. So the switch under
eth1 needs to communicate on TX with the switch under eth0. The
response, however, will not follow the same path, but instead, this
patch enforces that the response is sent by the first switch directly to
its DSA master which is eth0.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:35 +03:00
|
|
|
/* In odd topologies ("H" connections where there is a DSA link to
|
|
|
|
* another switch which also has its own CPU port), TX packets can loop
|
|
|
|
* back into the system (they are flooded from CPU port 1 to the DSA
|
|
|
|
* link, and from there to CPU port 2). Prevent this from happening by
|
|
|
|
* cutting RX from DSA links towards our CPU port, if the remote switch
|
|
|
|
* has its own CPU port and therefore doesn't need ours for network
|
|
|
|
* stack termination.
|
|
|
|
*/
|
|
|
|
dst = ds->dst;
|
|
|
|
|
|
|
|
list_for_each_entry(dl, &dst->rtable, list) {
|
|
|
|
if (dl->dp->ds != ds || dl->link_dp->cpu_dp == dl->dp->cpu_dp)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
from = dl->dp->index;
|
|
|
|
to = dsa_upstream_port(ds, from);
|
|
|
|
|
|
|
|
dev_warn(ds->dev,
|
|
|
|
"H topology detected, cutting RX from DSA link %d to CPU port %d to prevent TX packet loops\n",
|
|
|
|
from, to);
|
|
|
|
|
|
|
|
sja1105_port_allow_traffic(l2fwd, from, to, false);
|
|
|
|
|
|
|
|
l2fwd[from].bc_domain &= ~BIT(to);
|
|
|
|
l2fwd[from].fl_domain &= ~BIT(to);
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
/* Finally, manage the egress flooding domain. All ports start up with
|
|
|
|
* flooding enabled, including the CPU port and DSA links.
|
|
|
|
*/
|
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
|
|
|
if (dsa_is_unused_port(ds, port))
|
|
|
|
continue;
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
priv->ucast_egress_floods |= BIT(port);
|
|
|
|
priv->bcast_egress_floods |= BIT(port);
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
2021-05-24 16:14:14 +03:00
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
/* Next 8 entries define VLAN PCP mapping from ingress to egress.
|
|
|
|
* Create a one-to-one mapping.
|
|
|
|
*/
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
for (tc = 0; tc < SJA1105_NUM_TC; tc++) {
|
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
|
|
|
if (dsa_is_unused_port(ds, port))
|
2021-05-24 16:14:14 +03:00
|
|
|
continue;
|
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
l2fwd[ds->num_ports + tc].vlan_pmap[port] = tc;
|
2021-05-24 16:14:14 +03:00
|
|
|
}
|
net: dsa: sja1105: add support for the SJA1110 switch family
The SJA1110 is basically an SJA1105 with more ports, some integrated
PHYs (100base-T1 and 100base-TX) and an embedded microcontroller which
can be disabled, and the switch core can be controlled by a host running
Linux, over SPI.
This patch contains:
- the static and dynamic config packing functions, for the tables that
are common with SJA1105
- one more static config tables which is "unique" to the SJA1110
(actually it is a rehash of stuff that was placed somewhere else in
SJA1105): the PCP Remapping Table
- a reset and clock configuration procedure for the SJA1110 switch.
This resets just the switch subsystem, and gates off the clock which
powers on the embedded microcontroller.
- an RGMII delay configuration procedure for SJA1110, which is very
similar to SJA1105, but different enough for us to be unable to reuse
it (this is a pattern that repeats itself)
- some adaptations to dynamic config table entries which are no longer
programmed in the same way. For example, to delete a VLAN, you used to
write an entry through the dynamic reconfiguration interface with the
desired VLAN ID, and with the VALIDENT bit set to false. Now, the VLAN
table entries contain a TYPE_ENTRY field, which must be set to zero
(in a backwards-incompatible way) in order for the entry to be deleted,
or to some other entry for the VLAN to match "inner tagged" or "outer
tagged" packets.
- a similar thing for the static config: the xMII Mode Parameters Table
encoding for SGMII and MII (the latter just when attached to a
100base-TX PHY) just isn't what it used to be in SJA1105. They are
identical, except there is an extra "special" bit which needs to be
set. Set it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-08 12:25:36 +03:00
|
|
|
|
net: dsa: sja1105: manage the forwarding domain towards DSA ports
Manage DSA links towards other switches, be they host ports or cascade
ports, the same as the CPU port, i.e. allow forwarding and flooding
unconditionally from all user ports.
We send packets as always VLAN-tagged on a DSA port, and we rely on the
cross-chip notifiers from tag_8021q to install the RX VLAN of a switch
port only on the proper remote ports of another switch (the ports that
are in the same bridging domain). So if there is no cross-chip bridging
in the system, the flooded packets will be sent on the DSA ports too,
but they will be dropped by the remote switches due to either
(a) a lack of the RX VLAN in the VLAN table of the ingress DSA port, or
(b) a lack of valid destinations for those packets, due to a lack of the
RX VLAN on the user ports of the switch
Note that switches which only transport packets in a cross-chip bridge,
but have no user ports of their own as part of that bridge, such as
switch 1 in this case:
DSA link DSA link
sw0p0 sw0p1 sw0p2 -------- sw1p0 sw1p2 sw1p3 -------- sw2p0 sw2p2 sw2p3
ip link set sw0p0 master br0
ip link set sw2p3 master br0
will still work, because the tag_8021q cross-chip notifiers keep the RX
VLANs installed on all DSA ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-04 16:54:32 +03:00
|
|
|
l2fwd[ds->num_ports + tc].type_egrpcp2outputq = true;
|
net: dsa: sja1105: add support for the SJA1110 switch family
The SJA1110 is basically an SJA1105 with more ports, some integrated
PHYs (100base-T1 and 100base-TX) and an embedded microcontroller which
can be disabled, and the switch core can be controlled by a host running
Linux, over SPI.
This patch contains:
- the static and dynamic config packing functions, for the tables that
are common with SJA1105
- one more static config tables which is "unique" to the SJA1110
(actually it is a rehash of stuff that was placed somewhere else in
SJA1105): the PCP Remapping Table
- a reset and clock configuration procedure for the SJA1110 switch.
This resets just the switch subsystem, and gates off the clock which
powers on the embedded microcontroller.
- an RGMII delay configuration procedure for SJA1110, which is very
similar to SJA1105, but different enough for us to be unable to reuse
it (this is a pattern that repeats itself)
- some adaptations to dynamic config table entries which are no longer
programmed in the same way. For example, to delete a VLAN, you used to
write an entry through the dynamic reconfiguration interface with the
desired VLAN ID, and with the VALIDENT bit set to false. Now, the VLAN
table entries contain a TYPE_ENTRY field, which must be set to zero
(in a backwards-incompatible way) in order for the entry to be deleted,
or to some other entry for the VLAN to match "inner tagged" or "outer
tagged" packets.
- a similar thing for the static config: the xMII Mode Parameters Table
encoding for SGMII and MII (the latter just when attached to a
100base-TX PHY) just isn't what it used to be in SJA1105. They are
identical, except there is an extra "special" bit which needs to be
set. Set it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-08 12:25:36 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1110_init_pcp_remapping(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1110_pcp_remapping_entry *pcp_remap;
|
|
|
|
struct dsa_switch *ds = priv->ds;
|
|
|
|
struct sja1105_table *table;
|
|
|
|
int port, tc;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_PCP_REMAPPING];
|
|
|
|
|
|
|
|
/* Nothing to do for SJA1105 */
|
|
|
|
if (!table->ops->max_entry_count)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
table->entries = kcalloc(table->ops->max_entry_count,
|
|
|
|
table->ops->unpacked_entry_size, GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
table->entry_count = table->ops->max_entry_count;
|
|
|
|
|
|
|
|
pcp_remap = table->entries;
|
|
|
|
|
|
|
|
/* Repeat the configuration done for vlan_pmap */
|
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
|
|
|
if (dsa_is_unused_port(ds, port))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for (tc = 0; tc < SJA1105_NUM_TC; tc++)
|
|
|
|
pcp_remap[port].egrpcp[tc] = tc;
|
2021-05-24 16:14:14 +03:00
|
|
|
}
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_init_l2_forwarding_params(struct sja1105_private *priv)
|
|
|
|
{
|
2021-05-24 16:14:21 +03:00
|
|
|
struct sja1105_l2_forwarding_params_entry *l2fwd_params;
|
2019-05-02 23:23:30 +03:00
|
|
|
struct sja1105_table *table;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS];
|
|
|
|
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entries = kcalloc(table->ops->max_entry_count,
|
2019-05-02 23:23:30 +03:00
|
|
|
table->ops->unpacked_entry_size, GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entry_count = table->ops->max_entry_count;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
/* This table only has a single entry */
|
2021-05-24 16:14:21 +03:00
|
|
|
l2fwd_params = table->entries;
|
|
|
|
|
|
|
|
/* Disallow dynamic reconfiguration of vlan_pmap */
|
|
|
|
l2fwd_params->max_dynp = 0;
|
|
|
|
/* Use a single memory partition for all ingress queues */
|
|
|
|
l2fwd_params->part_spc[0] = priv->info->max_frame_mem;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-05-12 20:20:37 +03:00
|
|
|
void sja1105_frame_memory_partitioning(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
|
|
|
|
struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
|
|
|
|
struct sja1105_table *table;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS];
|
|
|
|
l2_fwd_params = table->entries;
|
2021-07-19 20:14:42 +03:00
|
|
|
l2_fwd_params->part_spc[0] = SJA1105_MAX_FRAME_MEMORY;
|
2020-05-12 20:20:37 +03:00
|
|
|
|
|
|
|
/* If we have any critical-traffic virtual links, we need to reserve
|
|
|
|
* some frame buffer memory for them. At the moment, hardcode the value
|
|
|
|
* at 100 blocks of 128 bytes of memory each. This leaves 829 blocks
|
|
|
|
* remaining for best-effort traffic. TODO: figure out a more flexible
|
|
|
|
* way to perform the frame buffer partitioning.
|
|
|
|
*/
|
|
|
|
if (!priv->static_config.tables[BLK_IDX_VL_FORWARDING].entry_count)
|
|
|
|
return;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_VL_FORWARDING_PARAMS];
|
|
|
|
vl_fwd_params = table->entries;
|
|
|
|
|
|
|
|
l2_fwd_params->part_spc[0] -= SJA1105_VL_FRAME_MEMORY;
|
|
|
|
vl_fwd_params->partspc[0] = SJA1105_VL_FRAME_MEMORY;
|
|
|
|
}
|
|
|
|
|
2021-06-08 12:25:37 +03:00
|
|
|
/* SJA1110 TDMACONFIGIDX values:
|
|
|
|
*
|
|
|
|
* | 100 Mbps ports | 1Gbps ports | 2.5Gbps ports | Disabled ports
|
|
|
|
* -----+----------------+---------------+---------------+---------------
|
|
|
|
* 0 | 0, [5:10] | [1:2] | [3:4] | retag
|
|
|
|
* 1 |0, [5:10], retag| [1:2] | [3:4] | -
|
|
|
|
* 2 | 0, [5:10] | [1:3], retag | 4 | -
|
|
|
|
* 3 | 0, [5:10] |[1:2], 4, retag| 3 | -
|
|
|
|
* 4 | 0, 2, [5:10] | 1, retag | [3:4] | -
|
|
|
|
* 5 | 0, 1, [5:10] | 2, retag | [3:4] | -
|
|
|
|
* 14 | 0, [5:10] | [1:4], retag | - | -
|
|
|
|
* 15 | [5:10] | [0:4], retag | - | -
|
|
|
|
*/
|
|
|
|
static void sja1110_select_tdmaconfigidx(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_general_params_entry *general_params;
|
|
|
|
struct sja1105_table *table;
|
|
|
|
bool port_1_is_base_tx;
|
|
|
|
bool port_3_is_2500;
|
|
|
|
bool port_4_is_2500;
|
|
|
|
u64 tdmaconfigidx;
|
|
|
|
|
|
|
|
if (priv->info->device_id != SJA1110_DEVICE_ID)
|
|
|
|
return;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
|
|
|
|
general_params = table->entries;
|
|
|
|
|
|
|
|
/* All the settings below are "as opposed to SGMII", which is the
|
|
|
|
* other pinmuxing option.
|
|
|
|
*/
|
|
|
|
port_1_is_base_tx = priv->phy_mode[1] == PHY_INTERFACE_MODE_INTERNAL;
|
|
|
|
port_3_is_2500 = priv->phy_mode[3] == PHY_INTERFACE_MODE_2500BASEX;
|
|
|
|
port_4_is_2500 = priv->phy_mode[4] == PHY_INTERFACE_MODE_2500BASEX;
|
|
|
|
|
|
|
|
if (port_1_is_base_tx)
|
|
|
|
/* Retagging port will operate at 1 Gbps */
|
|
|
|
tdmaconfigidx = 5;
|
|
|
|
else if (port_3_is_2500 && port_4_is_2500)
|
|
|
|
/* Retagging port will operate at 100 Mbps */
|
|
|
|
tdmaconfigidx = 1;
|
|
|
|
else if (port_3_is_2500)
|
|
|
|
/* Retagging port will operate at 1 Gbps */
|
|
|
|
tdmaconfigidx = 3;
|
|
|
|
else if (port_4_is_2500)
|
|
|
|
/* Retagging port will operate at 1 Gbps */
|
|
|
|
tdmaconfigidx = 2;
|
|
|
|
else
|
|
|
|
/* Retagging port will operate at 1 Gbps */
|
|
|
|
tdmaconfigidx = 14;
|
|
|
|
|
|
|
|
general_params->tdmaconfigidx = tdmaconfigidx;
|
|
|
|
}
|
|
|
|
|
2021-08-04 16:54:31 +03:00
|
|
|
static int sja1105_init_topology(struct sja1105_private *priv,
|
|
|
|
struct sja1105_general_params_entry *general_params)
|
|
|
|
{
|
|
|
|
struct dsa_switch *ds = priv->ds;
|
|
|
|
int port;
|
|
|
|
|
|
|
|
/* The host port is the destination for traffic matching mac_fltres1
|
|
|
|
* and mac_fltres0 on all ports except itself. Default to an invalid
|
|
|
|
* value.
|
|
|
|
*/
|
|
|
|
general_params->host_port = ds->num_ports;
|
|
|
|
|
|
|
|
/* Link-local traffic received on casc_port will be forwarded
|
|
|
|
* to host_port without embedding the source port and device ID
|
|
|
|
* info in the destination MAC address, and no RX timestamps will be
|
|
|
|
* taken either (presumably because it is a cascaded port and a
|
|
|
|
* downstream SJA switch already did that).
|
|
|
|
* To disable the feature, we need to do different things depending on
|
|
|
|
* switch generation. On SJA1105 we need to set an invalid port, while
|
|
|
|
* on SJA1110 which support multiple cascaded ports, this field is a
|
|
|
|
* bitmask so it must be left zero.
|
|
|
|
*/
|
|
|
|
if (!priv->info->multiple_cascade_ports)
|
|
|
|
general_params->casc_port = ds->num_ports;
|
|
|
|
|
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
|
|
|
bool is_upstream = dsa_is_upstream_port(ds, port);
|
|
|
|
bool is_dsa_link = dsa_is_dsa_port(ds, port);
|
|
|
|
|
|
|
|
/* Upstream ports can be dedicated CPU ports or
|
|
|
|
* upstream-facing DSA links
|
|
|
|
*/
|
|
|
|
if (is_upstream) {
|
|
|
|
if (general_params->host_port == ds->num_ports) {
|
|
|
|
general_params->host_port = port;
|
|
|
|
} else {
|
|
|
|
dev_err(ds->dev,
|
|
|
|
"Port %llu is already a host port, configuring %d as one too is not supported\n",
|
|
|
|
general_params->host_port, port);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Cascade ports are downstream-facing DSA links */
|
|
|
|
if (is_dsa_link && !is_upstream) {
|
|
|
|
if (priv->info->multiple_cascade_ports) {
|
|
|
|
general_params->casc_port |= BIT(port);
|
|
|
|
} else if (general_params->casc_port == ds->num_ports) {
|
|
|
|
general_params->casc_port = port;
|
|
|
|
} else {
|
|
|
|
dev_err(ds->dev,
|
|
|
|
"Port %llu is already a cascade port, configuring %d as one too is not supported\n",
|
|
|
|
general_params->casc_port, port);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (general_params->host_port == ds->num_ports) {
|
|
|
|
dev_err(ds->dev, "No host port configured\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
static int sja1105_init_general_params(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_general_params_entry default_general_params = {
|
2019-10-04 03:33:47 +03:00
|
|
|
/* Allow dynamic changing of the mirror port */
|
|
|
|
.mirr_ptacu = true,
|
2019-05-02 23:23:30 +03:00
|
|
|
.switchid = priv->ds->index,
|
2019-09-15 05:00:01 +03:00
|
|
|
/* Priority queue for link-local management frames
|
|
|
|
* (both ingress to and egress from CPU - PTP, STP etc)
|
|
|
|
*/
|
2019-06-08 15:04:41 +03:00
|
|
|
.hostprio = 7,
|
2019-05-02 23:23:30 +03:00
|
|
|
.mac_fltres1 = SJA1105_LINKLOCAL_FILTER_A,
|
|
|
|
.mac_flt1 = SJA1105_LINKLOCAL_FILTER_A_MASK,
|
2023-06-27 12:42:06 +03:00
|
|
|
.incl_srcpt1 = true,
|
2023-07-04 01:05:45 +03:00
|
|
|
.send_meta1 = true,
|
2019-05-02 23:23:30 +03:00
|
|
|
.mac_fltres0 = SJA1105_LINKLOCAL_FILTER_B,
|
|
|
|
.mac_flt0 = SJA1105_LINKLOCAL_FILTER_B_MASK,
|
2023-06-27 12:42:06 +03:00
|
|
|
.incl_srcpt0 = true,
|
2023-07-04 01:05:45 +03:00
|
|
|
.send_meta0 = true,
|
2019-10-04 03:33:47 +03:00
|
|
|
/* Default to an invalid value */
|
2021-05-24 16:14:13 +03:00
|
|
|
.mirr_port = priv->ds->num_ports,
|
2019-05-02 23:23:30 +03:00
|
|
|
/* No TTEthernet */
|
2020-05-05 22:20:55 +03:00
|
|
|
.vllupformat = SJA1105_VL_FORMAT_PSFP,
|
2019-05-02 23:23:30 +03:00
|
|
|
.vlmarker = 0,
|
|
|
|
.vlmask = 0,
|
|
|
|
/* Only update correctionField for 1-step PTP (L2 transport) */
|
|
|
|
.ignore2stf = 0,
|
2019-05-02 23:23:34 +03:00
|
|
|
/* Forcefully disable VLAN filtering by telling
|
|
|
|
* the switch that VLAN has a different EtherType.
|
|
|
|
*/
|
|
|
|
.tpid = ETH_P_SJA1105,
|
|
|
|
.tpid2 = ETH_P_SJA1105,
|
2021-06-11 22:01:22 +03:00
|
|
|
/* Enable the TTEthernet engine on SJA1110 */
|
|
|
|
.tte_en = true,
|
net: dsa: add support for the SJA1110 native tagging protocol
The SJA1110 has improved a few things compared to SJA1105:
- To send a control packet from the host port with SJA1105, one needed
to program a one-shot "management route" over SPI. This is no longer
true with SJA1110, you can actually send "in-band control extensions"
in the packets sent by DSA, these are in fact DSA tags which contain
the destination port and switch ID.
- When receiving a control packet from the switch with SJA1105, the
source port and switch ID were written in bytes 3 and 4 of the
destination MAC address of the frame (which was a very poor shot at a
DSA header). If the control packet also had an RX timestamp, that
timestamp was sent in an actual follow-up packet, so there were
reordering concerns on multi-core/multi-queue DSA masters, where the
metadata frame with the RX timestamp might get processed before the
actual packet to which that timestamp belonged (there is no way to
pair a packet to its timestamp other than the order in which they were
received). On SJA1110, this is no longer true, control packets have
the source port, switch ID and timestamp all in the DSA tags.
- Timestamps from the switch were partial: to get a 64-bit timestamp as
required by PTP stacks, one would need to take the partial 24-bit or
32-bit timestamp from the packet, then read the current PTP time very
quickly, and then patch in the high bits of the current PTP time into
the captured partial timestamp, to reconstruct what the full 64-bit
timestamp must have been. That is awful because packet processing is
done in NAPI context, but reading the current PTP time is done over
SPI and therefore needs sleepable context.
But it also aggravated a few things:
- Not only is there a DSA header in SJA1110, but there is a DSA trailer
in fact, too. So DSA needs to be extended to support taggers which
have both a header and a trailer. Very unconventional - my understanding
is that the trailer exists because the timestamps couldn't be prepared
in time for putting them in the header area.
- Like SJA1105, not all packets sent to the CPU have the DSA tag added
to them, only control packets do:
* the ones which match the destination MAC filters/traps in
MAC_FLTRES1 and MAC_FLTRES0
* the ones which match FDB entries which have TRAP or TAKETS bits set
So we could in theory hack something up to request the switch to take
timestamps for all packets that reach the CPU, and those would be
DSA-tagged and contain the source port / switch ID by virtue of the
fact that there needs to be a timestamp trailer provided. BUT:
- The SJA1110 does not parse its own DSA tags in a way that is useful
for routing in cross-chip topologies, a la Marvell. And the sja1105
driver already supports cross-chip bridging from the SJA1105 days.
It does that by automatically setting up the DSA links as VLAN trunks
which contain all the necessary tag_8021q RX VLANs that must be
communicated between the switches that span the same bridge. So when
using tag_8021q on sja1105, it is possible to have 2 switches with
ports sw0p0, sw0p1, sw1p0, sw1p1, and 2 VLAN-unaware bridges br0 and
br1, and br0 can take sw0p0 and sw1p0, and br1 can take sw0p1 and
sw1p1, and forwarding will happen according to the expected rules of
the Linux bridge.
We like that, and we don't want that to go away, so as a matter of
fact, the SJA1110 tagger still needs to support tag_8021q.
So the sja1110 tagger is a hybrid between tag_8021q for data packets,
and the native hardware support for control packets.
On RX, packets have a 13-byte trailer if they contain an RX timestamp.
That trailer is padded in such a way that its byte 8 (the start of the
"residence time" field - not parsed by Linux because we don't care) is
aligned on a 16 byte boundary. So the padding has a variable length
between 0 and 15 bytes. The DSA header contains the offset of the
beginning of the padding relative to the beginning of the frame (and the
end of the padding is obviously the end of the packet minus 13 bytes,
the length of the trailer). So we discard it.
Packets which don't have a trailer contain the source port and switch ID
information in the header (they are "trap-to-host" packets). Packets
which have a trailer contain the source port and switch ID in the trailer.
On TX, the destination port mask and switch ID is always in the trailer,
so we always need to say in the header that a trailer is present.
The header needs a custom EtherType and this was chosen as 0xdadc, after
0xdada which is for Marvell and 0xdadb which is for VLANs in
VLAN-unaware mode on SJA1105 (and SJA1110 in fact too).
Because we use tag_8021q in concert with the native tagging protocol,
control packets will have 2 DSA tags.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-11 22:01:29 +03:00
|
|
|
/* Set up the EtherType for control packets on SJA1110 */
|
|
|
|
.header_type = ETH_P_SJA1110,
|
2019-05-02 23:23:30 +03:00
|
|
|
};
|
net: dsa: sja1105: allow RX timestamps to be taken on all ports for SJA1110
On SJA1105, there is support for a cascade port which is presumably
connected to a downstream SJA1105 switch. The upstream one does not take
PTP timestamps for packets received on this port, presumably because the
downstream switch already did (and for PTP, it only makes sense for the
leaf nodes in a DSA switch tree to do that).
I haven't been able to validate that feature in a fully assembled setup,
so I am disabling the feature by setting the cascade port to an unused
port value (ds->num_ports).
In SJA1110, multiple cascade ports are supported, and CASC_PORT became
a bit mask from a port number. So when CASC_PORT is set to ds->num_ports
(which is 11 on SJA1110), it is actually set to 0b1011, so ports 3, 1
and 0 are configured as cascade ports and we cannot take RX timestamps
on them.
So we need to introduce a check for SJA1110 and set things differently
(to zero there), so that the cascading feature is properly disabled and
RX timestamps can be taken on all ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-11 22:01:23 +03:00
|
|
|
struct sja1105_general_params_entry *general_params;
|
2019-05-02 23:23:30 +03:00
|
|
|
struct sja1105_table *table;
|
2021-08-04 16:54:31 +03:00
|
|
|
int rc;
|
2021-05-24 16:14:16 +03:00
|
|
|
|
2021-08-04 16:54:31 +03:00
|
|
|
rc = sja1105_init_topology(priv, &default_general_params);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
|
|
|
|
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entries = kcalloc(table->ops->max_entry_count,
|
2019-05-02 23:23:30 +03:00
|
|
|
table->ops->unpacked_entry_size, GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entry_count = table->ops->max_entry_count;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
net: dsa: sja1105: allow RX timestamps to be taken on all ports for SJA1110
On SJA1105, there is support for a cascade port which is presumably
connected to a downstream SJA1105 switch. The upstream one does not take
PTP timestamps for packets received on this port, presumably because the
downstream switch already did (and for PTP, it only makes sense for the
leaf nodes in a DSA switch tree to do that).
I haven't been able to validate that feature in a fully assembled setup,
so I am disabling the feature by setting the cascade port to an unused
port value (ds->num_ports).
In SJA1110, multiple cascade ports are supported, and CASC_PORT became
a bit mask from a port number. So when CASC_PORT is set to ds->num_ports
(which is 11 on SJA1110), it is actually set to 0b1011, so ports 3, 1
and 0 are configured as cascade ports and we cannot take RX timestamps
on them.
So we need to introduce a check for SJA1110 and set things differently
(to zero there), so that the cascading feature is properly disabled and
RX timestamps can be taken on all ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-11 22:01:23 +03:00
|
|
|
general_params = table->entries;
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
/* This table only has a single entry */
|
net: dsa: sja1105: allow RX timestamps to be taken on all ports for SJA1110
On SJA1105, there is support for a cascade port which is presumably
connected to a downstream SJA1105 switch. The upstream one does not take
PTP timestamps for packets received on this port, presumably because the
downstream switch already did (and for PTP, it only makes sense for the
leaf nodes in a DSA switch tree to do that).
I haven't been able to validate that feature in a fully assembled setup,
so I am disabling the feature by setting the cascade port to an unused
port value (ds->num_ports).
In SJA1110, multiple cascade ports are supported, and CASC_PORT became
a bit mask from a port number. So when CASC_PORT is set to ds->num_ports
(which is 11 on SJA1110), it is actually set to 0b1011, so ports 3, 1
and 0 are configured as cascade ports and we cannot take RX timestamps
on them.
So we need to introduce a check for SJA1110 and set things differently
(to zero there), so that the cascading feature is properly disabled and
RX timestamps can be taken on all ports.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-11 22:01:23 +03:00
|
|
|
general_params[0] = default_general_params;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
2021-06-08 12:25:37 +03:00
|
|
|
sja1110_select_tdmaconfigidx(priv);
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-24 00:59:21 +02:00
|
|
|
static int sja1105_init_avb_params(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_avb_params_entry *avb;
|
|
|
|
struct sja1105_table *table;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_AVB_PARAMS];
|
|
|
|
|
|
|
|
/* Discard previous AVB Parameters Table */
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entries = kcalloc(table->ops->max_entry_count,
|
2020-03-24 00:59:21 +02:00
|
|
|
table->ops->unpacked_entry_size, GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entry_count = table->ops->max_entry_count;
|
2020-03-24 00:59:21 +02:00
|
|
|
|
|
|
|
avb = table->entries;
|
|
|
|
|
|
|
|
/* Configure the MAC addresses for meta frames */
|
|
|
|
avb->destmeta = SJA1105_META_DMAC;
|
|
|
|
avb->srcmeta = SJA1105_META_SMAC;
|
net: dsa: sja1105: configure the PTP_CLK pin as EXT_TS or PER_OUT
The SJA1105 switch family has a PTP_CLK pin which emits a signal with
fixed 50% duty cycle, but variable frequency and programmable start time.
On the second generation (P/Q/R/S) switches, this pin supports even more
functionality. The use case described by the hardware documents talks
about synchronization via oneshot pulses: given 2 sja1105 switches,
arbitrarily designated as a master and a slave, the master emits a
single pulse on PTP_CLK, while the slave is configured to timestamp this
pulse received on its PTP_CLK pin (which must obviously be configured as
input). The difference between the timestamps then exactly becomes the
slave offset to the master.
The only trouble with the above is that the hardware is very much tied
into this use case only, and not very generic beyond that:
- When emitting a oneshot pulse, instead of being told when to emit it,
the switch just does it "now" and tells you later what time it was,
via the PTPSYNCTS register. [ Incidentally, this is the same register
that the slave uses to collect the ext_ts timestamp from, too. ]
- On the sync slave, there is no interrupt mechanism on reception of a
new extts, and no FIFO to buffer them, because in the foreseen use
case, software is in control of both the master and the slave pins,
so it "knows" when there's something to collect.
These 2 problems mean that:
- We don't support (at least yet) the quirky oneshot mode exposed by
the hardware, just normal periodic output.
- We abuse the hardware a little bit when we expose generic extts.
Because there's no interrupt mechanism, we need to poll at double the
frequency we expect to receive a pulse. Currently that means a
non-configurable "twice a second".
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-03-24 00:59:24 +02:00
|
|
|
/* On P/Q/R/S, configure the direction of the PTP_CLK pin as input by
|
|
|
|
* default. This is because there might be boards with a hardware
|
|
|
|
* layout where enabling the pin as output might cause an electrical
|
|
|
|
* clash. On E/T the pin is always an output, which the board designers
|
|
|
|
* probably already knew, so even if there are going to be electrical
|
|
|
|
* issues, there's nothing we can do.
|
|
|
|
*/
|
|
|
|
avb->cas_master = false;
|
2020-03-24 00:59:21 +02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-29 14:52:01 +03:00
|
|
|
/* The L2 policing table is 2-stage. The table is looked up for each frame
|
|
|
|
* according to the ingress port, whether it was broadcast or not, and the
|
|
|
|
* classified traffic class (given by VLAN PCP). This portion of the lookup is
|
|
|
|
* fixed, and gives access to the SHARINDX, an indirection register pointing
|
|
|
|
* within the policing table itself, which is used to resolve the policer that
|
|
|
|
* will be used for this frame.
|
|
|
|
*
|
|
|
|
* Stage 1 Stage 2
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* |Port 0 TC 0 |SHARINDX| | Policer 0: Rate, Burst, MTU |
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* |Port 0 TC 1 |SHARINDX| | Policer 1: Rate, Burst, MTU |
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* ... | Policer 2: Rate, Burst, MTU |
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* |Port 0 TC 7 |SHARINDX| | Policer 3: Rate, Burst, MTU |
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* |Port 1 TC 0 |SHARINDX| | Policer 4: Rate, Burst, MTU |
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* ... | Policer 5: Rate, Burst, MTU |
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* |Port 1 TC 7 |SHARINDX| | Policer 6: Rate, Burst, MTU |
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* ... | Policer 7: Rate, Burst, MTU |
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* |Port 4 TC 7 |SHARINDX| ...
|
|
|
|
* +------------+--------+
|
|
|
|
* |Port 0 BCAST|SHARINDX| ...
|
|
|
|
* +------------+--------+
|
|
|
|
* |Port 1 BCAST|SHARINDX| ...
|
|
|
|
* +------------+--------+
|
|
|
|
* ... ...
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
* |Port 4 BCAST|SHARINDX| | Policer 44: Rate, Burst, MTU |
|
|
|
|
* +------------+--------+ +---------------------------------+
|
|
|
|
*
|
|
|
|
* In this driver, we shall use policers 0-4 as statically allocated port
|
|
|
|
* (matchall) policers. So we need to make the SHARINDX for all lookups
|
|
|
|
* corresponding to this ingress port (8 VLAN PCP lookups and 1 broadcast
|
|
|
|
* lookup) equal.
|
|
|
|
* The remaining policers (40) shall be dynamically allocated for flower
|
|
|
|
* policers, where the key is either vlan_prio or dst_mac ff:ff:ff:ff:ff:ff.
|
|
|
|
*/
|
2019-05-02 23:23:30 +03:00
|
|
|
/* Convert a speed given in Mbps into the value stored in a policer's "rate"
 * field: the speed is multiplied by 64000 and then divided by 1000 using
 * integer arithmetic, i.e. 64 units per Mbps.
 */
#define SJA1105_RATE_MBPS(speed) (((speed) * 64000) / 1000)
|
|
|
|
|
|
|
|
static int sja1105_init_l2_policing(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_policing_entry *policing;
|
2021-05-24 16:14:13 +03:00
|
|
|
struct dsa_switch *ds = priv->ds;
|
2019-05-02 23:23:30 +03:00
|
|
|
struct sja1105_table *table;
|
2020-03-29 14:52:01 +03:00
|
|
|
int port, tc;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_POLICING];
|
|
|
|
|
|
|
|
/* Discard previous L2 Policing Table */
|
|
|
|
if (table->entry_count) {
|
|
|
|
kfree(table->entries);
|
|
|
|
table->entry_count = 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entries = kcalloc(table->ops->max_entry_count,
|
2019-05-02 23:23:30 +03:00
|
|
|
table->ops->unpacked_entry_size, GFP_KERNEL);
|
|
|
|
if (!table->entries)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
net: dsa: sja1105: dynamically choose the number of static config table entries
Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).
Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.
The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-24 16:14:18 +03:00
|
|
|
table->entry_count = table->ops->max_entry_count;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
policing = table->entries;
|
|
|
|
|
2020-03-29 14:52:01 +03:00
|
|
|
/* Setup shared indices for the matchall policers */
|
2021-05-24 16:14:13 +03:00
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
2021-05-24 16:14:20 +03:00
|
|
|
int mcast = (ds->num_ports * (SJA1105_NUM_TC + 1)) + port;
|
2021-05-24 16:14:13 +03:00
|
|
|
int bcast = (ds->num_ports * SJA1105_NUM_TC) + port;
|
2020-03-29 14:52:01 +03:00
|
|
|
|
|
|
|
for (tc = 0; tc < SJA1105_NUM_TC; tc++)
|
|
|
|
policing[port * SJA1105_NUM_TC + tc].sharindx = port;
|
|
|
|
|
|
|
|
policing[bcast].sharindx = port;
|
2021-05-24 16:14:20 +03:00
|
|
|
/* Only SJA1110 has multicast policers */
|
net: dsa: sja1105: avoid out of bounds access in sja1105_init_l2_policing()
The SJA1105 family has 45 L2 policing table entries
(SJA1105_MAX_L2_POLICING_COUNT) and SJA1110 has 110
(SJA1110_MAX_L2_POLICING_COUNT). Keeping the table structure but
accounting for the difference in port count (5 in SJA1105 vs 10 in
SJA1110) does not fully explain the difference. Rather, the SJA1110 also
has L2 ingress policers for multicast traffic. If a packet is classified
as multicast, it will be processed by the policer index 99 + SRCPORT.
The sja1105_init_l2_policing() function initializes all L2 policers such
that they don't interfere with normal packet reception by default. To have
a common code between SJA1105 and SJA1110, the index of the multicast
policer for the port is calculated because it's an index that is out of
bounds for SJA1105 but in bounds for SJA1110, and a bounds check is
performed.
The code fails to do the proper thing when determining what to do with the
multicast policer of port 0 on SJA1105 (ds->num_ports = 5). The "mcast"
index will be equal to 45, which is also equal to
table->ops->max_entry_count (SJA1105_MAX_L2_POLICING_COUNT). So it passes
through the check. But at the same time, SJA1105 doesn't have multicast
policers. So the code programs the SHARINDX field of an out-of-bounds
element in the L2 Policing table of the static config.
The comparison between index 45 and 45 entries should have determined the
code to not access this policer index on SJA1105, since its memory wasn't
even allocated.
With enough bad luck, the out-of-bounds write could even overwrite other
valid kernel data, but in this case, the issue was detected using KASAN.
Kernel log:
sja1105 spi5.0: Probed switch chip: SJA1105Q
==================================================================
BUG: KASAN: slab-out-of-bounds in sja1105_setup+0x1cbc/0x2340
Write of size 8 at addr ffffff880bd57708 by task kworker/u8:0/8
...
Workqueue: events_unbound deferred_probe_work_func
Call trace:
...
sja1105_setup+0x1cbc/0x2340
dsa_register_switch+0x1284/0x18d0
sja1105_probe+0x748/0x840
...
Allocated by task 8:
...
sja1105_setup+0x1bcc/0x2340
dsa_register_switch+0x1284/0x18d0
sja1105_probe+0x748/0x840
...
Fixes: 38fbe91f2287 ("net: dsa: sja1105: configure the multicast policers, if present")
CC: stable@vger.kernel.org # 5.15+
Signed-off-by: Radu Nicolae Pirea (OSS) <radu-nicolae.pirea@oss.nxp.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://lore.kernel.org/r/20221207132347.38698-1-radu-nicolae.pirea@oss.nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-12-07 15:23:47 +02:00
|
|
|
if (mcast < table->ops->max_entry_count)
|
2021-05-24 16:14:20 +03:00
|
|
|
policing[mcast].sharindx = port;
|
2020-03-29 14:52:01 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Setup the matchall policer parameters */
|
2021-05-24 16:14:13 +03:00
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
2020-03-27 21:55:45 +02:00
|
|
|
int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
|
|
|
|
|
2021-08-04 16:54:34 +03:00
|
|
|
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
|
2020-03-27 21:55:45 +02:00
|
|
|
mtu += VLAN_HLEN;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
2020-03-29 14:52:01 +03:00
|
|
|
policing[port].smax = 65535; /* Burst size in bytes */
|
|
|
|
policing[port].rate = SJA1105_RATE_MBPS(1000);
|
|
|
|
policing[port].maxlen = mtu;
|
|
|
|
policing[port].partition = 0;
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
2020-03-29 14:52:01 +03:00
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-06-04 17:01:50 +03:00
|
|
|
static int sja1105_static_config_load(struct sja1105_private *priv)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
sja1105_static_config_free(&priv->static_config);
|
|
|
|
rc = sja1105_static_config_init(&priv->static_config,
|
|
|
|
priv->info->static_ops,
|
|
|
|
priv->info->device_id);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
/* Build static configuration */
|
|
|
|
rc = sja1105_init_mac_settings(priv);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
2021-06-04 17:01:50 +03:00
|
|
|
rc = sja1105_init_mii_settings(priv);
|
2019-05-02 23:23:30 +03:00
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
rc = sja1105_init_static_fdb(priv);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
rc = sja1105_init_static_vlan(priv);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
rc = sja1105_init_l2_lookup_params(priv);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
rc = sja1105_init_l2_forwarding(priv);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
rc = sja1105_init_l2_forwarding_params(priv);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
rc = sja1105_init_l2_policing(priv);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
rc = sja1105_init_general_params(priv);
|
2020-03-24 00:59:21 +02:00
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
rc = sja1105_init_avb_params(priv);
|
net: dsa: sja1105: add support for the SJA1110 switch family
The SJA1110 is basically an SJA1105 with more ports, some integrated
PHYs (100base-T1 and 100base-TX) and an embedded microcontroller which
can be disabled, and the switch core can be controlled by a host running
Linux, over SPI.
This patch contains:
- the static and dynamic config packing functions, for the tables that
are common with SJA1105
- one more static config tables which is "unique" to the SJA1110
(actually it is a rehash of stuff that was placed somewhere else in
SJA1105): the PCP Remapping Table
- a reset and clock configuration procedure for the SJA1110 switch.
This resets just the switch subsystem, and gates off the clock which
powers on the embedded microcontroller.
- an RGMII delay configuration procedure for SJA1110, which is very
similar to SJA1105, but different enough for us to be unable to reuse
it (this is a pattern that repeats itself)
- some adaptations to dynamic config table entries which are no longer
programmed in the same way. For example, to delete a VLAN, you used to
write an entry through the dynamic reconfiguration interface with the
desired VLAN ID, and with the VALIDENT bit set to false. Now, the VLAN
table entries contain a TYPE_ENTRY field, which must be set to zero
(in a backwards-incompatible way) in order for the entry to be deleted,
or to some other entry for the VLAN to match "inner tagged" or "outer
tagged" packets.
- a similar thing for the static config: the xMII Mode Parameters Table
encoding for SGMII and MII (the latter just when attached to a
100base-TX PHY) just isn't what it used to be in SJA1105. They are
identical, except there is an extra "special" bit which needs to be
set. Set it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-08 12:25:36 +03:00
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
rc = sja1110_init_pcp_remapping(priv);
|
2019-05-02 23:23:30 +03:00
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
/* Send initial configuration to hardware via SPI */
|
|
|
|
return sja1105_static_config_upload(priv);
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays
This change does not fix any functional issue or address any real life
use case that wasn't possible before. It is just a small step in the
process of standardizing the way in which Ethernet MAC drivers may apply
RGMII delays (traditionally these have been applied by PHYs, with no
clear definition of what to do in the case of a fixed-link).
The sja1105 driver used to apply MAC-level RGMII delays on the RX data
lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or
"rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id".
But the standard definitions don't say anything about behaving
differently when the port is in fixed-link vs when it isn't, and the new
device tree bindings are about having a way of applying the delays in a
way that is independent of the phy-mode and of the fixed-link property.
When the {rx,tx}-internal-delay-ps properties are present, use them,
otherwise fall back to the old behavior and warn.
One other thing to note is that the SJA1105 hardware applies a delay
value in degrees rather than in picoseconds (the delay in ps changes
depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at
100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase
shift of the internal delay lines assuming that the device tree meant
gigabit, and we let the hardware scale those according to the link speed.
Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/
Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-18 22:29:52 +03:00
|
|
|
/* This is the "new way" for a MAC driver to configure its RGMII delay lines,
|
|
|
|
* based on the explicit "rx-internal-delay-ps" and "tx-internal-delay-ps"
|
|
|
|
* properties. It has the advantage of working with fixed links and with PHYs
|
|
|
|
* that apply RGMII delays too, and the MAC driver need not perform any
|
|
|
|
* special checks.
|
|
|
|
*
|
|
|
|
* Previously we were acting upon the "phy-mode" property when we were
|
|
|
|
* operating in fixed-link, basically acting as a PHY, but with a reversed
|
|
|
|
* interpretation: PHY_INTERFACE_MODE_RGMII_TXID means that the MAC should
|
|
|
|
* behave as if it is connected to a PHY which has applied RGMII delays in the
|
|
|
|
* TX direction. So if anything, RX delays should have been added by the MAC,
|
|
|
|
* but we were adding TX delays.
|
|
|
|
*
|
|
|
|
* If the "{rx,tx}-internal-delay-ps" properties are not specified, we fall
|
|
|
|
* back to the legacy behavior and apply delays on fixed-link ports based on
|
|
|
|
* the reverse interpretation of the phy-mode. This is a deviation from the
|
|
|
|
* expected default behavior which is to simply apply no delays. To achieve
|
|
|
|
* that behavior with the new bindings, it is mandatory to specify
|
|
|
|
* "{rx,tx}-internal-delay-ps" with a value of 0.
|
|
|
|
*/
|
|
|
|
static int sja1105_parse_rgmii_delays(struct sja1105_private *priv, int port,
|
|
|
|
struct device_node *port_dn)
|
net: dsa: sja1105: Error out if RGMII delays are requested in DT
Documentation/devicetree/bindings/net/ethernet.txt is confusing because
it says what the MAC should not do, but not what it *should* do:
* "rgmii-rxid" (RGMII with internal RX delay provided by the PHY, the MAC
should not add an RX delay in this case)
The gap in semantics is threefold:
1. Is it illegal for the MAC to apply the Rx internal delay by itself,
and simplify the phy_mode (mask off "rgmii-rxid" into "rgmii") before
passing it to of_phy_connect? The documentation would suggest yes.
2. For "rgmii-rxid", while the situation with the Rx clock skew is more
or less clear (needs to be added by the PHY), what should the MAC
driver do about the Tx delays? Is it an implicit wild card for the
MAC to apply delays in the Tx direction if it can? What if those were
already added as serpentine PCB traces, how could that be made more
obvious through DT bindings so that the MAC doesn't attempt to add
them twice and again potentially break the link?
3. If the interface is a fixed-link and therefore the PHY object is
fixed (a purely software entity that obviously cannot add clock
skew), what is the meaning of the above property?
So an interpretation of the RGMII bindings was chosen that hopefully
does not contradict their intention but also makes them more applied.
The SJA1105 driver understands to act upon "rgmii-*id" phy-mode bindings
if the port is in the PHY role (either explicitly, or if it is a
fixed-link). Otherwise it always passes the duty of setting up delays to
the PHY driver.
The error behavior that this patch adds is required on SJA1105E/T where
the MAC really cannot apply internal delays. If the other end of the
fixed-link cannot apply RGMII delays either (this would be specified
through its own DT bindings), then the situation requires PCB delays.
For SJA1105P/Q/R/S, this is however hardware supported and the error is
thus only temporary. I created a stub function pointer for configuring
delays per-port on RXC and TXC, and will implement it when I have access
to a board with this hardware setup.
Meanwhile do not allow the user to select an invalid configuration.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-02 23:23:32 +03:00
|
|
|
{
|
net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays
This change does not fix any functional issue or address any real life
use case that wasn't possible before. It is just a small step in the
process of standardizing the way in which Ethernet MAC drivers may apply
RGMII delays (traditionally these have been applied by PHYs, with no
clear definition of what to do in the case of a fixed-link).
The sja1105 driver used to apply MAC-level RGMII delays on the RX data
lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or
"rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id".
But the standard definitions don't say anything about behaving
differently when the port is in fixed-link vs when it isn't, and the new
device tree bindings are about having a way of applying the delays in a
way that is independent of the phy-mode and of the fixed-link property.
When the {rx,tx}-internal-delay-ps properties are present, use them,
otherwise fall back to the old behavior and warn.
One other thing to note is that the SJA1105 hardware applies a delay
value in degrees rather than in picoseconds (the delay in ps changes
depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at
100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase
shift of the internal delay lines assuming that the device tree meant
gigabit, and we let the hardware scale those according to the link speed.
Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/
Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-18 22:29:52 +03:00
|
|
|
phy_interface_t phy_mode = priv->phy_mode[port];
|
|
|
|
struct device *dev = &priv->spidev->dev;
|
|
|
|
int rx_delay = -1, tx_delay = -1;
|
net: dsa: sja1105: Error out if RGMII delays are requested in DT
Documentation/devicetree/bindings/net/ethernet.txt is confusing because
it says what the MAC should not do, but not what it *should* do:
* "rgmii-rxid" (RGMII with internal RX delay provided by the PHY, the MAC
should not add an RX delay in this case)
The gap in semantics is threefold:
1. Is it illegal for the MAC to apply the Rx internal delay by itself,
and simplify the phy_mode (mask off "rgmii-rxid" into "rgmii") before
passing it to of_phy_connect? The documentation would suggest yes.
2. For "rgmii-rxid", while the situation with the Rx clock skew is more
or less clear (needs to be added by the PHY), what should the MAC
driver do about the Tx delays? Is it an implicit wild card for the
MAC to apply delays in the Tx direction if it can? What if those were
already added as serpentine PCB traces, how could that be made more
obvious through DT bindings so that the MAC doesn't attempt to add
them twice and again potentially break the link?
3. If the interface is a fixed-link and therefore the PHY object is
fixed (a purely software entity that obviously cannot add clock
skew), what is the meaning of the above property?
So an interpretation of the RGMII bindings was chosen that hopefully
does not contradict their intention but also makes them more applied.
The SJA1105 driver understands to act upon "rgmii-*id" phy-mode bindings
if the port is in the PHY role (either explicitly, or if it is a
fixed-link). Otherwise it always passes the duty of setting up delays to
the PHY driver.
The error behavior that this patch adds is required on SJA1105E/T where
the MAC really cannot apply internal delays. If the other end of the
fixed-link cannot apply RGMII delays either (this would be specified
through its own DT bindings), then the situation requires PCB delays.
For SJA1105P/Q/R/S, this is however hardware supported and the error is
thus only temporary. I created a stub function pointer for configuring
delays per-port on RXC and TXC, and will implement it when I have access
to a board with this hardware setup.
Meanwhile do not allow the user to select an invalid configuration.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-02 23:23:32 +03:00
|
|
|
|
net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays
This change does not fix any functional issue or address any real life
use case that wasn't possible before. It is just a small step in the
process of standardizing the way in which Ethernet MAC drivers may apply
RGMII delays (traditionally these have been applied by PHYs, with no
clear definition of what to do in the case of a fixed-link).
The sja1105 driver used to apply MAC-level RGMII delays on the RX data
lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or
"rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id".
But the standard definitions don't say anything about behaving
differently when the port is in fixed-link vs when it isn't, and the new
device tree bindings are about having a way of applying the delays in a
way that is independent of the phy-mode and of the fixed-link property.
When the {rx,tx}-internal-delay-ps properties are present, use them,
otherwise fall back to the old behavior and warn.
One other thing to note is that the SJA1105 hardware applies a delay
value in degrees rather than in picoseconds (the delay in ps changes
depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at
100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase
shift of the internal delay lines assuming that the device tree meant
gigabit, and we let the hardware scale those according to the link speed.
Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/
Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-18 22:29:52 +03:00
|
|
|
if (!phy_interface_mode_is_rgmii(phy_mode))
|
|
|
|
return 0;
|
net: dsa: sja1105: Error out if RGMII delays are requested in DT
Documentation/devicetree/bindings/net/ethernet.txt is confusing because
it says what the MAC should not do, but not what it *should* do:
* "rgmii-rxid" (RGMII with internal RX delay provided by the PHY, the MAC
should not add an RX delay in this case)
The gap in semantics is threefold:
1. Is it illegal for the MAC to apply the Rx internal delay by itself,
and simplify the phy_mode (mask off "rgmii-rxid" into "rgmii") before
passing it to of_phy_connect? The documentation would suggest yes.
2. For "rgmii-rxid", while the situation with the Rx clock skew is more
or less clear (needs to be added by the PHY), what should the MAC
driver do about the Tx delays? Is it an implicit wild card for the
MAC to apply delays in the Tx direction if it can? What if those were
already added as serpentine PCB traces, how could that be made more
obvious through DT bindings so that the MAC doesn't attempt to add
them twice and again potentially break the link?
3. If the interface is a fixed-link and therefore the PHY object is
fixed (a purely software entity that obviously cannot add clock
skew), what is the meaning of the above property?
So an interpretation of the RGMII bindings was chosen that hopefully
does not contradict their intention but also makes them more applied.
The SJA1105 driver understands to act upon "rgmii-*id" phy-mode bindings
if the port is in the PHY role (either explicitly, or if it is a
fixed-link). Otherwise it always passes the duty of setting up delays to
the PHY driver.
The error behavior that this patch adds is required on SJA1105E/T where
the MAC really cannot apply internal delays. If the other end of the
fixed-link cannot apply RGMII delays either (this would be specified
through its own DT bindings), then the situation requires PCB delays.
For SJA1105P/Q/R/S, this is however hardware supported and the error is
thus only temporary. I created a stub function pointer for configuring
delays per-port on RXC and TXC, and will implement it when I have access
to a board with this hardware setup.
Meanwhile do not allow the user to select an invalid configuration.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-02 23:23:32 +03:00
|
|
|
|
net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays
This change does not fix any functional issue or address any real life
use case that wasn't possible before. It is just a small step in the
process of standardizing the way in which Ethernet MAC drivers may apply
RGMII delays (traditionally these have been applied by PHYs, with no
clear definition of what to do in the case of a fixed-link).
The sja1105 driver used to apply MAC-level RGMII delays on the RX data
lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or
"rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id".
But the standard definitions don't say anything about behaving
differently when the port is in fixed-link vs when it isn't, and the new
device tree bindings are about having a way of applying the delays in a
way that is independent of the phy-mode and of the fixed-link property.
When the {rx,tx}-internal-delay-ps properties are present, use them,
otherwise fall back to the old behavior and warn.
One other thing to note is that the SJA1105 hardware applies a delay
value in degrees rather than in picoseconds (the delay in ps changes
depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at
100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase
shift of the internal delay lines assuming that the device tree meant
gigabit, and we let the hardware scale those according to the link speed.
Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/
Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-18 22:29:52 +03:00
|
|
|
of_property_read_u32(port_dn, "rx-internal-delay-ps", &rx_delay);
|
|
|
|
of_property_read_u32(port_dn, "tx-internal-delay-ps", &tx_delay);
|
net: dsa: sja1105: Error out if RGMII delays are requested in DT
Documentation/devicetree/bindings/net/ethernet.txt is confusing because
it says what the MAC should not do, but not what it *should* do:
* "rgmii-rxid" (RGMII with internal RX delay provided by the PHY, the MAC
should not add an RX delay in this case)
The gap in semantics is threefold:
1. Is it illegal for the MAC to apply the Rx internal delay by itself,
and simplify the phy_mode (mask off "rgmii-rxid" into "rgmii") before
passing it to of_phy_connect? The documentation would suggest yes.
2. For "rgmii-rxid", while the situation with the Rx clock skew is more
or less clear (needs to be added by the PHY), what should the MAC
driver do about the Tx delays? Is it an implicit wild card for the
MAC to apply delays in the Tx direction if it can? What if those were
already added as serpentine PCB traces, how could that be made more
obvious through DT bindings so that the MAC doesn't attempt to add
them twice and again potentially break the link?
3. If the interface is a fixed-link and therefore the PHY object is
fixed (a purely software entity that obviously cannot add clock
skew), what is the meaning of the above property?
So an interpretation of the RGMII bindings was chosen that hopefully
does not contradict their intention but also makes them more applied.
The SJA1105 driver understands to act upon "rgmii-*id" phy-mode bindings
if the port is in the PHY role (either explicitly, or if it is a
fixed-link). Otherwise it always passes the duty of setting up delays to
the PHY driver.
The error behavior that this patch adds is required on SJA1105E/T where
the MAC really cannot apply internal delays. If the other end of the
fixed-link cannot apply RGMII delays either (this would be specified
through its own DT bindings), then the situation requires PCB delays.
For SJA1105P/Q/R/S, this is however hardware supported and the error is
thus only temporary. I created a stub function pointer for configuring
delays per-port on RXC and TXC, and will implement it when I have access
to a board with this hardware setup.
Meanwhile do not allow the user to select an invalid configuration.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-02 23:23:32 +03:00
|
|
|
|
net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays
This change does not fix any functional issue or address any real life
use case that wasn't possible before. It is just a small step in the
process of standardizing the way in which Ethernet MAC drivers may apply
RGMII delays (traditionally these have been applied by PHYs, with no
clear definition of what to do in the case of a fixed-link).
The sja1105 driver used to apply MAC-level RGMII delays on the RX data
lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or
"rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id".
But the standard definitions don't say anything about behaving
differently when the port is in fixed-link vs when it isn't, and the new
device tree bindings are about having a way of applying the delays in a
way that is independent of the phy-mode and of the fixed-link property.
When the {rx,tx}-internal-delay-ps properties are present, use them,
otherwise fall back to the old behavior and warn.
One other thing to note is that the SJA1105 hardware applies a delay
value in degrees rather than in picoseconds (the delay in ps changes
depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at
100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase
shift of the internal delay lines assuming that the device tree meant
gigabit, and we let the hardware scale those according to the link speed.
Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/
Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-18 22:29:52 +03:00
|
|
|
if (rx_delay == -1 && tx_delay == -1 && priv->fixed_link[port]) {
|
|
|
|
dev_warn(dev,
|
|
|
|
"Port %d interpreting RGMII delay settings based on \"phy-mode\" property, "
|
|
|
|
"please update device tree to specify \"rx-internal-delay-ps\" and "
|
|
|
|
"\"tx-internal-delay-ps\"",
|
|
|
|
port);
|
net: dsa: sja1105: Error out if RGMII delays are requested in DT
Documentation/devicetree/bindings/net/ethernet.txt is confusing because
it says what the MAC should not do, but not what it *should* do:
* "rgmii-rxid" (RGMII with internal RX delay provided by the PHY, the MAC
should not add an RX delay in this case)
The gap in semantics is threefold:
1. Is it illegal for the MAC to apply the Rx internal delay by itself,
and simplify the phy_mode (mask off "rgmii-rxid" into "rgmii") before
passing it to of_phy_connect? The documentation would suggest yes.
2. For "rgmii-rxid", while the situation with the Rx clock skew is more
or less clear (needs to be added by the PHY), what should the MAC
driver do about the Tx delays? Is it an implicit wild card for the
MAC to apply delays in the Tx direction if it can? What if those were
already added as serpentine PCB traces, how could that be made more
obvious through DT bindings so that the MAC doesn't attempt to add
them twice and again potentially break the link?
3. If the interface is a fixed-link and therefore the PHY object is
fixed (a purely software entity that obviously cannot add clock
skew), what is the meaning of the above property?
So an interpretation of the RGMII bindings was chosen that hopefully
does not contradict their intention but also makes them more applied.
The SJA1105 driver understands to act upon "rgmii-*id" phy-mode bindings
if the port is in the PHY role (either explicitly, or if it is a
fixed-link). Otherwise it always passes the duty of setting up delays to
the PHY driver.
The error behavior that this patch adds is required on SJA1105E/T where
the MAC really cannot apply internal delays. If the other end of the
fixed-link cannot apply RGMII delays either (this would be specified
through its own DT bindings), then the situation requires PCB delays.
For SJA1105P/Q/R/S, this is however hardware supported and the error is
thus only temporary. I created a stub function pointer for configuring
delays per-port on RXC and TXC, and will implement it when I have access
to a board with this hardware setup.
Meanwhile do not allow the user to select an invalid configuration.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-02 23:23:32 +03:00
|
|
|
|
net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays
This change does not fix any functional issue or address any real life
use case that wasn't possible before. It is just a small step in the
process of standardizing the way in which Ethernet MAC drivers may apply
RGMII delays (traditionally these have been applied by PHYs, with no
clear definition of what to do in the case of a fixed-link).
The sja1105 driver used to apply MAC-level RGMII delays on the RX data
lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or
"rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id".
But the standard definitions don't say anything about behaving
differently when the port is in fixed-link vs when it isn't, and the new
device tree bindings are about having a way of applying the delays in a
way that is independent of the phy-mode and of the fixed-link property.
When the {rx,tx}-internal-delay-ps properties are present, use them,
otherwise fall back to the old behavior and warn.
One other thing to note is that the SJA1105 hardware applies a delay
value in degrees rather than in picoseconds (the delay in ps changes
depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at
100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase
shift of the internal delay lines assuming that the device tree meant
gigabit, and we let the hardware scale those according to the link speed.
Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/
Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-18 22:29:52 +03:00
|
|
|
if (phy_mode == PHY_INTERFACE_MODE_RGMII_RXID ||
|
|
|
|
phy_mode == PHY_INTERFACE_MODE_RGMII_ID)
|
|
|
|
rx_delay = 2000;
|
|
|
|
|
|
|
|
if (phy_mode == PHY_INTERFACE_MODE_RGMII_TXID ||
|
|
|
|
phy_mode == PHY_INTERFACE_MODE_RGMII_ID)
|
|
|
|
tx_delay = 2000;
|
net: dsa: sja1105: Error out if RGMII delays are requested in DT
Documentation/devicetree/bindings/net/ethernet.txt is confusing because
it says what the MAC should not do, but not what it *should* do:
* "rgmii-rxid" (RGMII with internal RX delay provided by the PHY, the MAC
should not add an RX delay in this case)
The gap in semantics is threefold:
1. Is it illegal for the MAC to apply the Rx internal delay by itself,
and simplify the phy_mode (mask off "rgmii-rxid" into "rgmii") before
passing it to of_phy_connect? The documentation would suggest yes.
2. For "rgmii-rxid", while the situation with the Rx clock skew is more
or less clear (needs to be added by the PHY), what should the MAC
driver do about the Tx delays? Is it an implicit wild card for the
MAC to apply delays in the Tx direction if it can? What if those were
already added as serpentine PCB traces, how could that be made more
obvious through DT bindings so that the MAC doesn't attempt to add
them twice and again potentially break the link?
3. If the interface is a fixed-link and therefore the PHY object is
fixed (a purely software entity that obviously cannot add clock
skew), what is the meaning of the above property?
So an interpretation of the RGMII bindings was chosen that hopefully
does not contradict their intention but also makes them more applied.
The SJA1105 driver understands to act upon "rgmii-*id" phy-mode bindings
if the port is in the PHY role (either explicitly, or if it is a
fixed-link). Otherwise it always passes the duty of setting up delays to
the PHY driver.
The error behavior that this patch adds is required on SJA1105E/T where
the MAC really cannot apply internal delays. If the other end of the
fixed-link cannot apply RGMII delays either (this would be specified
through its own DT bindings), then the situation requires PCB delays.
For SJA1105P/Q/R/S, this is however hardware supported and the error is
thus only temporary. I created a stub function pointer for configuring
delays per-port on RXC and TXC, and will implement it when I have access
to a board with this hardware setup.
Meanwhile do not allow the user to select an invalid configuration.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-02 23:23:32 +03:00
|
|
|
}
|
net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays
This change does not fix any functional issue or address any real life
use case that wasn't possible before. It is just a small step in the
process of standardizing the way in which Ethernet MAC drivers may apply
RGMII delays (traditionally these have been applied by PHYs, with no
clear definition of what to do in the case of a fixed-link).
The sja1105 driver used to apply MAC-level RGMII delays on the RX data
lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or
"rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id".
But the standard definitions don't say anything about behaving
differently when the port is in fixed-link vs when it isn't, and the new
device tree bindings are about having a way of applying the delays in a
way that is independent of the phy-mode and of the fixed-link property.
When the {rx,tx}-internal-delay-ps properties are present, use them,
otherwise fall back to the old behavior and warn.
One other thing to note is that the SJA1105 hardware applies a delay
value in degrees rather than in picoseconds (the delay in ps changes
depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at
100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase
shift of the internal delay lines assuming that the device tree meant
gigabit, and we let the hardware scale those according to the link speed.
Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/
Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-18 22:29:52 +03:00
|
|
|
|
|
|
|
if (rx_delay < 0)
|
|
|
|
rx_delay = 0;
|
|
|
|
if (tx_delay < 0)
|
|
|
|
tx_delay = 0;
|
|
|
|
|
|
|
|
if ((rx_delay || tx_delay) && !priv->info->setup_rgmii_delay) {
|
|
|
|
dev_err(dev, "Chip cannot apply RGMII delays\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((rx_delay && rx_delay < SJA1105_RGMII_DELAY_MIN_PS) ||
|
|
|
|
(tx_delay && tx_delay < SJA1105_RGMII_DELAY_MIN_PS) ||
|
|
|
|
(rx_delay > SJA1105_RGMII_DELAY_MAX_PS) ||
|
|
|
|
(tx_delay > SJA1105_RGMII_DELAY_MAX_PS)) {
|
|
|
|
dev_err(dev,
|
|
|
|
"port %d RGMII delay values out of range, must be between %d and %d ps\n",
|
|
|
|
port, SJA1105_RGMII_DELAY_MIN_PS, SJA1105_RGMII_DELAY_MAX_PS);
|
|
|
|
return -ERANGE;
|
|
|
|
}
|
|
|
|
|
|
|
|
priv->rgmii_rx_delay_ps[port] = rx_delay;
|
|
|
|
priv->rgmii_tx_delay_ps[port] = tx_delay;
|
|
|
|
|
net: dsa: sja1105: Error out if RGMII delays are requested in DT
Documentation/devicetree/bindings/net/ethernet.txt is confusing because
it says what the MAC should not do, but not what it *should* do:
* "rgmii-rxid" (RGMII with internal RX delay provided by the PHY, the MAC
should not add an RX delay in this case)
The gap in semantics is threefold:
1. Is it illegal for the MAC to apply the Rx internal delay by itself,
and simplify the phy_mode (mask off "rgmii-rxid" into "rgmii") before
passing it to of_phy_connect? The documentation would suggest yes.
2. For "rgmii-rxid", while the situation with the Rx clock skew is more
or less clear (needs to be added by the PHY), what should the MAC
driver do about the Tx delays? Is it an implicit wild card for the
MAC to apply delays in the Tx direction if it can? What if those were
already added as serpentine PCB traces, how could that be made more
obvious through DT bindings so that the MAC doesn't attempt to add
them twice and again potentially break the link?
3. If the interface is a fixed-link and therefore the PHY object is
fixed (a purely software entity that obviously cannot add clock
skew), what is the meaning of the above property?
So an interpretation of the RGMII bindings was chosen that hopefully
does not contradict their intention but also makes them more applied.
The SJA1105 driver understands to act upon "rgmii-*id" phy-mode bindings
if the port is in the PHY role (either explicitly, or if it is a
fixed-link). Otherwise it always passes the duty of setting up delays to
the PHY driver.
The error behavior that this patch adds is required on SJA1105E/T where
the MAC really cannot apply internal delays. If the other end of the
fixed-link cannot apply RGMII delays either (this would be specified
through its own DT bindings), then the situation requires PCB delays.
For SJA1105P/Q/R/S, this is however hardware supported and the error is
thus only temporary. I created a stub function pointer for configuring
delays per-port on RXC and TXC, and will implement it when I have access
to a board with this hardware setup.
Meanwhile do not allow the user to select an invalid configuration.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-02 23:23:32 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
static int sja1105_parse_ports_node(struct sja1105_private *priv,
|
|
|
|
struct device_node *ports_node)
|
|
|
|
{
|
|
|
|
struct device *dev = &priv->spidev->dev;
|
|
|
|
struct device_node *child;
|
|
|
|
|
2020-01-16 20:43:27 +02:00
|
|
|
for_each_available_child_of_node(ports_node, child) {
|
2019-05-02 23:23:30 +03:00
|
|
|
struct device_node *phy_node;
|
net: of_get_phy_mode: Change API to solve int/unit warnings
Before this change of_get_phy_mode() returned an enum,
phy_interface_t. On error, -ENODEV etc, is returned. If the result of
the function is stored in a variable of type phy_interface_t, and the
compiler has decided to represent this as an unsigned int, comparision
with -ENODEV etc, is a signed vs unsigned comparision.
Fix this problem by changing the API. Make the function return an
error, or 0 on success, and pass a pointer, of type phy_interface_t,
where the phy mode should be stored.
v2:
Return with *interface set to PHY_INTERFACE_MODE_NA on error.
Add error checks to all users of of_get_phy_mode()
Fixup a few reverse christmas tree errors
Fixup a few slightly malformed reverse christmas trees
v3:
Fix 0-day reported errors.
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-04 02:40:33 +01:00
|
|
|
phy_interface_t phy_mode;
|
2019-05-02 23:23:30 +03:00
|
|
|
u32 index;
|
net: of_get_phy_mode: Change API to solve int/unit warnings
Before this change of_get_phy_mode() returned an enum,
phy_interface_t. On error, -ENODEV etc, is returned. If the result of
the function is stored in a variable of type phy_interface_t, and the
compiler has decided to represent this as an unsigned int, comparision
with -ENODEV etc, is a signed vs unsigned comparision.
Fix this problem by changing the API. Make the function return an
error, or 0 on success, and pass a pointer, of type phy_interface_t,
where the phy mode should be stored.
v2:
Return with *interface set to PHY_INTERFACE_MODE_NA on error.
Add error checks to all users of of_get_phy_mode()
Fixup a few reverse christmas tree errors
Fixup a few slightly malformed reverse christmas trees
v3:
Fix 0-day reported errors.
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-04 02:40:33 +01:00
|
|
|
int err;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
/* Get switch port number from DT */
|
|
|
|
if (of_property_read_u32(child, "reg", &index) < 0) {
|
|
|
|
dev_err(dev, "Port number not defined in device tree "
|
|
|
|
"(property \"reg\")\n");
|
2019-07-23 16:14:48 +05:30
|
|
|
of_node_put(child);
|
2019-05-02 23:23:30 +03:00
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get PHY mode from DT */
|
net: of_get_phy_mode: Change API to solve int/unit warnings
Before this change of_get_phy_mode() returned an enum,
phy_interface_t. On error, -ENODEV etc, is returned. If the result of
the function is stored in a variable of type phy_interface_t, and the
compiler has decided to represent this as an unsigned int, comparision
with -ENODEV etc, is a signed vs unsigned comparision.
Fix this problem by changing the API. Make the function return an
error, or 0 on success, and pass a pointer, of type phy_interface_t,
where the phy mode should be stored.
v2:
Return with *interface set to PHY_INTERFACE_MODE_NA on error.
Add error checks to all users of of_get_phy_mode()
Fixup a few reverse christmas tree errors
Fixup a few slightly malformed reverse christmas trees
v3:
Fix 0-day reported errors.
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-04 02:40:33 +01:00
|
|
|
err = of_get_phy_mode(child, &phy_mode);
|
|
|
|
if (err) {
|
2019-05-02 23:23:30 +03:00
|
|
|
dev_err(dev, "Failed to read phy-mode or "
|
|
|
|
"phy-interface-type property for port %d\n",
|
|
|
|
index);
|
2019-07-23 16:14:48 +05:30
|
|
|
of_node_put(child);
|
2019-05-02 23:23:30 +03:00
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
|
|
|
phy_node = of_parse_phandle(child, "phy-handle", 0);
|
|
|
|
if (!phy_node) {
|
|
|
|
if (!of_phy_is_fixed_link(child)) {
|
|
|
|
dev_err(dev, "phy-handle or fixed-link "
|
|
|
|
"properties missing!\n");
|
2019-07-23 16:14:48 +05:30
|
|
|
of_node_put(child);
|
2019-05-02 23:23:30 +03:00
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
/* phy-handle is missing, but fixed-link isn't.
|
|
|
|
* So it's a fixed link. Default to PHY role.
|
|
|
|
*/
|
2021-06-04 17:01:49 +03:00
|
|
|
priv->fixed_link[index] = true;
|
2019-05-02 23:23:30 +03:00
|
|
|
} else {
|
|
|
|
of_node_put(phy_node);
|
|
|
|
}
|
|
|
|
|
2021-05-31 01:59:35 +03:00
|
|
|
priv->phy_mode[index] = phy_mode;
|
net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays
This change does not fix any functional issue or address any real life
use case that wasn't possible before. It is just a small step in the
process of standardizing the way in which Ethernet MAC drivers may apply
RGMII delays (traditionally these have been applied by PHYs, with no
clear definition of what to do in the case of a fixed-link).
The sja1105 driver used to apply MAC-level RGMII delays on the RX data
lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or
"rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id".
But the standard definitions don't say anything about behaving
differently when the port is in fixed-link vs when it isn't, and the new
device tree bindings are about having a way of applying the delays in a
way that is independent of the phy-mode and of the fixed-link property.
When the {rx,tx}-internal-delay-ps properties are present, use them,
otherwise fall back to the old behavior and warn.
One other thing to note is that the SJA1105 hardware applies a delay
value in degrees rather than in picoseconds (the delay in ps changes
depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at
100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase
shift of the internal delay lines assuming that the device tree meant
gigabit, and we let the hardware scale those according to the link speed.
Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/
Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-18 22:29:52 +03:00
|
|
|
|
|
|
|
err = sja1105_parse_rgmii_delays(priv, index, child);
|
2021-10-21 05:46:06 -04:00
|
|
|
if (err) {
|
|
|
|
of_node_put(child);
|
net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays
This change does not fix any functional issue or address any real life
use case that wasn't possible before. It is just a small step in the
process of standardizing the way in which Ethernet MAC drivers may apply
RGMII delays (traditionally these have been applied by PHYs, with no
clear definition of what to do in the case of a fixed-link).
The sja1105 driver used to apply MAC-level RGMII delays on the RX data
lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or
"rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id".
But the standard definitions don't say anything about behaving
differently when the port is in fixed-link vs when it isn't, and the new
device tree bindings are about having a way of applying the delays in a
way that is independent of the phy-mode and of the fixed-link property.
When the {rx,tx}-internal-delay-ps properties are present, use them,
otherwise fall back to the old behavior and warn.
One other thing to note is that the SJA1105 hardware applies a delay
value in degrees rather than in picoseconds (the delay in ps changes
depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at
100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase
shift of the internal delay lines assuming that the device tree meant
gigabit, and we let the hardware scale those according to the link speed.
Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/
Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-18 22:29:52 +03:00
|
|
|
return err;
|
2021-10-21 05:46:06 -04:00
|
|
|
}
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-06-04 17:01:50 +03:00
|
|
|
static int sja1105_parse_dt(struct sja1105_private *priv)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
|
|
|
struct device *dev = &priv->spidev->dev;
|
|
|
|
struct device_node *switch_node = dev->of_node;
|
|
|
|
struct device_node *ports_node;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
ports_node = of_get_child_by_name(switch_node, "ports");
|
2021-05-31 01:59:32 +03:00
|
|
|
if (!ports_node)
|
|
|
|
ports_node = of_get_child_by_name(switch_node, "ethernet-ports");
|
2019-05-02 23:23:30 +03:00
|
|
|
if (!ports_node) {
|
|
|
|
dev_err(dev, "Incorrect bindings: absent \"ports\" node\n");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
2021-06-04 17:01:50 +03:00
|
|
|
rc = sja1105_parse_ports_node(priv, ports_node);
|
2019-05-02 23:23:30 +03:00
|
|
|
of_node_put(ports_node);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2019-06-08 16:03:41 +03:00
|
|
|
/* Convert link speed from SJA1105 to ethtool encoding */
|
2021-05-31 01:59:37 +03:00
|
|
|
static int sja1105_port_speed_to_ethtool(struct sja1105_private *priv,
|
|
|
|
u64 speed)
|
|
|
|
{
|
|
|
|
if (speed == priv->info->port_speed[SJA1105_SPEED_10MBPS])
|
|
|
|
return SPEED_10;
|
|
|
|
if (speed == priv->info->port_speed[SJA1105_SPEED_100MBPS])
|
|
|
|
return SPEED_100;
|
|
|
|
if (speed == priv->info->port_speed[SJA1105_SPEED_1000MBPS])
|
|
|
|
return SPEED_1000;
|
|
|
|
if (speed == priv->info->port_speed[SJA1105_SPEED_2500MBPS])
|
|
|
|
return SPEED_2500;
|
|
|
|
return SPEED_UNKNOWN;
|
|
|
|
}
|
2019-05-02 23:23:30 +03:00
|
|
|
|
2019-06-08 16:03:44 +03:00
|
|
|
/* Set link speed in the MAC configuration for a specific port. */
|
2019-05-02 23:23:30 +03:00
|
|
|
static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
|
2019-06-08 16:03:44 +03:00
|
|
|
int speed_mbps)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
|
|
|
struct sja1105_mac_config_entry *mac;
|
|
|
|
struct device *dev = priv->ds->dev;
|
2021-05-31 01:59:37 +03:00
|
|
|
u64 speed;
|
2019-05-02 23:23:30 +03:00
|
|
|
int rc;
|
|
|
|
|
2019-06-08 16:03:44 +03:00
|
|
|
/* On P/Q/R/S, one can read from the device via the MAC reconfiguration
|
|
|
|
* tables. On E/T, MAC reconfig tables are not readable, only writable.
|
|
|
|
* We have to *know* what the MAC looks like. For the sake of keeping
|
|
|
|
* the code common, we'll use the static configuration tables as a
|
|
|
|
* reasonable approximation for both E/T and P/Q/R/S.
|
|
|
|
*/
|
2019-05-02 23:23:30 +03:00
|
|
|
mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
|
|
|
|
|
2019-06-03 02:31:37 +03:00
|
|
|
switch (speed_mbps) {
|
2019-06-08 16:03:41 +03:00
|
|
|
case SPEED_UNKNOWN:
|
2019-06-28 00:46:35 +03:00
|
|
|
/* PHYLINK called sja1105_mac_config() to inform us about
|
|
|
|
* the state->interface, but AN has not completed and the
|
|
|
|
* speed is not yet valid. UM10944.pdf says that setting
|
|
|
|
* SJA1105_SPEED_AUTO at runtime disables the port, so that is
|
|
|
|
* ok for power consumption in case AN will never complete -
|
|
|
|
* otherwise PHYLINK should come back with a new update.
|
|
|
|
*/
|
2021-05-31 01:59:37 +03:00
|
|
|
speed = priv->info->port_speed[SJA1105_SPEED_AUTO];
|
2019-06-03 02:31:37 +03:00
|
|
|
break;
|
2019-06-08 16:03:41 +03:00
|
|
|
case SPEED_10:
|
2021-05-31 01:59:37 +03:00
|
|
|
speed = priv->info->port_speed[SJA1105_SPEED_10MBPS];
|
2019-06-03 02:31:37 +03:00
|
|
|
break;
|
2019-06-08 16:03:41 +03:00
|
|
|
case SPEED_100:
|
2021-05-31 01:59:37 +03:00
|
|
|
speed = priv->info->port_speed[SJA1105_SPEED_100MBPS];
|
2019-06-03 02:31:37 +03:00
|
|
|
break;
|
2019-06-08 16:03:41 +03:00
|
|
|
case SPEED_1000:
|
2021-05-31 01:59:37 +03:00
|
|
|
speed = priv->info->port_speed[SJA1105_SPEED_1000MBPS];
|
2019-06-03 02:31:37 +03:00
|
|
|
break;
|
2021-06-11 23:05:31 +03:00
|
|
|
case SPEED_2500:
|
|
|
|
speed = priv->info->port_speed[SJA1105_SPEED_2500MBPS];
|
|
|
|
break;
|
2019-06-03 02:31:37 +03:00
|
|
|
default:
|
2019-05-02 23:23:30 +03:00
|
|
|
dev_err(dev, "Invalid speed %iMbps\n", speed_mbps);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-06-08 16:03:44 +03:00
|
|
|
/* Overwrite SJA1105_SPEED_AUTO from the static MAC configuration
|
|
|
|
* table, since this will be used for the clocking setup, and we no
|
|
|
|
* longer need to store it in the static config (already told hardware
|
|
|
|
* we want auto during upload phase).
|
2020-03-20 13:29:37 +02:00
|
|
|
* Actually for the SGMII port, the MAC is fixed at 1 Gbps and
|
|
|
|
* we need to configure the PCS only (if even that).
|
2019-05-02 23:23:30 +03:00
|
|
|
*/
|
net: dsa: sja1105: add a PHY interface type compatibility matrix
On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.
On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.
We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.
The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.
The 2500base-x support added in this patch is not complete.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-05-31 01:59:36 +03:00
|
|
|
if (priv->phy_mode[port] == PHY_INTERFACE_MODE_SGMII)
|
2021-05-31 01:59:37 +03:00
|
|
|
mac[port].speed = priv->info->port_speed[SJA1105_SPEED_1000MBPS];
|
2021-06-11 23:05:31 +03:00
|
|
|
else if (priv->phy_mode[port] == PHY_INTERFACE_MODE_2500BASEX)
|
|
|
|
mac[port].speed = priv->info->port_speed[SJA1105_SPEED_2500MBPS];
|
2020-03-20 13:29:37 +02:00
|
|
|
else
|
|
|
|
mac[port].speed = speed;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
/* Write to the dynamic reconfiguration tables */
|
2019-06-08 16:03:44 +03:00
|
|
|
rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
|
|
|
|
&mac[port], true);
|
2019-05-02 23:23:30 +03:00
|
|
|
if (rc < 0) {
|
|
|
|
dev_err(dev, "Failed to write MAC config: %d\n", rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Reconfigure the PLLs for the RGMII interfaces (required 125 MHz at
|
|
|
|
* gigabit, 25 MHz at 100 Mbps and 2.5 MHz at 10 Mbps). For MII and
|
|
|
|
* RMII no change of the clock setup is required. Actually, changing
|
|
|
|
* the clock setup does interrupt the clock signal for a certain time
|
|
|
|
* which causes trouble for all PHYs relying on this signal.
|
|
|
|
*/
|
net: dsa: sja1105: add a PHY interface type compatibility matrix
On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.
On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.
We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.
The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.
The 2500base-x support added in this patch is not complete.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-05-31 01:59:36 +03:00
|
|
|
if (!phy_interface_mode_is_rgmii(priv->phy_mode[port]))
|
2019-05-02 23:23:30 +03:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
return sja1105_clocking_setup_port(priv, port);
|
|
|
|
}
|
|
|
|
|
2022-02-25 11:56:12 +00:00
|
|
|
static struct phylink_pcs *
|
2024-04-12 16:15:13 +01:00
|
|
|
sja1105_mac_select_pcs(struct phylink_config *config, phy_interface_t iface)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
2024-04-12 16:15:13 +01:00
|
|
|
struct dsa_port *dp = dsa_phylink_to_port(config);
|
|
|
|
struct sja1105_private *priv = dp->ds->priv;
|
|
|
|
struct dw_xpcs *xpcs = priv->xpcs[dp->index];
|
2020-03-20 13:29:37 +02:00
|
|
|
|
2021-06-11 23:05:28 +03:00
|
|
|
if (xpcs)
|
2022-02-25 11:56:12 +00:00
|
|
|
return &xpcs->pcs;
|
|
|
|
|
|
|
|
return NULL;
|
2019-06-08 16:03:44 +03:00
|
|
|
}
|
|
|
|
|
2024-04-12 16:15:13 +01:00
|
|
|
/* MAC configuration callback; it currently performs no work. */
static void sja1105_mac_config(struct phylink_config *config,
			       unsigned int mode,
			       const struct phylink_link_state *state)
{
}
|
|
|
|
|
|
|
|
static void sja1105_mac_link_down(struct phylink_config *config,
|
2019-06-08 16:03:44 +03:00
|
|
|
unsigned int mode,
|
|
|
|
phy_interface_t interface)
|
|
|
|
{
|
2024-04-12 16:15:13 +01:00
|
|
|
struct dsa_port *dp = dsa_phylink_to_port(config);
|
|
|
|
|
|
|
|
sja1105_inhibit_tx(dp->ds->priv, BIT(dp->index), true);
|
2019-06-08 16:03:44 +03:00
|
|
|
}
|
|
|
|
|
2024-04-12 16:15:13 +01:00
|
|
|
static void sja1105_mac_link_up(struct phylink_config *config,
|
|
|
|
struct phy_device *phydev,
|
2019-06-08 16:03:44 +03:00
|
|
|
unsigned int mode,
|
|
|
|
phy_interface_t interface,
|
2020-02-26 10:23:46 +00:00
|
|
|
int speed, int duplex,
|
|
|
|
bool tx_pause, bool rx_pause)
|
2019-06-08 16:03:44 +03:00
|
|
|
{
|
2024-04-12 16:15:13 +01:00
|
|
|
struct dsa_port *dp = dsa_phylink_to_port(config);
|
|
|
|
struct sja1105_private *priv = dp->ds->priv;
|
|
|
|
int port = dp->index;
|
2020-03-12 12:19:51 +00:00
|
|
|
|
|
|
|
sja1105_adjust_port_config(priv, port, speed);
|
|
|
|
|
|
|
|
sja1105_inhibit_tx(priv, BIT(port), false);
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
2022-02-25 11:56:02 +00:00
|
|
|
static void sja1105_phylink_get_caps(struct dsa_switch *ds, int port,
|
|
|
|
struct phylink_config *config)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
2022-02-25 11:56:23 +00:00
|
|
|
struct sja1105_xmii_params_entry *mii;
|
2022-02-25 11:56:28 +00:00
|
|
|
phy_interface_t phy_mode;
|
2022-02-25 11:56:02 +00:00
|
|
|
|
2022-02-25 11:56:28 +00:00
|
|
|
phy_mode = priv->phy_mode[port];
|
|
|
|
if (phy_mode == PHY_INTERFACE_MODE_SGMII ||
|
|
|
|
phy_mode == PHY_INTERFACE_MODE_2500BASEX) {
|
|
|
|
/* Changing the PHY mode on SERDES ports is possible and makes
|
|
|
|
* sense, because that is done through the XPCS. We allow
|
|
|
|
* changes between SGMII and 2500base-X.
|
|
|
|
*/
|
|
|
|
if (priv->info->supports_sgmii[port])
|
|
|
|
__set_bit(PHY_INTERFACE_MODE_SGMII,
|
|
|
|
config->supported_interfaces);
|
|
|
|
|
|
|
|
if (priv->info->supports_2500basex[port])
|
|
|
|
__set_bit(PHY_INTERFACE_MODE_2500BASEX,
|
|
|
|
config->supported_interfaces);
|
|
|
|
} else {
|
|
|
|
/* The SJA1105 MAC programming model is through the static
|
|
|
|
* config (the xMII Mode table cannot be dynamically
|
|
|
|
* reconfigured), and we have to program that early.
|
|
|
|
*/
|
|
|
|
__set_bit(phy_mode, config->supported_interfaces);
|
|
|
|
}
|
2019-05-02 23:23:38 +03:00
|
|
|
|
|
|
|
/* The MAC does not support pause frames, and also doesn't
|
|
|
|
* support half-duplex traffic modes.
|
|
|
|
*/
|
2022-02-25 11:56:23 +00:00
|
|
|
config->mac_capabilities = MAC_10FD | MAC_100FD;
|
|
|
|
|
|
|
|
mii = priv->static_config.tables[BLK_IDX_XMII_PARAMS].entries;
|
2020-03-20 13:29:37 +02:00
|
|
|
if (mii->xmii_mode[port] == XMII_MODE_RGMII ||
|
|
|
|
mii->xmii_mode[port] == XMII_MODE_SGMII)
|
2022-02-25 11:56:23 +00:00
|
|
|
config->mac_capabilities |= MAC_1000FD;
|
2019-05-02 23:23:38 +03:00
|
|
|
|
2022-02-25 11:56:23 +00:00
|
|
|
if (priv->info->supports_2500basex[port])
|
|
|
|
config->mac_capabilities |= MAC_2500FD;
|
2019-05-02 23:23:38 +03:00
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: Back up static FDB entries in kernel memory
After commit 8456721dd4ec ("net: dsa: sja1105: Add support for
configuring address ageing time"), we started to reset the switch rather
often (each time the bridge core changes the ageing time on a switch
port).
The unfortunate reality is that SJA1105 doesn't have any {cold, warm,
whatever} reset mode in which it accepts a new configuration stream
without flushing the FDB. Instead, in its world, the FDB *is* an
optional part of the static configuration.
So we play its game, and do what we also do for VLANs: for each 'bridge
fdb' command, we add the FDB entry through the dynamic interface, and we
append the in-kernel static config memory with info that we're going to
use later, when the next reset command is going to be issued.
The result is that 'bridge fdb' commands are now persistent (dynamically
learned entries are lost, but that's ok).
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-26 02:39:38 +03:00
|
|
|
static int
|
|
|
|
sja1105_find_static_fdb_entry(struct sja1105_private *priv, int port,
|
|
|
|
const struct sja1105_l2_lookup_entry *requested)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_lookup_entry *l2_lookup;
|
|
|
|
struct sja1105_table *table;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP];
|
|
|
|
l2_lookup = table->entries;
|
|
|
|
|
|
|
|
for (i = 0; i < table->entry_count; i++)
|
|
|
|
if (l2_lookup[i].macaddr == requested->macaddr &&
|
|
|
|
l2_lookup[i].vlanid == requested->vlanid &&
|
|
|
|
l2_lookup[i].destports & BIT(port))
|
|
|
|
return i;
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We want FDB entries added statically through the bridge command to persist
|
|
|
|
* across switch resets, which are a common thing during normal SJA1105
|
|
|
|
* operation. So we have to back them up in the static configuration tables
|
|
|
|
* and hence apply them on next static config upload... yay!
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
sja1105_static_fdb_change(struct sja1105_private *priv, int port,
|
|
|
|
const struct sja1105_l2_lookup_entry *requested,
|
|
|
|
bool keep)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_lookup_entry *l2_lookup;
|
|
|
|
struct sja1105_table *table;
|
|
|
|
int rc, match;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP];
|
|
|
|
|
|
|
|
match = sja1105_find_static_fdb_entry(priv, port, requested);
|
|
|
|
if (match < 0) {
|
|
|
|
/* Can't delete a missing entry. */
|
|
|
|
if (!keep)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* No match => new entry */
|
|
|
|
rc = sja1105_table_resize(table, table->entry_count + 1);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
match = table->entry_count - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Assign pointer after the resize (it may be new memory) */
|
|
|
|
l2_lookup = table->entries;
|
|
|
|
|
|
|
|
/* We have a match.
|
|
|
|
* If the job was to add this FDB entry, it's already done (mostly
|
|
|
|
* anyway, since the port forwarding mask may have changed, case in
|
|
|
|
* which we update it).
|
|
|
|
* Otherwise we have to delete it.
|
|
|
|
*/
|
|
|
|
if (keep) {
|
|
|
|
l2_lookup[match] = *requested;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* To remove, the strategy is to overwrite the element with
|
|
|
|
* the last one, and then reduce the array size by 1
|
|
|
|
*/
|
|
|
|
l2_lookup[match] = l2_lookup[table->entry_count - 1];
|
|
|
|
return sja1105_table_resize(table, table->entry_count - 1);
|
|
|
|
}
|
|
|
|
|
2019-05-02 23:23:31 +03:00
|
|
|
/* First-generation switches have a 4-way set associative TCAM that
|
|
|
|
* holds the FDB entries. An FDB index spans from 0 to 1023 and is comprised of
|
|
|
|
* a "bin" (grouping of 4 entries) and a "way" (an entry within a bin).
|
|
|
|
* For the placement of a newly learnt FDB entry, the switch selects the bin
|
|
|
|
* based on a hash function, and the way within that bin incrementally.
|
|
|
|
*/
|
2019-10-01 22:17:59 +03:00
|
|
|
static int sja1105et_fdb_index(int bin, int way)
|
2019-05-02 23:23:31 +03:00
|
|
|
{
|
|
|
|
return bin * SJA1105ET_FDB_BIN_SIZE + way;
|
|
|
|
}
|
|
|
|
|
2019-06-03 00:11:57 +03:00
|
|
|
static int sja1105et_is_fdb_entry_in_bin(struct sja1105_private *priv, int bin,
|
|
|
|
const u8 *addr, u16 vid,
|
|
|
|
struct sja1105_l2_lookup_entry *match,
|
|
|
|
int *last_unused)
|
2019-05-02 23:23:31 +03:00
|
|
|
{
|
|
|
|
int way;
|
|
|
|
|
|
|
|
for (way = 0; way < SJA1105ET_FDB_BIN_SIZE; way++) {
|
|
|
|
struct sja1105_l2_lookup_entry l2_lookup = {0};
|
|
|
|
int index = sja1105et_fdb_index(bin, way);
|
|
|
|
|
|
|
|
/* Skip unused entries, optionally marking them
|
|
|
|
* into the return value
|
|
|
|
*/
|
|
|
|
if (sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
|
|
|
|
index, &l2_lookup)) {
|
|
|
|
if (last_unused)
|
|
|
|
*last_unused = way;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (l2_lookup.macaddr == ether_addr_to_u64(addr) &&
|
|
|
|
l2_lookup.vlanid == vid) {
|
|
|
|
if (match)
|
|
|
|
*match = l2_lookup;
|
|
|
|
return way;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Return an invalid entry index if not found */
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2019-06-03 00:11:57 +03:00
|
|
|
int sja1105et_fdb_add(struct dsa_switch *ds, int port,
|
|
|
|
const unsigned char *addr, u16 vid)
|
2019-05-02 23:23:31 +03:00
|
|
|
{
|
net: dsa: sja1105: invalidate dynamic FDB entries learned concurrently with statically added ones
The procedure to add a static FDB entry in sja1105 is concurrent with
dynamic learning performed on all bridge ports and the CPU port.
The switch looks up the FDB from left to right, and also learns
dynamically from left to right, so it is possible that between the
moment when we pick up a free slot to install an FDB entry, another slot
to the left of that one becomes free due to an address ageing out, and
that other slot is then immediately used by the switch to learn
dynamically the same address as we're trying to add statically.
The result is that we succeeded in adding our static FDB entry, but it is
being shadowed by a dynamic FDB entry to its left, and the switch will
behave as if our static FDB entry did not exist.
We cannot really prevent this from happening unless we make the entire
process to add a static FDB entry a huge critical section where address
learning is temporarily disabled on _all_ ports, and then re-enabled
according to the configuration done by sja1105_port_set_learning.
However, that is kind of disruptive for the operation of the network.
What we can do alternatively is to simply read back the FDB for dynamic
entries located before our newly added static one, and delete them.
This will guarantee that our static FDB entry is now operational. It
will still not guarantee that there aren't dynamic FDB entries to the
_right_ of that static FDB entry, but at least those entries will age
out by themselves since they aren't hit, and won't bother anyone.
Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management")
Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-30 20:18:12 +03:00
|
|
|
struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp;
|
2019-05-02 23:23:31 +03:00
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
struct device *dev = ds->dev;
|
|
|
|
int last_unused = -1;
|
net: dsa: sja1105: invalidate dynamic FDB entries learned concurrently with statically added ones
The procedure to add a static FDB entry in sja1105 is concurrent with
dynamic learning performed on all bridge ports and the CPU port.
The switch looks up the FDB from left to right, and also learns
dynamically from left to right, so it is possible that between the
moment when we pick up a free slot to install an FDB entry, another slot
to the left of that one becomes free due to an address ageing out, and
that other slot is then immediately used by the switch to learn
dynamically the same address as we're trying to add statically.
The result is that we succeeded to add our static FDB entry, but it is
being shadowed by a dynamic FDB entry to its left, and the switch will
behave as if our static FDB entry did not exist.
We cannot really prevent this from happening unless we make the entire
process to add a static FDB entry a huge critical section where address
learning is temporarily disabled on _all_ ports, and then re-enabled
according to the configuration done by sja1105_port_set_learning.
However, that is kind of disruptive for the operation of the network.
What we can do alternatively is to simply read back the FDB for dynamic
entries located before our newly added static one, and delete them.
This will guarantee that our static FDB entry is now operational. It
will still not guarantee that there aren't dynamic FDB entries to the
_right_ of that static FDB entry, but at least those entries will age
out by themselves since they aren't hit, and won't bother anyone.
Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management")
Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-30 20:18:12 +03:00
|
|
|
int start, end, i;
|
net: dsa: sja1105: Back up static FDB entries in kernel memory
After commit 8456721dd4ec ("net: dsa: sja1105: Add support for
configuring address ageing time"), we started to reset the switch rather
often (each time the bridge core changes the ageing time on a switch
port).
The unfortunate reality is that SJA1105 doesn't have any {cold, warm,
whatever} reset mode in which it accepts a new configuration stream
without flushing the FDB. Instead, in its world, the FDB *is* an
optional part of the static configuration.
So we play its game, and do what we also do for VLANs: for each 'bridge
fdb' command, we add the FDB entry through the dynamic interface, and we
append the in-kernel static config memory with info that we're going to
use later, when the next reset command is going to be issued.
The result is that 'bridge fdb' commands are now persistent (dynamically
learned entries are lost, but that's ok).
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-26 02:39:38 +03:00
|
|
|
int bin, way, rc;
|
2019-05-02 23:23:31 +03:00
|
|
|
|
2019-06-03 00:11:57 +03:00
|
|
|
bin = sja1105et_fdb_hash(priv, addr, vid);
|
2019-05-02 23:23:31 +03:00
|
|
|
|
2019-06-03 00:11:57 +03:00
|
|
|
way = sja1105et_is_fdb_entry_in_bin(priv, bin, addr, vid,
|
|
|
|
&l2_lookup, &last_unused);
|
2019-05-02 23:23:31 +03:00
|
|
|
if (way >= 0) {
|
|
|
|
/* We have an FDB entry. Is our port in the destination
|
|
|
|
* mask? If yes, we need to do nothing. If not, we need
|
|
|
|
* to rewrite the entry by adding this port to it.
|
|
|
|
*/
|
2021-07-30 20:18:11 +03:00
|
|
|
if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds)
|
2019-05-02 23:23:31 +03:00
|
|
|
return 0;
|
|
|
|
l2_lookup.destports |= BIT(port);
|
|
|
|
} else {
|
|
|
|
int index = sja1105et_fdb_index(bin, way);
|
|
|
|
|
|
|
|
/* We don't have an FDB entry. We construct a new one and
|
|
|
|
* try to find a place for it within the FDB table.
|
|
|
|
*/
|
|
|
|
l2_lookup.macaddr = ether_addr_to_u64(addr);
|
|
|
|
l2_lookup.destports = BIT(port);
|
|
|
|
l2_lookup.vlanid = vid;
|
|
|
|
|
|
|
|
if (last_unused >= 0) {
|
|
|
|
way = last_unused;
|
|
|
|
} else {
|
|
|
|
/* Bin is full, need to evict somebody.
|
|
|
|
* Choose victim at random. If you get these messages
|
|
|
|
* often, you may need to consider changing the
|
|
|
|
* distribution function:
|
|
|
|
* static_config[BLK_IDX_L2_LOOKUP_PARAMS].entries->poly
|
|
|
|
*/
|
|
|
|
get_random_bytes(&way, sizeof(u8));
|
|
|
|
way %= SJA1105ET_FDB_BIN_SIZE;
|
|
|
|
dev_warn(dev, "Warning, FDB bin %d full while adding entry for %pM. Evicting entry %u.\n",
|
|
|
|
bin, addr, way);
|
|
|
|
/* Evict entry */
|
|
|
|
sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
|
|
|
|
index, NULL, false);
|
|
|
|
}
|
|
|
|
}
|
2021-07-30 20:18:11 +03:00
|
|
|
l2_lookup.lockeds = true;
|
2019-05-02 23:23:31 +03:00
|
|
|
l2_lookup.index = sja1105et_fdb_index(bin, way);
|
|
|
|
|
net: dsa: sja1105: Back up static FDB entries in kernel memory
After commit 8456721dd4ec ("net: dsa: sja1105: Add support for
configuring address ageing time"), we started to reset the switch rather
often (each time the bridge core changes the ageing time on a switch
port).
The unfortunate reality is that SJA1105 doesn't have any {cold, warm,
whatever} reset mode in which it accepts a new configuration stream
without flushing the FDB. Instead, in its world, the FDB *is* an
optional part of the static configuration.
So we play its game, and do what we also do for VLANs: for each 'bridge
fdb' command, we add the FDB entry through the dynamic interface, and we
append the in-kernel static config memory with info that we're going to
use later, when the next reset command is going to be issued.
The result is that 'bridge fdb' commands are now persistent (dynamically
learned entries are lost, but that's ok).
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-26 02:39:38 +03:00
|
|
|
rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
|
|
|
|
l2_lookup.index, &l2_lookup,
|
|
|
|
true);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
|
net: dsa: sja1105: invalidate dynamic FDB entries learned concurrently with statically added ones
The procedure to add a static FDB entry in sja1105 is concurrent with
dynamic learning performed on all bridge ports and the CPU port.
The switch looks up the FDB from left to right, and also learns
dynamically from left to right, so it is possible that between the
moment when we pick up a free slot to install an FDB entry, another slot
to the left of that one becomes free due to an address ageing out, and
that other slot is then immediately used by the switch to learn
dynamically the same address as we're trying to add statically.
The result is that we succeeded to add our static FDB entry, but it is
being shadowed by a dynamic FDB entry to its left, and the switch will
behave as if our static FDB entry did not exist.
We cannot really prevent this from happening unless we make the entire
process to add a static FDB entry a huge critical section where address
learning is temporarily disabled on _all_ ports, and then re-enabled
according to the configuration done by sja1105_port_set_learning.
However, that is kind of disruptive for the operation of the network.
What we can do alternatively is to simply read back the FDB for dynamic
entries located before our newly added static one, and delete them.
This will guarantee that our static FDB entry is now operational. It
will still not guarantee that there aren't dynamic FDB entries to the
_right_ of that static FDB entry, but at least those entries will age
out by themselves since they aren't hit, and won't bother anyone.
Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management")
Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-30 20:18:12 +03:00
|
|
|
/* Invalidate a dynamically learned entry if that exists */
|
|
|
|
start = sja1105et_fdb_index(bin, 0);
|
|
|
|
end = sja1105et_fdb_index(bin, way);
|
|
|
|
|
|
|
|
for (i = start; i < end; i++) {
|
|
|
|
rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
|
|
|
|
i, &tmp);
|
|
|
|
if (rc == -ENOENT)
|
|
|
|
continue;
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
|
|
|
|
i, NULL, false);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: Back up static FDB entries in kernel memory
After commit 8456721dd4ec ("net: dsa: sja1105: Add support for
configuring address ageing time"), we started to reset the switch rather
often (each time the bridge core changes the ageing time on a switch
port).
The unfortunate reality is that SJA1105 doesn't have any {cold, warm,
whatever} reset mode in which it accepts a new configuration stream
without flushing the FDB. Instead, in its world, the FDB *is* an
optional part of the static configuration.
So we play its game, and do what we also do for VLANs: for each 'bridge
fdb' command, we add the FDB entry through the dynamic interface, and we
append the in-kernel static config memory with info that we're going to
use later, when the next reset command is going to be issued.
The result is that 'bridge fdb' commands are now persistent (dynamically
learned entries are lost, but that's ok).
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-26 02:39:38 +03:00
|
|
|
return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
|
2019-05-02 23:23:31 +03:00
|
|
|
}
|
|
|
|
|
2019-06-03 00:11:57 +03:00
|
|
|
/* Remove @port from the FDB entry for {@addr, @vid} on the SJA1105E/T family.
 *
 * Returns 0 if no matching entry exists in the bin or on success, otherwise
 * the negative error code of the failing dynamic/static config operation.
 */
int sja1105et_fdb_del(struct dsa_switch *ds, int port,
		      const unsigned char *addr, u16 vid)
{
	struct sja1105_private *priv = ds->priv;
	struct sja1105_l2_lookup_entry entry = {0};
	int bin, way, slot, err;
	bool still_used;

	bin = sja1105et_fdb_hash(priv, addr, vid);
	way = sja1105et_is_fdb_entry_in_bin(priv, bin, addr, vid,
					    &entry, NULL);
	/* No entry for this address in its bin: nothing to delete */
	if (way < 0)
		return 0;

	slot = sja1105et_fdb_index(bin, way);

	/* Drop our port from the destination mask. As long as other ports
	 * remain in the mask the entry is written back; once the mask is
	 * empty the entry is written with the last argument false, which
	 * evicts it.
	 */
	entry.destports &= ~BIT(port);
	still_used = entry.destports != 0;

	err = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
					   slot, &entry, still_used);
	if (err < 0)
		return err;

	/* Keep the in-kernel static config in sync with the hardware */
	return sja1105_static_fdb_change(priv, port, &entry, still_used);
}
|
|
|
|
|
2019-06-03 00:11:57 +03:00
|
|
|
/* Install (or extend) a static FDB entry for {@addr, @vid} towards @port on
 * the SJA1105P/Q/R/S family, matching exactly on both MAC address and VLAN ID.
 *
 * Returns 0 on success or if a locked entry already containing @port exists,
 * -EINVAL if no free FDB slot could be found, or the negative error code of
 * the failing dynamic/static config operation.
 */
int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
			const unsigned char *addr, u16 vid)
{
	struct sja1105_private *priv = ds->priv;
	struct sja1105_l2_lookup_entry entry = {0};
	struct sja1105_l2_lookup_entry probe;
	int slot, err;

	/* Describe the entry we want, with full-width masks on both keys */
	entry.macaddr = ether_addr_to_u64(addr);
	entry.vlanid = vid;
	entry.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
	entry.mask_vlanid = VLAN_VID_MASK;
	entry.destports = BIT(port);

	/* Search the FDB using a scratch copy, so that whatever the search
	 * returns does not clobber the entry we intend to program.
	 */
	probe = entry;
	err = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
					  SJA1105_SEARCH, &probe);

	/* A hit on the very last FDB slot is treated the same as "not found" */
	if (err == 0 && probe.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) {
		/* A static entry already containing this port: job done */
		if ((probe.destports & BIT(port)) && probe.lockeds)
			return 0;

		/* Reuse the found entry, including the index reported by the
		 * switch, and add our port to its mask.
		 */
		entry = probe;
		entry.destports |= BIT(port);
	} else {
		/* Probe every position in turn and take the first one whose
		 * read fails, which is treated as an unused spot.
		 */
		for (slot = 0; slot < SJA1105_MAX_L2_LOOKUP_COUNT; slot++) {
			if (sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
							slot, NULL) < 0)
				break;
		}

		if (slot == SJA1105_MAX_L2_LOOKUP_COUNT) {
			dev_err(ds->dev, "FDB is full, cannot add entry.\n");
			return -EINVAL;
		}

		entry.index = slot;
	}

	entry.lockeds = true;

	err = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
					   entry.index, &entry, true);
	if (err < 0)
		return err;

	/* The switch may have dynamically learned the same address while we
	 * were adding it. Search again: if the match is at a smaller index
	 * than our static entry, it would be hit first, so invalidate it. A
	 * match at a larger index is left alone.
	 */
	probe = entry;
	err = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
					  SJA1105_SEARCH, &probe);
	if (err < 0) {
		dev_err(ds->dev,
			"port %d failed to read back entry for %pM vid %d: %pe\n",
			port, addr, vid, ERR_PTR(err));
		return err;
	}

	if (probe.index < entry.index) {
		err = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
						   probe.index, NULL, false);
		if (err < 0)
			return err;
	}

	/* Mirror the change into the in-kernel static config */
	return sja1105_static_fdb_change(priv, port, &entry, true);
}
|
|
|
|
|
|
|
|
/* Remove @port from the L2 lookup entry matching {@addr, @vid}.
 *
 * The entry is searched for with an exact match on both the MAC address
 * and the VLAN ID. If other destination ports remain in the entry after
 * clearing @port, the entry is rewritten with the reduced port mask;
 * otherwise it is invalidated. The same change is then mirrored through
 * sja1105_static_fdb_change().
 *
 * Returns 0 on success and also when the search itself fails (a failed
 * search is not reported as an error to the caller), or a negative error
 * code if the dynamic config write or the static FDB change fails.
 */
int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
			const unsigned char *addr, u16 vid)
{
	struct sja1105_private *priv = ds->priv;
	struct sja1105_l2_lookup_entry entry = {
		.macaddr = ether_addr_to_u64(addr),
		.vlanid = vid,
		.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0),
		.mask_vlanid = VLAN_VID_MASK,
		.destports = BIT(port),
	};
	bool keep;
	int rc;

	rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
					 SJA1105_SEARCH, &entry);
	if (rc < 0)
		return 0;

	entry.destports &= ~BIT(port);

	/* With at least one other destination port still set, only this
	 * port is dropped from the entry; with none left, the whole entry
	 * goes away.
	 */
	keep = entry.destports != 0;

	rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
					  entry.index, &entry, keep);
	if (rc < 0)
		return rc;

	return sja1105_static_fdb_change(priv, port, &entry, keep);
}
|
|
|
|
|
|
|
|
/* Install an FDB entry for {@addr, @vid} towards @port.
 *
 * A @vid of 0 is replaced by a tag_8021q VID derived from @db: the
 * standalone VID of db.dp for a port database, or the bridge VID of
 * db.bridge.num for a bridge database. Any other database type is
 * rejected with -EOPNOTSUPP.
 *
 * The chip-specific fdb_add_cmd() runs with priv->fdb_lock held, and its
 * return code is passed back to the caller.
 */
static int sja1105_fdb_add(struct dsa_switch *ds, int port,
			   const unsigned char *addr, u16 vid,
			   struct dsa_db db)
{
	struct sja1105_private *priv = ds->priv;
	int err;

	if (!vid) {
		if (db.type == DSA_DB_PORT)
			vid = dsa_tag_8021q_standalone_vid(db.dp);
		else if (db.type == DSA_DB_BRIDGE)
			vid = dsa_tag_8021q_bridge_vid(db.bridge.num);
		else
			return -EOPNOTSUPP;
	}

	mutex_lock(&priv->fdb_lock);
	err = priv->info->fdb_add_cmd(ds, port, addr, vid);
	mutex_unlock(&priv->fdb_lock);

	return err;
}
|
|
|
|
|
2023-09-08 16:33:51 +03:00
|
|
|
/* Unlocked FDB deletion for {@addr, @vid} on @port.
 *
 * This function does not take priv->fdb_lock itself; sja1105_fdb_del()
 * and sja1105_fast_age() both call it with that mutex already held.
 *
 * A @vid of 0 is replaced by a tag_8021q VID derived from @db: the
 * standalone VID of db.dp for a port database, or the bridge VID of
 * db.bridge.num for a bridge database. Any other database type is
 * rejected with -EOPNOTSUPP. The result of the chip-specific
 * fdb_del_cmd() is returned otherwise.
 */
static int __sja1105_fdb_del(struct dsa_switch *ds, int port,
			     const unsigned char *addr, u16 vid,
			     struct dsa_db db)
{
	struct sja1105_private *priv = ds->priv;

	if (!vid) {
		if (db.type == DSA_DB_PORT)
			vid = dsa_tag_8021q_standalone_vid(db.dp);
		else if (db.type == DSA_DB_BRIDGE)
			vid = dsa_tag_8021q_bridge_vid(db.bridge.num);
		else
			return -EOPNOTSUPP;
	}

	return priv->info->fdb_del_cmd(ds, port, addr, vid);
}
|
|
|
|
|
2023-09-08 16:33:51 +03:00
|
|
|
/* Delete the FDB entry for {@addr, @vid} on @port.
 *
 * Wraps __sja1105_fdb_del() with priv->fdb_lock held for the duration of
 * the call and returns its result unchanged.
 */
static int sja1105_fdb_del(struct dsa_switch *ds, int port,
			   const unsigned char *addr, u16 vid,
			   struct dsa_db db)
{
	struct sja1105_private *priv = ds->priv;
	int err;

	mutex_lock(&priv->fdb_lock);
	err = __sja1105_fdb_del(ds, port, addr, vid, db);
	mutex_unlock(&priv->fdb_lock);

	return err;
}
|
|
|
|
|
2019-05-02 23:23:31 +03:00
|
|
|
static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
|
|
|
|
dsa_fdb_dump_cb_t *cb, void *data)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
struct device *dev = ds->dev;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) {
|
|
|
|
struct sja1105_l2_lookup_entry l2_lookup = {0};
|
|
|
|
u8 macaddr[ETH_ALEN];
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
|
|
|
|
i, &l2_lookup);
|
|
|
|
/* No fdb entry at i, not an issue */
|
2019-06-03 00:11:59 +03:00
|
|
|
if (rc == -ENOENT)
|
2019-05-02 23:23:31 +03:00
|
|
|
continue;
|
|
|
|
if (rc) {
|
|
|
|
dev_err(dev, "Failed to dump FDB: %d\n", rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* FDB dump callback is per port. This means we have to
|
|
|
|
* disregard a valid entry if it's not for this port, even if
|
|
|
|
* only to revisit it later. This is inefficient because the
|
|
|
|
* 1024-sized FDB table needs to be traversed 4 times through
|
|
|
|
* SPI during a 'bridge fdb show' command.
|
|
|
|
*/
|
|
|
|
if (!(l2_lookup.destports & BIT(port)))
|
|
|
|
continue;
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
|
2019-05-02 23:23:31 +03:00
|
|
|
u64_to_ether_addr(l2_lookup.macaddr, macaddr);
|
2019-06-03 00:16:01 +03:00
|
|
|
|
2023-09-08 16:33:48 +03:00
|
|
|
/* Hardware FDB is shared for fdb and mdb, "bridge fdb show"
|
|
|
|
* only wants to see unicast
|
|
|
|
*/
|
|
|
|
if (is_multicast_ether_addr(macaddr))
|
|
|
|
continue;
|
|
|
|
|
net: dsa: sja1105: Fix broken learning with vlan_filtering disabled
When put under a bridge with vlan_filtering 0, the SJA1105 ports will
flood all traffic as if learning was broken. This is because learning
interferes with the rx_vid's configured by dsa_8021q as unique pvid's.
So learning technically still *does* work, it's just that the learnt
entries never get matched due to their unique VLAN ID.
The setting that saves the day is Shared VLAN Learning, which on this
switch family works exactly as desired: VLAN tagging still works
(untagged traffic gets the correct pvid) and FDB entries are still
populated with the correct contents including VID. Also, a frame cannot
violate the forwarding domain restrictions enforced by its classified
VLAN. It is just that the VID is ignored when looking up the FDB for
taking a forwarding decision (selecting the egress port).
This patch activates SVL, and the result is that frames with a learnt
DMAC are no longer flooded in the scenario described above.
Now exactly *because* SVL works as desired, we have to revisit some
earlier patches:
- It is no longer necessary to manipulate the VID of the 'bridge fdb
{add,del}' command when vlan_filtering is off. This is because now,
SVL is enabled for that case, so the actual VID does not matter*.
- It is still desirable to hide dsa_8021q VID's in the FDB dump
callback. But right now the dump callback should no longer hide
duplicates (one per each front panel port's pvid, plus one for the
VLAN that the CPU port is going to tag a TX frame with), because there
shouldn't be any (the switch will match a single FDB entry no matter
its VID anyway).
* Not really... It's no longer necessary to transform a 'bridge fdb add'
into 5 fdb add operations, but the user might still add a fdb entry with
any vid, and all of them would appear as duplicates in 'bridge fdb
show'. So force a 'bridge fdb add' to insert the VID of 0**, so that we
can prune the duplicates at insertion time.
** The VID of 0 is better than 1 because it is always guaranteed to be
in the ports' hardware filter. DSA also avoids putting the VID inside
the netlink response message towards the bridge driver when we return
this particular VID, which makes it suitable for FDB entries learnt
with vlan_filtering off.
Fixes: 227d07a07ef1 ("net: dsa: sja1105: Add support for traffic through standalone ports")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Georg Waibel <georg.waibel@sensor-technik.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-08-05 01:38:44 +03:00
|
|
|
/* We need to hide the dsa_8021q VLANs from the user. */
|
2022-02-25 11:22:24 +02:00
|
|
|
if (vid_is_dsa_8021q(l2_lookup.vlanid))
|
net: dsa: sja1105: Fix broken learning with vlan_filtering disabled
When put under a bridge with vlan_filtering 0, the SJA1105 ports will
flood all traffic as if learning was broken. This is because learning
interferes with the rx_vid's configured by dsa_8021q as unique pvid's.
So learning technically still *does* work, it's just that the learnt
entries never get matched due to their unique VLAN ID.
The setting that saves the day is Shared VLAN Learning, which on this
switch family works exactly as desired: VLAN tagging still works
(untagged traffic gets the correct pvid) and FDB entries are still
populated with the correct contents including VID. Also, a frame cannot
violate the forwarding domain restrictions enforced by its classified
VLAN. It is just that the VID is ignored when looking up the FDB for
taking a forwarding decision (selecting the egress port).
This patch activates SVL, and the result is that frames with a learnt
DMAC are no longer flooded in the scenario described above.
Now exactly *because* SVL works as desired, we have to revisit some
earlier patches:
- It is no longer necessary to manipulate the VID of the 'bridge fdb
{add,del}' command when vlan_filtering is off. This is because now,
SVL is enabled for that case, so the actual VID does not matter*.
- It is still desirable to hide dsa_8021q VID's in the FDB dump
callback. But right now the dump callback should no longer hide
duplicates (one per each front panel port's pvid, plus one for the
VLAN that the CPU port is going to tag a TX frame with), because there
shouldn't be any (the switch will match a single FDB entry no matter
its VID anyway).
* Not really... It's no longer necessary to transform a 'bridge fdb add'
into 5 fdb add operations, but the user might still add a fdb entry with
any vid, and all of them would appear as duplicates in 'bridge fdb
show'. So force a 'bridge fdb add' to insert the VID of 0**, so that we
can prune the duplicates at insertion time.
** The VID of 0 is better than 1 because it is always guaranteed to be
in the ports' hardware filter. DSA also avoids putting the VID inside
the netlink response message towards the bridge driver when we return
this particular VID, which makes it suitable for FDB entries learnt
with vlan_filtering off.
Fixes: 227d07a07ef1 ("net: dsa: sja1105: Add support for traffic through standalone ports")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Georg Waibel <georg.waibel@sensor-technik.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-08-05 01:38:44 +03:00
|
|
|
l2_lookup.vlanid = 0;
|
2021-08-10 14:19:56 +03:00
|
|
|
rc = cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
2019-05-02 23:23:31 +03:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-08-08 17:35:27 +03:00
|
|
|
static void sja1105_fast_age(struct dsa_switch *ds, int port)
|
|
|
|
{
|
net: dsa: request drivers to perform FDB isolation
For DSA, to encourage drivers to perform FDB isolation simply means to
track which bridge does each FDB and MDB entry belong to. It then
becomes the driver responsibility to use something that makes the FDB
entry from one bridge not match the FDB lookup of ports from other
bridges.
The top-level functions where the bridge is determined are:
- dsa_port_fdb_{add,del}
- dsa_port_host_fdb_{add,del}
- dsa_port_mdb_{add,del}
- dsa_port_host_mdb_{add,del}
aka the pre-crosschip-notifier functions.
Changing the API to pass a reference to a bridge is not superfluous, and
looking at the passed bridge argument is not the same as having the
driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add()
method.
DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well,
and those do not have any dp->bridge information to retrieve, because
they are not in any bridge - they are merely the pipes that serve the
user ports that are in one or multiple bridges.
The struct dsa_bridge associated with each FDB/MDB entry is encapsulated
in a larger "struct dsa_db" database. Although only databases associated
to bridges are notified for now, this API will be the starting point for
implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB
entries on the CPU port which belong to the corresponding user port's
port database. These are supposed to match only when the port is
standalone.
It is better to introduce the API in its expected final form than to
introduce it for bridges first, then to have to change drivers which may
have made one or more assumptions.
Drivers can use the provided bridge.num, but they can also use a
different numbering scheme that is more convenient.
DSA must perform refcounting on the CPU and DSA ports by also taking
into account the bridge number. So if two bridges request the same local
address, DSA must notify the driver twice, once for each bridge.
In fact, if the driver supports FDB isolation, DSA must perform
refcounting per bridge, but if the driver doesn't, DSA must refcount
host addresses across all bridges, otherwise it would be telling the
driver to delete an FDB entry for a bridge and the driver would delete
it for all bridges. So introduce a bool fdb_isolation in drivers which
would make all bridge databases passed to the cross-chip notifier have
the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal()
say that all bridge databases are the same database - which is
essentially the legacy behavior.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 11:22:22 +02:00
|
|
|
struct dsa_port *dp = dsa_to_port(ds, port);
|
2021-08-08 17:35:27 +03:00
|
|
|
struct sja1105_private *priv = ds->priv;
|
net: dsa: request drivers to perform FDB isolation
For DSA, to encourage drivers to perform FDB isolation simply means to
track which bridge does each FDB and MDB entry belong to. It then
becomes the driver responsibility to use something that makes the FDB
entry from one bridge not match the FDB lookup of ports from other
bridges.
The top-level functions where the bridge is determined are:
- dsa_port_fdb_{add,del}
- dsa_port_host_fdb_{add,del}
- dsa_port_mdb_{add,del}
- dsa_port_host_mdb_{add,del}
aka the pre-crosschip-notifier functions.
Changing the API to pass a reference to a bridge is not superfluous, and
looking at the passed bridge argument is not the same as having the
driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add()
method.
DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well,
and those do not have any dp->bridge information to retrieve, because
they are not in any bridge - they are merely the pipes that serve the
user ports that are in one or multiple bridges.
The struct dsa_bridge associated with each FDB/MDB entry is encapsulated
in a larger "struct dsa_db" database. Although only databases associated
to bridges are notified for now, this API will be the starting point for
implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB
entries on the CPU port which belong to the corresponding user port's
port database. These are supposed to match only when the port is
standalone.
It is better to introduce the API in its expected final form than to
introduce it for bridges first, then to have to change drivers which may
have made one or more assumptions.
Drivers can use the provided bridge.num, but they can also use a
different numbering scheme that is more convenient.
DSA must perform refcounting on the CPU and DSA ports by also taking
into account the bridge number. So if two bridges request the same local
address, DSA must notify the driver twice, once for each bridge.
In fact, if the driver supports FDB isolation, DSA must perform
refcounting per bridge, but if the driver doesn't, DSA must refcount
host addresses across all bridges, otherwise it would be telling the
driver to delete an FDB entry for a bridge and the driver would delete
it for all bridges. So introduce a bool fdb_isolation in drivers which
would make all bridge databases passed to the cross-chip notifier have
the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal()
say that all bridge databases are the same database - which is
essentially the legacy behavior.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 11:22:22 +02:00
|
|
|
struct dsa_db db = {
|
|
|
|
.type = DSA_DB_BRIDGE,
|
|
|
|
.bridge = {
|
|
|
|
.dev = dsa_port_bridge_dev_get(dp),
|
|
|
|
.num = dsa_port_bridge_num_get(dp),
|
|
|
|
},
|
|
|
|
};
|
2021-08-08 17:35:27 +03:00
|
|
|
int i;
|
|
|
|
|
2023-09-08 16:33:51 +03:00
|
|
|
mutex_lock(&priv->fdb_lock);
|
|
|
|
|
2021-08-08 17:35:27 +03:00
|
|
|
for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) {
|
|
|
|
struct sja1105_l2_lookup_entry l2_lookup = {0};
|
|
|
|
u8 macaddr[ETH_ALEN];
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
|
|
|
|
i, &l2_lookup);
|
|
|
|
/* No fdb entry at i, not an issue */
|
|
|
|
if (rc == -ENOENT)
|
|
|
|
continue;
|
|
|
|
if (rc) {
|
|
|
|
dev_err(ds->dev, "Failed to read FDB: %pe\n",
|
|
|
|
ERR_PTR(rc));
|
2023-09-08 16:33:51 +03:00
|
|
|
break;
|
2021-08-08 17:35:27 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!(l2_lookup.destports & BIT(port)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Don't delete static FDB entries */
|
|
|
|
if (l2_lookup.lockeds)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
u64_to_ether_addr(l2_lookup.macaddr, macaddr);
|
|
|
|
|
2023-09-08 16:33:51 +03:00
|
|
|
rc = __sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid, db);
|
2021-08-08 17:35:27 +03:00
|
|
|
if (rc) {
|
|
|
|
dev_err(ds->dev,
|
|
|
|
"Failed to delete FDB entry %pM vid %lld: %pe\n",
|
|
|
|
macaddr, l2_lookup.vlanid, ERR_PTR(rc));
|
2023-09-08 16:33:51 +03:00
|
|
|
break;
|
2021-08-08 17:35:27 +03:00
|
|
|
}
|
|
|
|
}
|
2023-09-08 16:33:51 +03:00
|
|
|
|
|
|
|
mutex_unlock(&priv->fdb_lock);
|
2021-08-08 17:35:27 +03:00
|
|
|
}
|
|
|
|
|
2021-01-09 02:01:52 +02:00
|
|
|
static int sja1105_mdb_add(struct dsa_switch *ds, int port,
|
net: dsa: request drivers to perform FDB isolation
For DSA, to encourage drivers to perform FDB isolation simply means to
track which bridge does each FDB and MDB entry belong to. It then
becomes the driver responsibility to use something that makes the FDB
entry from one bridge not match the FDB lookup of ports from other
bridges.
The top-level functions where the bridge is determined are:
- dsa_port_fdb_{add,del}
- dsa_port_host_fdb_{add,del}
- dsa_port_mdb_{add,del}
- dsa_port_host_mdb_{add,del}
aka the pre-crosschip-notifier functions.
Changing the API to pass a reference to a bridge is not superfluous, and
looking at the passed bridge argument is not the same as having the
driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add()
method.
DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well,
and those do not have any dp->bridge information to retrieve, because
they are not in any bridge - they are merely the pipes that serve the
user ports that are in one or multiple bridges.
The struct dsa_bridge associated with each FDB/MDB entry is encapsulated
in a larger "struct dsa_db" database. Although only databases associated
to bridges are notified for now, this API will be the starting point for
implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB
entries on the CPU port which belong to the corresponding user port's
port database. These are supposed to match only when the port is
standalone.
It is better to introduce the API in its expected final form than to
introduce it for bridges first, then to have to change drivers which may
have made one or more assumptions.
Drivers can use the provided bridge.num, but they can also use a
different numbering scheme that is more convenient.
DSA must perform refcounting on the CPU and DSA ports by also taking
into account the bridge number. So if two bridges request the same local
address, DSA must notify the driver twice, once for each bridge.
In fact, if the driver supports FDB isolation, DSA must perform
refcounting per bridge, but if the driver doesn't, DSA must refcount
host addresses across all bridges, otherwise it would be telling the
driver to delete an FDB entry for a bridge and the driver would delete
it for all bridges. So introduce a bool fdb_isolation in drivers which
would make all bridge databases passed to the cross-chip notifier have
the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal()
say that all bridge databases are the same database - which is
essentially the legacy behavior.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 11:22:22 +02:00
|
|
|
const struct switchdev_obj_port_mdb *mdb,
|
|
|
|
struct dsa_db db)
|
2019-05-02 23:23:31 +03:00
|
|
|
{
|
net: dsa: request drivers to perform FDB isolation
For DSA, to encourage drivers to perform FDB isolation simply means to
track which bridge does each FDB and MDB entry belong to. It then
becomes the driver responsibility to use something that makes the FDB
entry from one bridge not match the FDB lookup of ports from other
bridges.
The top-level functions where the bridge is determined are:
- dsa_port_fdb_{add,del}
- dsa_port_host_fdb_{add,del}
- dsa_port_mdb_{add,del}
- dsa_port_host_mdb_{add,del}
aka the pre-crosschip-notifier functions.
Changing the API to pass a reference to a bridge is not superfluous, and
looking at the passed bridge argument is not the same as having the
driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add()
method.
DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well,
and those do not have any dp->bridge information to retrieve, because
they are not in any bridge - they are merely the pipes that serve the
user ports that are in one or multiple bridges.
The struct dsa_bridge associated with each FDB/MDB entry is encapsulated
in a larger "struct dsa_db" database. Although only databases associated
to bridges are notified for now, this API will be the starting point for
implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB
entries on the CPU port which belong to the corresponding user port's
port database. These are supposed to match only when the port is
standalone.
It is better to introduce the API in its expected final form than to
introduce it for bridges first, then to have to change drivers which may
have made one or more assumptions.
Drivers can use the provided bridge.num, but they can also use a
different numbering scheme that is more convenient.
DSA must perform refcounting on the CPU and DSA ports by also taking
into account the bridge number. So if two bridges request the same local
address, DSA must notify the driver twice, once for each bridge.
In fact, if the driver supports FDB isolation, DSA must perform
refcounting per bridge, but if the driver doesn't, DSA must refcount
host addresses across all bridges, otherwise it would be telling the
driver to delete an FDB entry for a bridge and the driver would delete
it for all bridges. So introduce a bool fdb_isolation in drivers which
would make all bridge databases passed to the cross-chip notifier have
the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal()
say that all bridge databases are the same database - which is
essentially the legacy behavior.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 11:22:22 +02:00
|
|
|
return sja1105_fdb_add(ds, port, mdb->addr, mdb->vid, db);
|
2019-05-02 23:23:31 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_mdb_del(struct dsa_switch *ds, int port,
|
net: dsa: request drivers to perform FDB isolation
For DSA, to encourage drivers to perform FDB isolation simply means to
track which bridge does each FDB and MDB entry belong to. It then
becomes the driver responsibility to use something that makes the FDB
entry from one bridge not match the FDB lookup of ports from other
bridges.
The top-level functions where the bridge is determined are:
- dsa_port_fdb_{add,del}
- dsa_port_host_fdb_{add,del}
- dsa_port_mdb_{add,del}
- dsa_port_host_mdb_{add,del}
aka the pre-crosschip-notifier functions.
Changing the API to pass a reference to a bridge is not superfluous, and
looking at the passed bridge argument is not the same as having the
driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add()
method.
DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well,
and those do not have any dp->bridge information to retrieve, because
they are not in any bridge - they are merely the pipes that serve the
user ports that are in one or multiple bridges.
The struct dsa_bridge associated with each FDB/MDB entry is encapsulated
in a larger "struct dsa_db" database. Although only databases associated
to bridges are notified for now, this API will be the starting point for
implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB
entries on the CPU port which belong to the corresponding user port's
port database. These are supposed to match only when the port is
standalone.
It is better to introduce the API in its expected final form than to
introduce it for bridges first, then to have to change drivers which may
have made one or more assumptions.
Drivers can use the provided bridge.num, but they can also use a
different numbering scheme that is more convenient.
DSA must perform refcounting on the CPU and DSA ports by also taking
into account the bridge number. So if two bridges request the same local
address, DSA must notify the driver twice, once for each bridge.
In fact, if the driver supports FDB isolation, DSA must perform
refcounting per bridge, but if the driver doesn't, DSA must refcount
host addresses across all bridges, otherwise it would be telling the
driver to delete an FDB entry for a bridge and the driver would delete
it for all bridges. So introduce a bool fdb_isolation in drivers which
would make all bridge databases passed to the cross-chip notifier have
the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal()
say that all bridge databases are the same database - which is
essentially the legacy behavior.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 11:22:22 +02:00
|
|
|
const struct switchdev_obj_port_mdb *mdb,
|
|
|
|
struct dsa_db db)
|
2019-05-02 23:23:31 +03:00
|
|
|
{
|
net: dsa: request drivers to perform FDB isolation
For DSA, to encourage drivers to perform FDB isolation simply means to
track which bridge does each FDB and MDB entry belong to. It then
becomes the driver responsibility to use something that makes the FDB
entry from one bridge not match the FDB lookup of ports from other
bridges.
The top-level functions where the bridge is determined are:
- dsa_port_fdb_{add,del}
- dsa_port_host_fdb_{add,del}
- dsa_port_mdb_{add,del}
- dsa_port_host_mdb_{add,del}
aka the pre-crosschip-notifier functions.
Changing the API to pass a reference to a bridge is not superfluous, and
looking at the passed bridge argument is not the same as having the
driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add()
method.
DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well,
and those do not have any dp->bridge information to retrieve, because
they are not in any bridge - they are merely the pipes that serve the
user ports that are in one or multiple bridges.
The struct dsa_bridge associated with each FDB/MDB entry is encapsulated
in a larger "struct dsa_db" database. Although only databases associated
to bridges are notified for now, this API will be the starting point for
implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB
entries on the CPU port which belong to the corresponding user port's
port database. These are supposed to match only when the port is
standalone.
It is better to introduce the API in its expected final form than to
introduce it for bridges first, then to have to change drivers which may
have made one or more assumptions.
Drivers can use the provided bridge.num, but they can also use a
different numbering scheme that is more convenient.
DSA must perform refcounting on the CPU and DSA ports by also taking
into account the bridge number. So if two bridges request the same local
address, DSA must notify the driver twice, once for each bridge.
In fact, if the driver supports FDB isolation, DSA must perform
refcounting per bridge, but if the driver doesn't, DSA must refcount
host addresses across all bridges, otherwise it would be telling the
driver to delete an FDB entry for a bridge and the driver would delete
it for all bridges. So introduce a bool fdb_isolation in drivers which
would make all bridge databases passed to the cross-chip notifier have
the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal()
say that all bridge databases are the same database - which is
essentially the legacy behavior.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 11:22:22 +02:00
|
|
|
return sja1105_fdb_del(ds, port, mdb->addr, mdb->vid, db);
|
2019-05-02 23:23:31 +03:00
|
|
|
}
|
|
|
|
|
2021-02-16 13:41:19 +02:00
|
|
|
/* Common function for unicast and broadcast flood configuration.
|
|
|
|
* Flooding is configured between each {ingress, egress} port pair, and since
|
|
|
|
* the bridge's semantics are those of "egress flooding", it means we must
|
|
|
|
* enable flooding towards this port from all ingress ports that are in the
|
|
|
|
* same forwarding domain.
|
|
|
|
*/
|
|
|
|
static int sja1105_manage_flood_domains(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_forwarding_entry *l2_fwd;
|
|
|
|
struct dsa_switch *ds = priv->ds;
|
|
|
|
int from, to, rc;
|
|
|
|
|
|
|
|
l2_fwd = priv->static_config.tables[BLK_IDX_L2_FORWARDING].entries;
|
|
|
|
|
|
|
|
for (from = 0; from < ds->num_ports; from++) {
|
|
|
|
u64 fl_domain = 0, bc_domain = 0;
|
|
|
|
|
|
|
|
for (to = 0; to < priv->ds->num_ports; to++) {
|
|
|
|
if (!sja1105_can_forward(l2_fwd, from, to))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (priv->ucast_egress_floods & BIT(to))
|
|
|
|
fl_domain |= BIT(to);
|
|
|
|
if (priv->bcast_egress_floods & BIT(to))
|
|
|
|
bc_domain |= BIT(to);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Nothing changed, nothing to do */
|
|
|
|
if (l2_fwd[from].fl_domain == fl_domain &&
|
|
|
|
l2_fwd[from].bc_domain == bc_domain)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
l2_fwd[from].fl_domain = fl_domain;
|
|
|
|
l2_fwd[from].bc_domain = bc_domain;
|
|
|
|
|
|
|
|
rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_FORWARDING,
|
|
|
|
from, &l2_fwd[from], true);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
/* Add @port to (member == true) or remove it from (member == false) the
 * forwarding domain formed by the other user ports offloading @bridge.
 *
 * Afterwards the port's pvid is committed and the flood domains are
 * recomputed. Returns 0 on success or the first error encountered.
 */
static int sja1105_bridge_member(struct dsa_switch *ds, int port,
				 struct dsa_bridge bridge, bool member)
{
	struct sja1105_private *priv = ds->priv;
	struct sja1105_l2_forwarding_entry *fwd;
	int peer, err;

	fwd = priv->static_config.tables[BLK_IDX_L2_FORWARDING].entries;

	for (peer = 0; peer < ds->num_ports; peer++) {
		/* Only other user ports offloading the same bridge matter.
		 *
		 * For such a peer, the sole change is adding/removing this
		 * port in its reachability domain, so its entry can be written
		 * over SPI right away. This port's own entry, by contrast,
		 * accumulates every peer across the loop and is written once,
		 * after the loop, when its domain is complete.
		 */
		if (!dsa_is_user_port(ds, peer) || peer == port ||
		    !dsa_port_offloads_bridge(dsa_to_port(ds, peer), &bridge))
			continue;

		/* Update the forwarding matrix in both directions */
		sja1105_port_allow_traffic(fwd, peer, port, member);
		sja1105_port_allow_traffic(fwd, port, peer, member);

		err = sja1105_dynamic_config_write(priv, BLK_IDX_L2_FORWARDING,
						   peer, &fwd[peer], true);
		if (err < 0)
			return err;
	}

	err = sja1105_dynamic_config_write(priv, BLK_IDX_L2_FORWARDING,
					   port, &fwd[port], true);
	if (err)
		return err;

	err = sja1105_commit_pvid(ds, port);
	if (err)
		return err;

	return sja1105_manage_flood_domains(priv);
}
|
|
|
|
|
2019-05-05 13:19:28 +03:00
|
|
|
static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
|
|
|
|
u8 state)
|
|
|
|
{
|
2021-08-08 17:35:26 +03:00
|
|
|
struct dsa_port *dp = dsa_to_port(ds, port);
|
2019-05-05 13:19:28 +03:00
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
struct sja1105_mac_config_entry *mac;
|
|
|
|
|
|
|
|
mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
|
|
|
|
|
|
|
|
switch (state) {
|
|
|
|
case BR_STATE_DISABLED:
|
|
|
|
case BR_STATE_BLOCKING:
|
|
|
|
/* From UM10944 description of DRPDTAG (why put this there?):
|
|
|
|
* "Management traffic flows to the port regardless of the state
|
|
|
|
* of the INGRESS flag". So BPDUs are still be allowed to pass.
|
|
|
|
* At the moment no difference between DISABLED and BLOCKING.
|
|
|
|
*/
|
|
|
|
mac[port].ingress = false;
|
|
|
|
mac[port].egress = false;
|
|
|
|
mac[port].dyn_learn = false;
|
|
|
|
break;
|
|
|
|
case BR_STATE_LISTENING:
|
|
|
|
mac[port].ingress = true;
|
|
|
|
mac[port].egress = false;
|
|
|
|
mac[port].dyn_learn = false;
|
|
|
|
break;
|
|
|
|
case BR_STATE_LEARNING:
|
|
|
|
mac[port].ingress = true;
|
|
|
|
mac[port].egress = false;
|
2021-08-08 17:35:26 +03:00
|
|
|
mac[port].dyn_learn = dp->learning;
|
2019-05-05 13:19:28 +03:00
|
|
|
break;
|
|
|
|
case BR_STATE_FORWARDING:
|
|
|
|
mac[port].ingress = true;
|
|
|
|
mac[port].egress = true;
|
2021-08-08 17:35:26 +03:00
|
|
|
mac[port].dyn_learn = dp->learning;
|
2019-05-05 13:19:28 +03:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
dev_err(ds->dev, "invalid STP state: %d\n", state);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
|
|
|
|
&mac[port], true);
|
|
|
|
}
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
static int sja1105_bridge_join(struct dsa_switch *ds, int port,
|
2021-12-06 18:57:57 +02:00
|
|
|
struct dsa_bridge bridge,
|
2022-02-25 11:22:23 +02:00
|
|
|
bool *tx_fwd_offload,
|
|
|
|
struct netlink_ext_ack *extack)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
2021-12-06 18:57:58 +02:00
|
|
|
int rc;
|
|
|
|
|
|
|
|
rc = sja1105_bridge_member(ds, port, bridge, true);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
net: dsa: tag_8021q: replace the SVL bridging with VLAN-unaware IVL bridging
For VLAN-unaware bridging, tag_8021q uses something perhaps a bit too
tied with the sja1105 switch: each port uses the same pvid which is also
used for standalone operation (a unique one from which the source port
and device ID can be retrieved when packets from that port are forwarded
to the CPU). Since each port has a unique pvid when performing
autonomous forwarding, the switch must be configured for Shared VLAN
Learning (SVL) such that the VLAN ID itself is ignored when performing
FDB lookups. Without SVL, packets would always be flooded, since FDB
lookup in the source port's VLAN would never find any entry.
First of all, to make tag_8021q more palatable to switches which might
not support Shared VLAN Learning, let's just use a common VLAN for all
ports that are under the same bridge.
Secondly, using Shared VLAN Learning means that FDB isolation can never
be enforced. But if all ports under the same VLAN-unaware bridge share
the same VLAN ID, it can.
The disadvantage is that the CPU port can no longer perform precise
source port identification for these packets. But at least we have a
mechanism which has proven to be adequate for that situation: imprecise
RX (dsa_find_designated_bridge_port_by_vid), which is what we use for
termination on VLAN-aware bridges.
The VLAN ID that VLAN-unaware bridges will use with tag_8021q is the
same one as we were previously using for imprecise TX (bridge TX
forwarding offload). It is already allocated, it is just a matter of
using it.
Note that because now all ports under the same bridge share the same
VLAN, the complexity of performing a tag_8021q bridge join decreases
dramatically. We no longer have to install the RX VLAN of a newly
joining port into the port membership of the existing bridge ports.
The newly joining port just becomes a member of the VLAN corresponding
to that bridge, and the other ports are already members of it from when
they joined the bridge themselves. So forwarding works properly.
This means that we can unhook dsa_tag_8021q_bridge_{join,leave} from the
cross-chip notifier level dsa_switch_bridge_{join,leave}. We can put
these calls directly into the sja1105 driver.
With this new mode of operation, a port controlled by tag_8021q can have
two pvids whereas before it could only have one. The pvid for standalone
operation is different from the pvid used for VLAN-unaware bridging.
This is done, again, so that FDB isolation can be enforced.
Let tag_8021q manage this by deleting the standalone pvid when a port
joins a bridge, and restoring it when it leaves it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 11:22:16 +02:00
|
|
|
rc = dsa_tag_8021q_bridge_join(ds, port, bridge);
|
2021-12-06 18:57:58 +02:00
|
|
|
if (rc) {
|
|
|
|
sja1105_bridge_member(ds, port, bridge, false);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
*tx_fwd_offload = true;
|
|
|
|
|
|
|
|
return 0;
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
|
net: dsa: keep the bridge_dev and bridge_num as part of the same structure
The main desire behind this is to provide coherent bridge information to
the fast path without locking.
For example, right now we set dp->bridge_dev and dp->bridge_num from
separate code paths, it is theoretically possible for a packet
transmission to read these two port properties consecutively and find a
bridge number which does not correspond with the bridge device.
Another desire is to start passing more complex bridge information to
dsa_switch_ops functions. For example, with FDB isolation, it is
expected that drivers will need to be passed the bridge which requested
an FDB/MDB entry to be offloaded, and along with that bridge_dev, the
associated bridge_num should be passed too, in case the driver might
want to implement an isolation scheme based on that number.
We already pass the {bridge_dev, bridge_num} pair to the TX forwarding
offload switch API, however we'd like to remove that and squash it into
the basic bridge join/leave API. So that means we need to pass this
pair to the bridge join/leave API.
During dsa_port_bridge_leave, first we unset dp->bridge_dev, then we
call the driver's .port_bridge_leave with what used to be our
dp->bridge_dev, but provided as an argument.
When bridge_dev and bridge_num get folded into a single structure, we
need to preserve this behavior in dsa_port_bridge_leave: we need a copy
of what used to be in dp->bridge.
Switch drivers check bridge membership by comparing dp->bridge_dev with
the provided bridge_dev, but now, if we provide the struct dsa_bridge as
a pointer, they cannot keep comparing dp->bridge to the provided
pointer, since this only points to an on-stack copy. To make this
obvious and prevent driver writers from forgetting and doing stupid
things, in this new API, the struct dsa_bridge is provided as a full
structure (not very large, contains an int and a pointer) instead of a
pointer. An explicit comparison function needs to be used to determine
bridge membership: dsa_port_offloads_bridge().
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-06 18:57:56 +02:00
|
|
|
struct dsa_bridge bridge)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
net: dsa: tag_8021q: replace the SVL bridging with VLAN-unaware IVL bridging
For VLAN-unaware bridging, tag_8021q uses something perhaps a bit too
tied with the sja1105 switch: each port uses the same pvid which is also
used for standalone operation (a unique one from which the source port
and device ID can be retrieved when packets from that port are forwarded
to the CPU). Since each port has a unique pvid when performing
autonomous forwarding, the switch must be configured for Shared VLAN
Learning (SVL) such that the VLAN ID itself is ignored when performing
FDB lookups. Without SVL, packets would always be flooded, since FDB
lookup in the source port's VLAN would never find any entry.
First of all, to make tag_8021q more palatable to switches which might
not support Shared VLAN Learning, let's just use a common VLAN for all
ports that are under the same bridge.
Secondly, using Shared VLAN Learning means that FDB isolation can never
be enforced. But if all ports under the same VLAN-unaware bridge share
the same VLAN ID, it can.
The disadvantage is that the CPU port can no longer perform precise
source port identification for these packets. But at least we have a
mechanism which has proven to be adequate for that situation: imprecise
RX (dsa_find_designated_bridge_port_by_vid), which is what we use for
termination on VLAN-aware bridges.
The VLAN ID that VLAN-unaware bridges will use with tag_8021q is the
same one as we were previously using for imprecise TX (bridge TX
forwarding offload). It is already allocated, it is just a matter of
using it.
Note that because now all ports under the same bridge share the same
VLAN, the complexity of performing a tag_8021q bridge join decreases
dramatically. We no longer have to install the RX VLAN of a newly
joining port into the port membership of the existing bridge ports.
The newly joining port just becomes a member of the VLAN corresponding
to that bridge, and the other ports are already members of it from when
they joined the bridge themselves. So forwarding works properly.
This means that we can unhook dsa_tag_8021q_bridge_{join,leave} from the
cross-chip notifier level dsa_switch_bridge_{join,leave}. We can put
these calls directly into the sja1105 driver.
With this new mode of operation, a port controlled by tag_8021q can have
two pvids whereas before it could only have one. The pvid for standalone
operation is different from the pvid used for VLAN-unaware bridging.
This is done, again, so that FDB isolation can be enforced.
Let tag_8021q manage this by deleting the standalone pvid when a port
joins a bridge, and restoring it when it leaves it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 11:22:16 +02:00
|
|
|
dsa_tag_8021q_bridge_leave(ds, port, bridge);
|
net: dsa: keep the bridge_dev and bridge_num as part of the same structure
The main desire behind this is to provide coherent bridge information to
the fast path without locking.
For example, right now we set dp->bridge_dev and dp->bridge_num from
separate code paths, it is theoretically possible for a packet
transmission to read these two port properties consecutively and find a
bridge number which does not correspond with the bridge device.
Another desire is to start passing more complex bridge information to
dsa_switch_ops functions. For example, with FDB isolation, it is
expected that drivers will need to be passed the bridge which requested
an FDB/MDB entry to be offloaded, and along with that bridge_dev, the
associated bridge_num should be passed too, in case the driver might
want to implement an isolation scheme based on that number.
We already pass the {bridge_dev, bridge_num} pair to the TX forwarding
offload switch API, however we'd like to remove that and squash it into
the basic bridge join/leave API. So that means we need to pass this
pair to the bridge join/leave API.
During dsa_port_bridge_leave, first we unset dp->bridge_dev, then we
call the driver's .port_bridge_leave with what used to be our
dp->bridge_dev, but provided as an argument.
When bridge_dev and bridge_num get folded into a single structure, we
need to preserve this behavior in dsa_port_bridge_leave: we need a copy
of what used to be in dp->bridge.
Switch drivers check bridge membership by comparing dp->bridge_dev with
the provided bridge_dev, but now, if we provide the struct dsa_bridge as
a pointer, they cannot keep comparing dp->bridge to the provided
pointer, since this only points to an on-stack copy. To make this
obvious and prevent driver writers from forgetting and doing stupid
things, in this new API, the struct dsa_bridge is provided as a full
structure (not very large, contains an int and a pointer) instead of a
pointer. An explicit comparison function needs to be used to determine
bridge membership: dsa_port_offloads_bridge().
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-06 18:57:56 +02:00
|
|
|
sja1105_bridge_member(ds, port, bridge, false);
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
2023-09-06 00:53:38 +03:00
|
|
|
/* Fixed CBS shaper index for a {port, prio} pair. Port 0 (the uC port) does
 * not have CBS shapers, so numbering starts at port 1 and the result is
 * negative for port 0.
 */
#define SJA1110_FIXED_CBS(port, prio) \
	(((port) - 1) * SJA1105_NUM_TC + (prio))
|
2020-05-28 03:27:58 +03:00
|
|
|
|
2023-09-06 00:53:37 +03:00
|
|
|
static int sja1105_find_cbs_shaper(struct sja1105_private *priv,
|
|
|
|
int port, int prio)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2023-09-06 00:53:38 +03:00
|
|
|
if (priv->info->fixed_cbs_mapping) {
|
|
|
|
i = SJA1110_FIXED_CBS(port, prio);
|
|
|
|
if (i >= 0 && i < priv->info->num_cbs_shapers)
|
|
|
|
return i;
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2023-09-06 00:53:37 +03:00
|
|
|
for (i = 0; i < priv->info->num_cbs_shapers; i++)
|
|
|
|
if (priv->cbs[i].port == port && priv->cbs[i].prio == prio)
|
|
|
|
return i;
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2020-05-28 03:27:58 +03:00
|
|
|
static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2023-09-06 00:53:38 +03:00
|
|
|
if (priv->info->fixed_cbs_mapping)
|
|
|
|
return -1;
|
|
|
|
|
2020-05-28 03:27:58 +03:00
|
|
|
for (i = 0; i < priv->info->num_cbs_shapers; i++)
|
|
|
|
if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope)
|
|
|
|
return i;
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_delete_cbs_shaper(struct sja1105_private *priv, int port,
|
|
|
|
int prio)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < priv->info->num_cbs_shapers; i++) {
|
|
|
|
struct sja1105_cbs_entry *cbs = &priv->cbs[i];
|
|
|
|
|
|
|
|
if (cbs->port == port && cbs->prio == prio) {
|
|
|
|
memset(cbs, 0, sizeof(*cbs));
|
|
|
|
return sja1105_dynamic_config_write(priv, BLK_IDX_CBS,
|
|
|
|
i, cbs, true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
|
|
|
|
struct tc_cbs_qopt_offload *offload)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
struct sja1105_cbs_entry *cbs;
|
net: dsa: sja1105: fix bandwidth discrepancy between tc-cbs software and offload
More careful measurement of the tc-cbs bandwidth shows that the stream
bandwidth (effectively idleslope) increases, there is a larger and
larger discrepancy between the rate limit obtained by the software
Qdisc, and the rate limit obtained by its offloaded counterpart.
The discrepancy becomes so large, that e.g. at an idleslope of 40000
(40Mbps), the offloaded cbs does not actually rate limit anything, and
traffic will pass at line rate through a 100 Mbps port.
The reason for the discrepancy is that the hardware documentation I've
been following is incorrect. UM11040.pdf (for SJA1105P/Q/R/S) states
about IDLE_SLOPE that it is "the rate (in unit of bytes/sec) at which
the credit counter is increased".
Cross-checking with UM10944.pdf (for SJA1105E/T) and UM11107.pdf
(for SJA1110), the wording is different: "This field specifies the
value, in bytes per second times link speed, by which the credit counter
is increased".
So there's an extra scaling for link speed that the driver is currently
not accounting for, and apparently (empirically), that link speed is
expressed in Kbps.
I've pondered whether to pollute the sja1105_mac_link_up()
implementation with CBS shaper reprogramming, but I don't think it is
worth it. IMO, the UAPI exposed by tc-cbs requires user space to
recalculate the sendslope anyway, since the formula for that depends on
port_transmit_rate (see man tc-cbs), which is not an invariant from tc's
perspective.
So we use the offload->sendslope and offload->idleslope to deduce the
original port_transmit_rate from the CBS formula, and use that value to
scale the offload->sendslope and offload->idleslope to values that the
hardware understands.
Some numerical data points:
40Mbps stream, max interfering frame size 1500, port speed 100M
---------------------------------------------------------------
tc-cbs parameters:
idleslope 40000 sendslope -60000 locredit -900 hicredit 600
which result in hardware values:
Before (doesn't work) After (works)
credit_hi 600 600
credit_lo 900 900
send_slope 7500000 75
idle_slope 5000000 50
40Mbps stream, max interfering frame size 1500, port speed 1G
-------------------------------------------------------------
tc-cbs parameters:
idleslope 40000 sendslope -960000 locredit -1440 hicredit 60
which result in hardware values:
Before (doesn't work) After (works)
credit_hi 60 60
credit_lo 1440 1440
send_slope 120000000 120
idle_slope 5000000 5
5.12Mbps stream, max interfering frame size 1522, port speed 100M
-----------------------------------------------------------------
tc-cbs parameters:
idleslope 5120 sendslope -94880 locredit -1444 hicredit 77
which result in hardware values:
Before (doesn't work) After (works)
credit_hi 77 77
credit_lo 1444 1444
send_slope 11860000 118
idle_slope 640000 6
Tested on SJA1105T, SJA1105S and SJA1110A, at 1Gbps and 100Mbps.
Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc")
Reported-by: Yanan Yang <yanan.yang@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-09-06 00:53:36 +03:00
|
|
|
s64 port_transmit_rate_kbps;
|
2020-05-28 03:27:58 +03:00
|
|
|
int index;
|
|
|
|
|
|
|
|
if (!offload->enable)
|
|
|
|
return sja1105_delete_cbs_shaper(priv, port, offload->queue);
|
|
|
|
|
2023-09-06 00:53:37 +03:00
|
|
|
/* The user may be replacing an existing shaper */
|
|
|
|
index = sja1105_find_cbs_shaper(priv, port, offload->queue);
|
|
|
|
if (index < 0) {
|
|
|
|
/* That isn't the case - see if we can allocate a new one */
|
|
|
|
index = sja1105_find_unused_cbs_shaper(priv);
|
|
|
|
if (index < 0)
|
|
|
|
return -ENOSPC;
|
|
|
|
}
|
2020-05-28 03:27:58 +03:00
|
|
|
|
|
|
|
cbs = &priv->cbs[index];
|
|
|
|
cbs->port = port;
|
|
|
|
cbs->prio = offload->queue;
|
|
|
|
/* locredit and sendslope are negative by definition. In hardware,
|
|
|
|
* positive values must be provided, and the negative sign is implicit.
|
|
|
|
*/
|
|
|
|
cbs->credit_hi = offload->hicredit;
|
|
|
|
cbs->credit_lo = abs(offload->locredit);
|
net: dsa: sja1105: fix bandwidth discrepancy between tc-cbs software and offload
More careful measurement of the tc-cbs bandwidth shows that as the stream
bandwidth (effectively idleslope) increases, there is a larger and
larger discrepancy between the rate limit obtained by the software
Qdisc, and the rate limit obtained by its offloaded counterpart.
The discrepancy becomes so large, that e.g. at an idleslope of 40000
(40Mbps), the offloaded cbs does not actually rate limit anything, and
traffic will pass at line rate through a 100 Mbps port.
The reason for the discrepancy is that the hardware documentation I've
been following is incorrect. UM11040.pdf (for SJA1105P/Q/R/S) states
about IDLE_SLOPE that it is "the rate (in unit of bytes/sec) at which
the credit counter is increased".
Cross-checking with UM10944.pdf (for SJA1105E/T) and UM11107.pdf
(for SJA1110), the wording is different: "This field specifies the
value, in bytes per second times link speed, by which the credit counter
is increased".
So there's an extra scaling for link speed that the driver is currently
not accounting for, and apparently (empirically), that link speed is
expressed in Kbps.
I've pondered whether to pollute the sja1105_mac_link_up()
implementation with CBS shaper reprogramming, but I don't think it is
worth it. IMO, the UAPI exposed by tc-cbs requires user space to
recalculate the sendslope anyway, since the formula for that depends on
port_transmit_rate (see man tc-cbs), which is not an invariant from tc's
perspective.
So we use the offload->sendslope and offload->idleslope to deduce the
original port_transmit_rate from the CBS formula, and use that value to
scale the offload->sendslope and offload->idleslope to values that the
hardware understands.
Some numerical data points:
40Mbps stream, max interfering frame size 1500, port speed 100M
---------------------------------------------------------------
tc-cbs parameters:
idleslope 40000 sendslope -60000 locredit -900 hicredit 600
which result in hardware values:
Before (doesn't work) After (works)
credit_hi 600 600
credit_lo 900 900
send_slope 7500000 75
idle_slope 5000000 50
40Mbps stream, max interfering frame size 1500, port speed 1G
-------------------------------------------------------------
tc-cbs parameters:
idleslope 40000 sendslope -960000 locredit -1440 hicredit 60
which result in hardware values:
Before (doesn't work) After (works)
credit_hi 60 60
credit_lo 1440 1440
send_slope 120000000 120
idle_slope 5000000 5
5.12Mbps stream, max interfering frame size 1522, port speed 100M
-----------------------------------------------------------------
tc-cbs parameters:
idleslope 5120 sendslope -94880 locredit -1444 hicredit 77
which result in hardware values:
Before (doesn't work) After (works)
credit_hi 77 77
credit_lo 1444 1444
send_slope 11860000 118
idle_slope 640000 6
Tested on SJA1105T, SJA1105S and SJA1110A, at 1Gbps and 100Mbps.
Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc")
Reported-by: Yanan Yang <yanan.yang@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-09-06 00:53:36 +03:00
|
|
|
/* User space is in kbits/sec, while the hardware in bytes/sec times
|
|
|
|
* link speed. Since the given offload->sendslope is good only for the
|
|
|
|
* current link speed anyway, and user space is likely to reprogram it
|
|
|
|
* when that changes, don't even bother to track the port's link speed,
|
|
|
|
* but deduce the port transmit rate from idleslope - sendslope.
|
|
|
|
*/
|
|
|
|
port_transmit_rate_kbps = offload->idleslope - offload->sendslope;
|
|
|
|
cbs->idle_slope = div_s64(offload->idleslope * BYTES_PER_KBIT,
|
|
|
|
port_transmit_rate_kbps);
|
|
|
|
cbs->send_slope = div_s64(abs(offload->sendslope * BYTES_PER_KBIT),
|
|
|
|
port_transmit_rate_kbps);
|
2020-05-28 03:27:58 +03:00
|
|
|
/* Convert the negative values from 64-bit 2's complement
|
|
|
|
* to 32-bit 2's complement (for the case of 0x80000000 whose
|
|
|
|
* negative is still negative).
|
|
|
|
*/
|
|
|
|
cbs->credit_lo &= GENMASK_ULL(31, 0);
|
|
|
|
cbs->send_slope &= GENMASK_ULL(31, 0);
|
|
|
|
|
|
|
|
return sja1105_dynamic_config_write(priv, BLK_IDX_CBS, index, cbs,
|
|
|
|
true);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_reload_cbs(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
int rc = 0, i;
|
|
|
|
|
2021-06-24 18:52:07 +03:00
|
|
|
/* The credit based shapers are only allocated if
|
|
|
|
* CONFIG_NET_SCH_CBS is enabled.
|
|
|
|
*/
|
|
|
|
if (!priv->cbs)
|
|
|
|
return 0;
|
|
|
|
|
2020-05-28 03:27:58 +03:00
|
|
|
for (i = 0; i < priv->info->num_cbs_shapers; i++) {
|
|
|
|
struct sja1105_cbs_entry *cbs = &priv->cbs[i];
|
|
|
|
|
|
|
|
if (!cbs->idle_slope && !cbs->send_slope)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
rc = sja1105_dynamic_config_write(priv, BLK_IDX_CBS, i, cbs,
|
|
|
|
true);
|
|
|
|
if (rc)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2019-11-12 23:22:00 +02:00
|
|
|
static const char * const sja1105_reset_reasons[] = {
|
|
|
|
[SJA1105_VLAN_FILTERING] = "VLAN filtering",
|
|
|
|
[SJA1105_AGEING_TIME] = "Ageing time",
|
|
|
|
[SJA1105_SCHEDULING] = "Time-aware scheduling",
|
2020-03-27 21:55:45 +02:00
|
|
|
[SJA1105_BEST_EFFORT_POLICING] = "Best-effort policing",
|
2020-05-05 22:20:55 +03:00
|
|
|
[SJA1105_VIRTUAL_LINKS] = "Virtual links",
|
2019-11-12 23:22:00 +02:00
|
|
|
};
|
|
|
|
|
2019-05-02 23:23:34 +03:00
|
|
|
/* For situations where we need to change a setting at runtime that is only
|
|
|
|
* available through the static configuration, resetting the switch in order
|
|
|
|
* to upload the new static config is unavoidable. Back up the settings we
|
|
|
|
* modify at runtime (currently only MAC) and restore them after uploading,
|
|
|
|
* such that this operation is relatively seamless.
|
|
|
|
*/
|
2019-11-12 23:22:00 +02:00
|
|
|
int sja1105_static_config_reload(struct sja1105_private *priv,
|
|
|
|
enum sja1105_reset_reason reason)
|
2019-05-02 23:23:34 +03:00
|
|
|
{
|
2019-11-09 13:32:23 +02:00
|
|
|
struct ptp_system_timestamp ptp_sts_before;
|
|
|
|
struct ptp_system_timestamp ptp_sts_after;
|
2021-05-24 16:14:15 +03:00
|
|
|
int speed_mbps[SJA1105_MAX_NUM_PORTS];
|
2021-05-31 01:59:33 +03:00
|
|
|
u16 bmcr[SJA1105_MAX_NUM_PORTS] = {0};
|
2019-05-02 23:23:34 +03:00
|
|
|
struct sja1105_mac_config_entry *mac;
|
2019-11-09 13:32:23 +02:00
|
|
|
struct dsa_switch *ds = priv->ds;
|
|
|
|
s64 t1, t2, t3, t4;
|
|
|
|
s64 t12, t34;
|
2019-05-02 23:23:34 +03:00
|
|
|
int rc, i;
|
2019-11-09 13:32:23 +02:00
|
|
|
s64 now;
|
2019-05-02 23:23:34 +03:00
|
|
|
|
2023-09-08 16:33:52 +03:00
|
|
|
mutex_lock(&priv->fdb_lock);
|
2019-11-09 13:32:24 +02:00
|
|
|
mutex_lock(&priv->mgmt_lock);
|
|
|
|
|
2019-05-02 23:23:34 +03:00
|
|
|
mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
|
|
|
|
|
2019-06-08 16:03:44 +03:00
|
|
|
/* Back up the dynamic link speed changed by sja1105_adjust_port_config
|
|
|
|
* in order to temporarily restore it to SJA1105_SPEED_AUTO - which the
|
|
|
|
* switch wants to see in the static config in order to allow us to
|
|
|
|
* change it through the dynamic interface later.
|
2019-05-02 23:23:34 +03:00
|
|
|
*/
|
2021-05-24 16:14:13 +03:00
|
|
|
for (i = 0; i < ds->num_ports; i++) {
|
2021-05-31 01:59:37 +03:00
|
|
|
speed_mbps[i] = sja1105_port_speed_to_ethtool(priv,
|
|
|
|
mac[i].speed);
|
|
|
|
mac[i].speed = priv->info->port_speed[SJA1105_SPEED_AUTO];
|
2019-05-02 23:23:34 +03:00
|
|
|
|
2021-06-11 23:05:28 +03:00
|
|
|
if (priv->xpcs[i])
|
2022-04-30 19:30:36 +02:00
|
|
|
bmcr[i] = mdiobus_c45_read(priv->mdio_pcs, i,
|
|
|
|
MDIO_MMD_VEND2, MDIO_CTRL1);
|
2021-05-31 01:59:33 +03:00
|
|
|
}
|
2020-03-20 13:29:37 +02:00
|
|
|
|
2019-11-09 13:32:23 +02:00
|
|
|
/* No PTP operations can run right now */
|
|
|
|
mutex_lock(&priv->ptp_data.lock);
|
|
|
|
|
|
|
|
rc = __sja1105_ptp_gettimex(ds, &now, &ptp_sts_before);
|
2021-06-18 16:48:12 +03:00
|
|
|
if (rc < 0) {
|
|
|
|
mutex_unlock(&priv->ptp_data.lock);
|
|
|
|
goto out;
|
|
|
|
}
|
2019-11-09 13:32:23 +02:00
|
|
|
|
2019-05-02 23:23:34 +03:00
|
|
|
/* Reset switch and send updated static configuration */
|
|
|
|
rc = sja1105_static_config_upload(priv);
|
2021-06-18 16:48:12 +03:00
|
|
|
if (rc < 0) {
|
|
|
|
mutex_unlock(&priv->ptp_data.lock);
|
|
|
|
goto out;
|
|
|
|
}
|
2019-11-09 13:32:23 +02:00
|
|
|
|
|
|
|
rc = __sja1105_ptp_settime(ds, 0, &ptp_sts_after);
|
2021-06-18 16:48:12 +03:00
|
|
|
if (rc < 0) {
|
|
|
|
mutex_unlock(&priv->ptp_data.lock);
|
|
|
|
goto out;
|
|
|
|
}
|
2019-11-09 13:32:23 +02:00
|
|
|
|
|
|
|
t1 = timespec64_to_ns(&ptp_sts_before.pre_ts);
|
|
|
|
t2 = timespec64_to_ns(&ptp_sts_before.post_ts);
|
|
|
|
t3 = timespec64_to_ns(&ptp_sts_after.pre_ts);
|
|
|
|
t4 = timespec64_to_ns(&ptp_sts_after.post_ts);
|
|
|
|
/* Mid point, corresponds to pre-reset PTPCLKVAL */
|
|
|
|
t12 = t1 + (t2 - t1) / 2;
|
|
|
|
/* Mid point, corresponds to post-reset PTPCLKVAL, aka 0 */
|
|
|
|
t34 = t3 + (t4 - t3) / 2;
|
|
|
|
/* Advance PTPCLKVAL by the time it took since its readout */
|
|
|
|
now += (t34 - t12);
|
|
|
|
|
|
|
|
__sja1105_ptp_adjtime(ds, now);
|
|
|
|
|
|
|
|
mutex_unlock(&priv->ptp_data.lock);
|
2019-05-02 23:23:34 +03:00
|
|
|
|
2019-11-12 23:22:00 +02:00
|
|
|
dev_info(priv->ds->dev,
|
|
|
|
"Reset switch and programmed static config. Reason: %s\n",
|
|
|
|
sja1105_reset_reasons[reason]);
|
|
|
|
|
2019-05-02 23:23:34 +03:00
|
|
|
/* Configure the CGU (PLLs) for MII and RMII PHYs.
|
|
|
|
* For these interfaces there is no dynamic configuration
|
|
|
|
* needed, since PLLs have same settings at all speeds.
|
|
|
|
*/
|
net: dsa: sja1105: properly power down the microcontroller clock for SJA1110
It turns out that powering down the BASE_TIMER_CLK does not turn off the
microcontroller, just its timers, including the one for the watchdog.
So the embedded microcontroller is still running, and potentially still
doing things.
To prevent unwanted interference, we should power down the BASE_MCSS_CLK
as well (MCSS = microcontroller subsystem).
The trouble is that currently we turn off the BASE_TIMER_CLK for SJA1110
from the .clocking_setup() method, mostly because this is a Clock
Generation Unit (CGU) setting which was traditionally configured in that
method for SJA1105. But in SJA1105, the CGU was used for bringing up the
port clocks at the proper speeds, and in SJA1110 it's not (but rather
for initial configuration), so it's best that we rebrand the
sja1110_clocking_setup() method into what it really is - an implementation
of the .disable_microcontroller() method.
Since disabling the microcontroller only needs to be done once, at probe
time, we can choose the best place to do that as being in sja1105_setup(),
before we upload the static config to the device. This guarantees that
the static config being used by the switch afterwards is really ours.
Note that the procedure to upload a static config necessarily resets the
switch. This already did not reset the microcontroller, only the switch
core, so since the .disable_microcontroller() method is guaranteed to be
called by that point, if it's disabled, it remains disabled. Add a
comment to make that clear.
With the code movement for SJA1110 from .clocking_setup() to
.disable_microcontroller(), both methods are optional and are guarded by
"if" conditions.
Tested by enabling in the device tree the rev-mii switch port 0 that
goes towards the microcontroller, and flashing a firmware that would
have networking. Without this patch, the microcontroller can be pinged,
with this patch it cannot.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-18 14:52:54 +03:00
|
|
|
if (priv->info->clocking_setup) {
|
|
|
|
rc = priv->info->clocking_setup(priv);
|
|
|
|
if (rc < 0)
|
|
|
|
goto out;
|
|
|
|
}
|
2019-05-02 23:23:34 +03:00
|
|
|
|
2021-05-24 16:14:13 +03:00
|
|
|
for (i = 0; i < ds->num_ports; i++) {
|
2021-06-11 23:05:28 +03:00
|
|
|
struct dw_xpcs *xpcs = priv->xpcs[i];
|
2023-06-16 13:06:37 +01:00
|
|
|
unsigned int neg_mode;
|
2021-05-31 01:59:33 +03:00
|
|
|
|
2019-06-08 16:03:44 +03:00
|
|
|
rc = sja1105_adjust_port_config(priv, i, speed_mbps[i]);
|
2019-05-02 23:23:34 +03:00
|
|
|
if (rc < 0)
|
|
|
|
goto out;
|
2020-03-20 13:29:37 +02:00
|
|
|
|
2021-06-11 23:05:28 +03:00
|
|
|
if (!xpcs)
|
2021-05-31 01:59:33 +03:00
|
|
|
continue;
|
|
|
|
|
2021-06-11 23:05:28 +03:00
|
|
|
if (bmcr[i] & BMCR_ANENABLE)
|
2023-06-16 13:06:37 +01:00
|
|
|
neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED;
|
2021-06-11 23:05:28 +03:00
|
|
|
else
|
2023-06-16 13:06:37 +01:00
|
|
|
neg_mode = PHYLINK_PCS_NEG_OUTBAND;
|
2020-03-20 13:29:37 +02:00
|
|
|
|
2023-06-16 13:06:37 +01:00
|
|
|
rc = xpcs_do_config(xpcs, priv->phy_mode[i], NULL, neg_mode);
|
2021-06-11 23:05:28 +03:00
|
|
|
if (rc < 0)
|
|
|
|
goto out;
|
2020-03-20 13:29:37 +02:00
|
|
|
|
2023-06-16 13:06:37 +01:00
|
|
|
if (neg_mode == PHYLINK_PCS_NEG_OUTBAND) {
|
2020-03-20 13:29:37 +02:00
|
|
|
int speed = SPEED_UNKNOWN;
|
|
|
|
|
2021-06-11 23:05:31 +03:00
|
|
|
if (priv->phy_mode[i] == PHY_INTERFACE_MODE_2500BASEX)
|
|
|
|
speed = SPEED_2500;
|
|
|
|
else if (bmcr[i] & BMCR_SPEED1000)
|
2020-03-20 13:29:37 +02:00
|
|
|
speed = SPEED_1000;
|
2021-05-31 01:59:33 +03:00
|
|
|
else if (bmcr[i] & BMCR_SPEED100)
|
2020-03-20 13:29:37 +02:00
|
|
|
speed = SPEED_100;
|
2021-03-04 12:56:53 +02:00
|
|
|
else
|
2020-03-20 13:29:37 +02:00
|
|
|
speed = SPEED_10;
|
|
|
|
|
2023-06-16 13:06:37 +01:00
|
|
|
xpcs_link_up(&xpcs->pcs, neg_mode, priv->phy_mode[i],
|
2021-06-11 23:05:28 +03:00
|
|
|
speed, DUPLEX_FULL);
|
2020-03-20 13:29:37 +02:00
|
|
|
}
|
|
|
|
}
|
2020-05-28 03:27:58 +03:00
|
|
|
|
|
|
|
rc = sja1105_reload_cbs(priv);
|
|
|
|
if (rc < 0)
|
|
|
|
goto out;
|
2019-05-02 23:23:34 +03:00
|
|
|
out:
|
2019-11-09 13:32:24 +02:00
|
|
|
mutex_unlock(&priv->mgmt_lock);
|
2023-09-08 16:33:52 +03:00
|
|
|
mutex_unlock(&priv->fdb_lock);
|
2019-11-09 13:32:24 +02:00
|
|
|
|
2019-05-02 23:23:34 +03:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
static enum dsa_tag_protocol
|
2020-01-07 21:06:05 -08:00
|
|
|
sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
|
|
|
|
enum dsa_tag_protocol mp)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
net: dsa: add support for the SJA1110 native tagging protocol
The SJA1110 has improved a few things compared to SJA1105:
- To send a control packet from the host port with SJA1105, one needed
to program a one-shot "management route" over SPI. This is no longer
true with SJA1110, you can actually send "in-band control extensions"
in the packets sent by DSA, these are in fact DSA tags which contain
the destination port and switch ID.
- When receiving a control packet from the switch with SJA1105, the
source port and switch ID were written in bytes 3 and 4 of the
destination MAC address of the frame (which was a very poor shot at a
DSA header). If the control packet also had an RX timestamp, that
timestamp was sent in an actual follow-up packet, so there were
reordering concerns on multi-core/multi-queue DSA masters, where the
metadata frame with the RX timestamp might get processed before the
actual packet to which that timestamp belonged (there is no way to
pair a packet to its timestamp other than the order in which they were
received). On SJA1110, this is no longer true, control packets have
the source port, switch ID and timestamp all in the DSA tags.
- Timestamps from the switch were partial: to get a 64-bit timestamp as
required by PTP stacks, one would need to take the partial 24-bit or
32-bit timestamp from the packet, then read the current PTP time very
quickly, and then patch in the high bits of the current PTP time into
the captured partial timestamp, to reconstruct what the full 64-bit
timestamp must have been. That is awful because packet processing is
done in NAPI context, but reading the current PTP time is done over
SPI and therefore needs sleepable context.
But it also aggravated a few things:
- Not only is there a DSA header in SJA1110, but there is a DSA trailer
in fact, too. So DSA needs to be extended to support taggers which
have both a header and a trailer. Very unconventional - my understanding
is that the trailer exists because the timestamps couldn't be prepared
in time for putting them in the header area.
- Like SJA1105, not all packets sent to the CPU have the DSA tag added
to them, only control packets do:
* the ones which match the destination MAC filters/traps in
MAC_FLTRES1 and MAC_FLTRES0
* the ones which match FDB entries which have TRAP or TAKETS bits set
So we could in theory hack something up to request the switch to take
timestamps for all packets that reach the CPU, and those would be
DSA-tagged and contain the source port / switch ID by virtue of the
fact that there needs to be a timestamp trailer provided. BUT:
- The SJA1110 does not parse its own DSA tags in a way that is useful
for routing in cross-chip topologies, a la Marvell. And the sja1105
driver already supports cross-chip bridging from the SJA1105 days.
It does that by automatically setting up the DSA links as VLAN trunks
which contain all the necessary tag_8021q RX VLANs that must be
communicated between the switches that span the same bridge. So when
using tag_8021q on sja1105, it is possible to have 2 switches with
ports sw0p0, sw0p1, sw1p0, sw1p1, and 2 VLAN-unaware bridges br0 and
br1, and br0 can take sw0p0 and sw1p0, and br1 can take sw0p1 and
sw1p1, and forwarding will happen according to the expected rules of
the Linux bridge.
We like that, and we don't want that to go away, so as a matter of
fact, the SJA1110 tagger still needs to support tag_8021q.
So the sja1110 tagger is a hybrid between tag_8021q for data packets,
and the native hardware support for control packets.
On RX, packets have a 13-byte trailer if they contain an RX timestamp.
That trailer is padded in such a way that its byte 8 (the start of the
"residence time" field - not parsed by Linux because we don't care) is
aligned on a 16 byte boundary. So the padding has a variable length
between 0 and 15 bytes. The DSA header contains the offset of the
beginning of the padding relative to the beginning of the frame (and the
end of the padding is obviously the end of the packet minus 13 bytes,
the length of the trailer). So we discard it.
Packets which don't have a trailer contain the source port and switch ID
information in the header (they are "trap-to-host" packets). Packets
which have a trailer contain the source port and switch ID in the trailer.
On TX, the destination port mask and switch ID is always in the trailer,
so we always need to say in the header that a trailer is present.
The header needs a custom EtherType and this was chosen as 0xdadc, after
0xdada which is for Marvell and 0xdadb which is for VLANs in
VLAN-unaware mode on SJA1105 (and SJA1110 in fact too).
Because we use tag_8021q in concert with the native tagging protocol,
control packets will have 2 DSA tags.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-11 22:01:29 +03:00
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
|
|
|
|
return priv->info->tag_proto;
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
2019-06-08 15:04:30 +03:00
|
|
|
/* The TPID setting belongs to the General Parameters table,
|
|
|
|
* which can only be partially reconfigured at runtime (and not the TPID).
|
|
|
|
* So a switch reset is required.
|
|
|
|
*/
|
2021-02-13 22:43:19 +02:00
|
|
|
int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
|
|
|
|
struct netlink_ext_ack *extack)
|
2019-05-02 23:23:34 +03:00
|
|
|
{
|
2019-06-08 15:04:30 +03:00
|
|
|
struct sja1105_general_params_entry *general_params;
|
2019-05-02 23:23:34 +03:00
|
|
|
struct sja1105_private *priv = ds->priv;
|
2019-06-08 15:04:30 +03:00
|
|
|
struct sja1105_table *table;
|
2020-05-05 22:20:55 +03:00
|
|
|
struct sja1105_rule *rule;
|
2019-06-08 15:04:30 +03:00
|
|
|
u16 tpid, tpid2;
|
2019-05-02 23:23:34 +03:00
|
|
|
int rc;
|
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:50 +02:00
|
|
|
list_for_each_entry(rule, &priv->flow_block.rules, list) {
|
|
|
|
if (rule->type == SJA1105_RULE_VL) {
|
2021-02-13 22:43:19 +02:00
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"Cannot change VLAN filtering with active VL rules");
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 02:01:50 +02:00
|
|
|
return -EBUSY;
|
2020-05-05 22:20:55 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-08 15:04:30 +03:00
|
|
|
if (enabled) {
|
2019-05-02 23:23:34 +03:00
|
|
|
/* Enable VLAN filtering. */
|
2019-12-27 03:11:13 +02:00
|
|
|
tpid = ETH_P_8021Q;
|
|
|
|
tpid2 = ETH_P_8021AD;
|
2019-06-08 15:04:30 +03:00
|
|
|
} else {
|
2019-05-02 23:23:34 +03:00
|
|
|
/* Disable VLAN filtering. */
|
2019-06-08 15:04:30 +03:00
|
|
|
tpid = ETH_P_SJA1105;
|
|
|
|
tpid2 = ETH_P_SJA1105;
|
|
|
|
}
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
|
|
|
|
general_params = table->entries;
|
2019-06-08 15:04:31 +03:00
|
|
|
/* EtherType used to identify inner tagged (C-tag) VLAN traffic */
|
2019-12-27 03:11:13 +02:00
|
|
|
general_params->tpid = tpid;
|
|
|
|
/* EtherType used to identify outer tagged (S-tag) VLAN traffic */
|
2019-06-08 15:04:30 +03:00
|
|
|
general_params->tpid2 = tpid2;
|
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
|
|
|
if (dsa_is_unused_port(ds, port))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
rc = sja1105_commit_pvid(ds, port);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
}
|
net: dsa: sja1105: avoid invalid state in sja1105_vlan_filtering
Be there 2 switches spi/spi2.0 and spi/spi2.1 in a cross-chip setup,
both under the same VLAN-filtering bridge, both in the
SJA1105_VLAN_BEST_EFFORT state.
If we try to change the VLAN state of one of the switches (to
SJA1105_VLAN_FILTERING_FULL) we get the following error:
devlink dev param set spi/spi2.1 name best_effort_vlan_filtering value
false cmode runtime
[ 38.325683] sja1105 spi2.1: Not allowed to overcommit frame memory.
L2 memory partitions and VL memory partitions share the
same space. The sum of all 16 memory partitions is not
allowed to be larger than 929 128-byte blocks (or 910
with retagging). Please adjust
l2-forwarding-parameters-table.part_spc and/or
vl-forwarding-parameters-table.partspc.
[ 38.356803] sja1105 spi2.1: Invalid config, cannot upload
This is because the spi/spi2.1 switch doesn't support tagging anymore in
the SJA1105_VLAN_FILTERING_FULL state, so it doesn't need to have any
retagging rules defined. Great, so it can use more frame memory
(retagging consumes extra memory).
But the built-in low-level static config checker from the sja1105 driver
says "not so fast, you've increased the frame memory to non-retagging
values, but you still kept the retagging rules in the static config".
So we need to rebuild the VLAN table immediately before re-uploading the
static config, operation which will take care, based on the new VLAN
state, of removing the retagging rules.
Fixes: 3f01c91aab92 ("net: dsa: sja1105: implement VLAN retagging for dsa_8021q sub-VLANs")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-27 20:20:38 +03:00
|
|
|
|
2019-11-12 23:22:00 +02:00
|
|
|
rc = sja1105_static_config_reload(priv, SJA1105_VLAN_FILTERING);
|
2019-05-02 23:23:34 +03:00
|
|
|
if (rc)
|
2021-02-13 22:43:19 +02:00
|
|
|
NL_SET_ERR_MSG_MOD(extack, "Failed to change VLAN Ethertype");
|
2019-05-02 23:23:34 +03:00
|
|
|
|
2021-07-19 20:14:42 +03:00
|
|
|
return rc;
|
2019-05-02 23:23:34 +03:00
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
static int sja1105_vlan_add(struct sja1105_private *priv, int port, u16 vid,
|
net: dsa: sja1105: prevent tag_8021q VLANs from being received on user ports
Currently it is possible for an attacker to craft packets with a fake
DSA tag and send them to us, and our user ports will accept them and
preserve that VLAN when transmitting towards the CPU. Then the tagger
will be misled into thinking that the packets came on a different port
than they really came on.
Up until recently there wasn't a good option to prevent this from
happening. In SJA1105P and later, the MAC Configuration Table introduced
two options called:
- DRPSITAG: Drop Single Inner Tagged Frames
- DRPSOTAG: Drop Single Outer Tagged Frames
Because the sja1105 driver classifies all VLANs as "outer VLANs" (S-Tags),
it would be in principle possible to enable the DRPSOTAG bit on ports
using tag_8021q, and drop on ingress all packets which have a VLAN tag.
When the switch is VLAN-unaware, this works, because it uses a custom
TPID of 0xdadb, so any "tagged" packets received on a user port are
probably a spoofing attempt. But when the switch overall is VLAN-aware,
and some ports are standalone (therefore they use tag_8021q), the TPID
is 0x8100, and the port can receive a mix of untagged and VLAN-tagged
packets. The untagged ones will be classified to the tag_8021q pvid, and
the tagged ones to the VLAN ID from the packet header. Yes, it is true
that since commit 4fbc08bd3665 ("net: dsa: sja1105: deny 8021q uppers on
ports") we no longer support this mixed mode, but that is a temporary
limitation which will eventually be lifted. It would be nice to not
introduce one more restriction via DRPSOTAG, which would make the
standalone ports of a VLAN-aware switch drop genuinely VLAN-tagged
packets.
Also, the DRPSOTAG bit is not available on the first generation of
switches (SJA1105E, SJA1105T). So since one of the key features of this
driver is compatibility across switch generations, this makes it an even
less desirable approach.
The breakthrough comes from commit bef0746cf4cc ("net: dsa: sja1105:
make sure untagged packets are dropped on ingress ports with no pvid"),
where it became obvious that untagged packets are not dropped even if
the ingress port is not in the VMEMB_PORT vector of that port's pvid.
However, VLAN-tagged packets are subject to VLAN ingress
checking/dropping. This means that instead of using the catch-all
DRPSOTAG bit introduced in SJA1105P, we can drop tagged packets on a
per-VLAN basis, and this is already compatible with SJA1105E/T.
This patch adds an "allowed_ingress" argument to sja1105_vlan_add(), and
we call it with "false" for tag_8021q VLANs on user ports. The tag_8021q
VLANs still need to be allowed, of course, on ingress to DSA ports and
CPU ports.
We also need to refine the drop_untagged check in sja1105_commit_pvid to
make it not freak out about this new configuration. Currently it will
try to keep the configuration consistent between untagged and pvid-tagged
packets, so if the pvid of a port is 1 but VLAN 1 is not in VMEMB_PORT,
packets tagged with VID 1 will behave the same as untagged packets, and
be dropped. This behavior is what we want for ports under a VLAN-aware
bridge, but for the ports with a tag_8021q pvid, we want untagged
packets to be accepted, but packets tagged with a header recognized by
the switch as a tag_8021q VLAN to be dropped. So only restrict the
drop_untagged check to apply to the bridge_pvid, not to the tag_8021q_pvid.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:00 +03:00
|
|
|
u16 flags, bool allowed_ingress)
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
{
|
|
|
|
struct sja1105_vlan_lookup_entry *vlan;
|
|
|
|
struct sja1105_table *table;
|
|
|
|
int match, rc;
|
net: dsa: tag_8021q: add a context structure
While working on another tag_8021q driver implementation, some things
became apparent:
- It is not mandatory for a DSA driver to offload the tag_8021q VLANs by
using the VLAN table per se. For example, it can add custom TCAM rules
that simply encapsulate RX traffic, and redirect & decapsulate rules
for TX traffic. For such a driver, it makes no sense to receive the
tag_8021q configuration through the same callback as it receives the
VLAN configuration from the bridge and the 8021q modules.
- Currently, sja1105 (the only tag_8021q user) sets a
priv->expect_dsa_8021q variable to distinguish between the bridge
calling, and tag_8021q calling. That can be improved, to say the
least.
- The crosschip bridging operations are, in fact, stateful already. The
list of crosschip_links must be kept by the caller and passed to the
relevant tag_8021q functions.
So it would be nice if the tag_8021q configuration was more
self-contained. This patch attempts to do that.
Create a struct dsa_8021q_context which encapsulates a struct
dsa_switch, and has 2 function pointers for adding and deleting a VLAN.
These will replace the previous channel to the driver, which was through
the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops.
Also put the list of crosschip_links into this dsa_8021q_context.
Drivers that don't support cross-chip bridging can simply omit to
initialize this list, as long as they dont call any cross-chip function.
The sja1105_vlan_add and sja1105_vlan_del functions are refactored into
a smaller sja1105_vlan_add_one, which now has 2 entry points:
- sja1105_vlan_add, from struct dsa_switch_ops
- sja1105_dsa_8021q_vlan_add, from the tag_8021q ops
But even this change is fairly trivial. It just reflects the fact that
for sja1105, the VLANs from these 2 channels end up in the same hardware
table. However that is not necessarily true in the general sense (and
that's the reason for making this change).
The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The
dsa_8021q_context structure needs to be propagated because adding a VLAN
is now done through the ops function pointers inside of it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-10 19:48:56 +03:00
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
|
|
|
|
|
|
|
|
match = sja1105_is_vlan_configured(priv, vid);
|
|
|
|
if (match < 0) {
|
|
|
|
rc = sja1105_table_resize(table, table->entry_count + 1);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
match = table->entry_count - 1;
|
net: dsa: tag_8021q: add a context structure
While working on another tag_8021q driver implementation, some things
became apparent:
- It is not mandatory for a DSA driver to offload the tag_8021q VLANs by
using the VLAN table per se. For example, it can add custom TCAM rules
that simply encapsulate RX traffic, and redirect & decapsulate rules
for TX traffic. For such a driver, it makes no sense to receive the
tag_8021q configuration through the same callback as it receives the
VLAN configuration from the bridge and the 8021q modules.
- Currently, sja1105 (the only tag_8021q user) sets a
priv->expect_dsa_8021q variable to distinguish between the bridge
calling, and tag_8021q calling. That can be improved, to say the
least.
- The crosschip bridging operations are, in fact, stateful already. The
list of crosschip_links must be kept by the caller and passed to the
relevant tag_8021q functions.
So it would be nice if the tag_8021q configuration was more
self-contained. This patch attempts to do that.
Create a struct dsa_8021q_context which encapsulates a struct
dsa_switch, and has 2 function pointers for adding and deleting a VLAN.
These will replace the previous channel to the driver, which was through
the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops.
Also put the list of crosschip_links into this dsa_8021q_context.
Drivers that don't support cross-chip bridging can simply omit to
initialize this list, as long as they dont call any cross-chip function.
The sja1105_vlan_add and sja1105_vlan_del functions are refactored into
a smaller sja1105_vlan_add_one, which now has 2 entry points:
- sja1105_vlan_add, from struct dsa_switch_ops
- sja1105_dsa_8021q_vlan_add, from the tag_8021q ops
But even this change is fairly trivial. It just reflects the fact that
for sja1105, the VLANs from these 2 channels end up in the same hardware
table. However that is not necessarily true in the general sense (and
that's the reason for making this change).
The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The
dsa_8021q_context structure needs to be propagated because adding a VLAN
is now done through the ops function pointers inside of it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-10 19:48:56 +03:00
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
/* Assign pointer after the resize (it's new memory) */
|
|
|
|
vlan = table->entries;
|
|
|
|
|
|
|
|
vlan[match].type_entry = SJA1110_VLAN_D_TAG;
|
|
|
|
vlan[match].vlanid = vid;
|
|
|
|
vlan[match].vlan_bc |= BIT(port);
|
net: dsa: sja1105: prevent tag_8021q VLANs from being received on user ports
Currently it is possible for an attacker to craft packets with a fake
DSA tag and send them to us, and our user ports will accept them and
preserve that VLAN when transmitting towards the CPU. Then the tagger
will be misled into thinking that the packets came on a different port
than they really came on.
Up until recently there wasn't a good option to prevent this from
happening. In SJA1105P and later, the MAC Configuration Table introduced
two options called:
- DRPSITAG: Drop Single Inner Tagged Frames
- DRPSOTAG: Drop Single Outer Tagged Frames
Because the sja1105 driver classifies all VLANs as "outer VLANs" (S-Tags),
it would be in principle possible to enable the DRPSOTAG bit on ports
using tag_8021q, and drop on ingress all packets which have a VLAN tag.
When the switch is VLAN-unaware, this works, because it uses a custom
TPID of 0xdadb, so any "tagged" packets received on a user port are
probably a spoofing attempt. But when the switch overall is VLAN-aware,
and some ports are standalone (therefore they use tag_8021q), the TPID
is 0x8100, and the port can receive a mix of untagged and VLAN-tagged
packets. The untagged ones will be classified to the tag_8021q pvid, and
the tagged ones to the VLAN ID from the packet header. Yes, it is true
that since commit 4fbc08bd3665 ("net: dsa: sja1105: deny 8021q uppers on
ports") we no longer support this mixed mode, but that is a temporary
limitation which will eventually be lifted. It would be nice to not
introduce one more restriction via DRPSOTAG, which would make the
standalone ports of a VLAN-aware switch drop genuinely VLAN-tagged
packets.
Also, the DRPSOTAG bit is not available on the first generation of
switches (SJA1105E, SJA1105T). So since one of the key features of this
driver is compatibility across switch generations, this makes it an even
less desirable approach.
The breakthrough comes from commit bef0746cf4cc ("net: dsa: sja1105:
make sure untagged packets are dropped on ingress ports with no pvid"),
where it became obvious that untagged packets are not dropped even if
the ingress port is not in the VMEMB_PORT vector of that port's pvid.
However, VLAN-tagged packets are subject to VLAN ingress
checking/dropping. This means that instead of using the catch-all
DRPSOTAG bit introduced in SJA1105P, we can drop tagged packets on a
per-VLAN basis, and this is already compatible with SJA1105E/T.
This patch adds an "allowed_ingress" argument to sja1105_vlan_add(), and
we call it with "false" for tag_8021q VLANs on user ports. The tag_8021q
VLANs still need to be allowed, of course, on ingress to DSA ports and
CPU ports.
We also need to refine the drop_untagged check in sja1105_commit_pvid to
make it not freak out about this new configuration. Currently it will
try to keep the configuration consistent between untagged and pvid-tagged
packets, so if the pvid of a port is 1 but VLAN 1 is not in VMEMB_PORT,
packets tagged with VID 1 will behave the same as untagged packets, and
be dropped. This behavior is what we want for ports under a VLAN-aware
bridge, but for the ports with a tag_8021q pvid, we want untagged
packets to be accepted, but packets tagged with a header recognized by
the switch as a tag_8021q VLAN to be dropped. So only restrict the
drop_untagged check to apply to the bridge_pvid, not to the tag_8021q_pvid.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:00 +03:00
|
|
|
|
|
|
|
if (allowed_ingress)
|
|
|
|
vlan[match].vmemb_port |= BIT(port);
|
|
|
|
else
|
|
|
|
vlan[match].vmemb_port &= ~BIT(port);
|
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
|
|
|
|
vlan[match].tag_port &= ~BIT(port);
|
|
|
|
else
|
|
|
|
vlan[match].tag_port |= BIT(port);
|
net: dsa: tag_8021q: add a context structure
While working on another tag_8021q driver implementation, some things
became apparent:
- It is not mandatory for a DSA driver to offload the tag_8021q VLANs by
using the VLAN table per se. For example, it can add custom TCAM rules
that simply encapsulate RX traffic, and redirect & decapsulate rules
for TX traffic. For such a driver, it makes no sense to receive the
tag_8021q configuration through the same callback as it receives the
VLAN configuration from the bridge and the 8021q modules.
- Currently, sja1105 (the only tag_8021q user) sets a
priv->expect_dsa_8021q variable to distinguish between the bridge
calling, and tag_8021q calling. That can be improved, to say the
least.
- The crosschip bridging operations are, in fact, stateful already. The
list of crosschip_links must be kept by the caller and passed to the
relevant tag_8021q functions.
So it would be nice if the tag_8021q configuration was more
self-contained. This patch attempts to do that.
Create a struct dsa_8021q_context which encapsulates a struct
dsa_switch, and has 2 function pointers for adding and deleting a VLAN.
These will replace the previous channel to the driver, which was through
the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops.
Also put the list of crosschip_links into this dsa_8021q_context.
Drivers that don't support cross-chip bridging can simply omit to
initialize this list, as long as they dont call any cross-chip function.
The sja1105_vlan_add and sja1105_vlan_del functions are refactored into
a smaller sja1105_vlan_add_one, which now has 2 entry points:
- sja1105_vlan_add, from struct dsa_switch_ops
- sja1105_dsa_8021q_vlan_add, from the tag_8021q ops
But even this change is fairly trivial. It just reflects the fact that
for sja1105, the VLANs from these 2 channels end up in the same hardware
table. However that is not necessarily true in the general sense (and
that's the reason for making this change).
The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The
dsa_8021q_context structure needs to be propagated because adding a VLAN
is now done through the ops function pointers inside of it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-10 19:48:56 +03:00
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
return sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid,
|
|
|
|
&vlan[match], true);
|
net: dsa: tag_8021q: add a context structure
While working on another tag_8021q driver implementation, some things
became apparent:
- It is not mandatory for a DSA driver to offload the tag_8021q VLANs by
using the VLAN table per se. For example, it can add custom TCAM rules
that simply encapsulate RX traffic, and redirect & decapsulate rules
for TX traffic. For such a driver, it makes no sense to receive the
tag_8021q configuration through the same callback as it receives the
VLAN configuration from the bridge and the 8021q modules.
- Currently, sja1105 (the only tag_8021q user) sets a
priv->expect_dsa_8021q variable to distinguish between the bridge
calling, and tag_8021q calling. That can be improved, to say the
least.
- The crosschip bridging operations are, in fact, stateful already. The
list of crosschip_links must be kept by the caller and passed to the
relevant tag_8021q functions.
So it would be nice if the tag_8021q configuration was more
self-contained. This patch attempts to do that.
Create a struct dsa_8021q_context which encapsulates a struct
dsa_switch, and has 2 function pointers for adding and deleting a VLAN.
These will replace the previous channel to the driver, which was through
the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops.
Also put the list of crosschip_links into this dsa_8021q_context.
Drivers that don't support cross-chip bridging can simply omit to
initialize this list, as long as they dont call any cross-chip function.
The sja1105_vlan_add and sja1105_vlan_del functions are refactored into
a smaller sja1105_vlan_add_one, which now has 2 entry points:
- sja1105_vlan_add, from struct dsa_switch_ops
- sja1105_dsa_8021q_vlan_add, from the tag_8021q ops
But even this change is fairly trivial. It just reflects the fact that
for sja1105, the VLANs from these 2 channels end up in the same hardware
table. However that is not necessarily true in the general sense (and
that's the reason for making this change).
The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The
dsa_8021q_context structure needs to be propagated because adding a VLAN
is now done through the ops function pointers inside of it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-10 19:48:56 +03:00
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
/* Take @port out of VLAN @vid, and retire the VLAN entry altogether once its
 * VMEMB_PORT mask has become empty.
 *
 * Returns 0 on success, and also when @vid is not configured at all (there
 * is nothing to undo in that case). Otherwise returns the negative error code
 * from sja1105_dynamic_config_write(), or the result of
 * sja1105_table_delete_entry() when the entry is being removed.
 */
static int sja1105_vlan_del(struct sja1105_private *priv, int port, u16 vid)
{
	struct sja1105_vlan_lookup_entry *entries, *entry;
	struct sja1105_table *vlan_table;
	bool still_used;
	int idx, err;

	vlan_table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];

	idx = sja1105_is_vlan_configured(priv, vid);
	if (idx < 0) {
		/* This VLAN is not in the table, so there is no entry to
		 * delete.
		 */
		return 0;
	}

	entries = vlan_table->entries;
	entry = &entries[idx];

	/* Drop the port from every per-port bitmap of this VLAN. Clearing
	 * tag_port serves no practical purpose, it only avoids leaving a
	 * confusing bitmap behind.
	 */
	entry->vlanid = vid;
	entry->vlan_bc &= ~BIT(port);
	entry->vmemb_port &= ~BIT(port);
	entry->tag_port &= ~BIT(port);

	/* The VLAN stays only for as long as at least one port is still a
	 * member of it.
	 */
	still_used = !!entry->vmemb_port;

	/* Push the updated entry through the dynamic config interface; the
	 * last argument is false when the entry is not to be kept.
	 */
	err = sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid,
					   entry, still_used);
	if (err < 0)
		return err;

	if (still_used)
		return 0;

	/* No member left: remove the entry from the table as well. */
	return sja1105_table_delete_entry(vlan_table, idx);
}
|
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port,
|
|
|
|
const struct switchdev_obj_port_vlan *vlan,
|
|
|
|
struct netlink_ext_ack *extack)
|
2019-05-02 23:23:34 +03:00
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
net: dsa: sja1105: add support for imprecise RX
This is already common knowledge by now, but the sja1105 does not have
hardware support for DSA tagging for data plane packets, and tag_8021q
sets up a unique pvid per port, transmitted as VLAN-tagged towards the
CPU, for the source port to be decoded nonetheless.
When the port is part of a VLAN-aware bridge, the pvid committed to
hardware is taken from the bridge and not from tag_8021q, so we need to
work with that the best we can.
Configure the switches to send all packets to the CPU as VLAN-tagged
(even ones that were originally untagged on the wire) and make use of
dsa_untag_bridge_pvid() to get rid of it before we send those packets up
the network stack.
With the classified VLAN used by hardware known to the tagger, we first
peek at the VID in an attempt to figure out if the packet was received
from a VLAN-unaware port (standalone or under a VLAN-unaware bridge),
case in which we can continue to call dsa_8021q_rcv(). If that is not
the case, the packet probably came from a VLAN-aware bridge. So we call
the DSA helper that finds for us a "designated bridge port" - one that
is a member of the VLAN ID from the packet, and is in the proper STP
state - basically these are all checks performed by br_handle_frame() in
the software RX data path.
The bridge will accept the packet as valid even if the source port was
maybe wrong. So it will maybe learn the MAC SA of the packet on the
wrong port, and its software FDB will be out of sync with the hardware
FDB. So replies towards this same MAC DA will not work, because the
bridge will send towards a different netdev.
This is where the bridge data plane offload ("imprecise TX") added by
the next patch comes in handy. The software FDB is wrong, true, but the
hardware FDB isn't, and by offloading the bridge forwarding plane we
have a chance to right a wrong, and have the hardware look up the FDB
for us for the reply packet. So it all cancels out.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:34 +03:00
|
|
|
u16 flags = vlan->flags;
|
2019-05-02 23:23:34 +03:00
|
|
|
int rc;
|
|
|
|
|
2021-07-19 20:14:42 +03:00
|
|
|
/* Be sure to deny alterations to the configuration done by tag_8021q.
|
2021-01-09 02:01:53 +02:00
|
|
|
*/
|
2021-07-19 20:14:42 +03:00
|
|
|
if (vid_is_dsa_8021q(vlan->vid)) {
|
2021-02-13 22:43:18 +02:00
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
net: dsa: tag_8021q: merge RX and TX VLANs
In the old Shared VLAN Learning mode of operation that tag_8021q
previously used for forwarding, we needed to have distinct concepts for
an RX and a TX VLAN.
An RX VLAN could be installed on all ports that were members of a given
bridge, so that autonomous forwarding could still work, while a TX VLAN
was dedicated for precise packet steering, so it just contained the CPU
port and one egress port.
Now that tag_8021q uses Independent VLAN Learning and imprecise RX/TX
all over, those lines have been blurred and we no longer have the need
to do precise TX towards a port that is in a bridge. As for standalone
ports, it is fine to use the same VLAN ID for both RX and TX.
This patch changes the tag_8021q format by shifting the VLAN range it
reserves, and halving it. Previously, our DIR bits were encoding the
VLAN direction (RX/TX) and were set to either 1 or 2. This meant that
tag_8021q reserved 2K VLANs, or 50% of the available range.
Change the DIR bits to a hardcoded value of 3 now, which makes tag_8021q
reserve only 1K VLANs, and a different range now (the last 1K). This is
done so that we leave the old format in place in case we need to return
to it.
In terms of code, the vid_is_dsa_8021q_rxvlan and vid_is_dsa_8021q_txvlan
functions go away. Any vid_is_dsa_8021q is both a TX and an RX VLAN, and
they are no longer distinct. For example, felix which did different
things for different VLAN types, now needs to handle the RX and the TX
logic for the same VLAN.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 11:22:20 +02:00
|
|
|
"Range 3072-4095 reserved for dsa_8021q operation");
|
2021-01-09 02:01:53 +02:00
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
2021-08-04 16:54:33 +03:00
|
|
|
/* Always install bridge VLANs as egress-tagged on CPU and DSA ports */
|
|
|
|
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
|
net: dsa: sja1105: add support for imprecise RX
This is already common knowledge by now, but the sja1105 does not have
hardware support for DSA tagging for data plane packets, and tag_8021q
sets up a unique pvid per port, transmitted as VLAN-tagged towards the
CPU, for the source port to be decoded nonetheless.
When the port is part of a VLAN-aware bridge, the pvid committed to
hardware is taken from the bridge and not from tag_8021q, so we need to
work with that the best we can.
Configure the switches to send all packets to the CPU as VLAN-tagged
(even ones that were originally untagged on the wire) and make use of
dsa_untag_bridge_pvid() to get rid of it before we send those packets up
the network stack.
With the classified VLAN used by hardware known to the tagger, we first
peek at the VID in an attempt to figure out if the packet was received
from a VLAN-unaware port (standalone or under a VLAN-unaware bridge),
case in which we can continue to call dsa_8021q_rcv(). If that is not
the case, the packet probably came from a VLAN-aware bridge. So we call
the DSA helper that finds for us a "designated bridge port" - one that
is a member of the VLAN ID from the packet, and is in the proper STP
state - basically these are all checks performed by br_handle_frame() in
the software RX data path.
The bridge will accept the packet as valid even if the source port was
maybe wrong. So it will maybe learn the MAC SA of the packet on the
wrong port, and its software FDB will be out of sync with the hardware
FDB. So replies towards this same MAC DA will not work, because the
bridge will send towards a different netdev.
This is where the bridge data plane offload ("imprecise TX") added by
the next patch comes in handy. The software FDB is wrong, true, but the
hardware FDB isn't, and by offloading the bridge forwarding plane we
have a chance to right a wrong, and have the hardware look up the FDB
for us for the reply packet. So it all cancels out.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:34 +03:00
|
|
|
flags = 0;
|
|
|
|
|
net: dsa: sja1105: prevent tag_8021q VLANs from being received on user ports
Currently it is possible for an attacker to craft packets with a fake
DSA tag and send them to us, and our user ports will accept them and
preserve that VLAN when transmitting towards the CPU. Then the tagger
will be misled into thinking that the packets came on a different port
than they really came on.
Up until recently there wasn't a good option to prevent this from
happening. In SJA1105P and later, the MAC Configuration Table introduced
two options called:
- DRPSITAG: Drop Single Inner Tagged Frames
- DRPSOTAG: Drop Single Outer Tagged Frames
Because the sja1105 driver classifies all VLANs as "outer VLANs" (S-Tags),
it would be in principle possible to enable the DRPSOTAG bit on ports
using tag_8021q, and drop on ingress all packets which have a VLAN tag.
When the switch is VLAN-unaware, this works, because it uses a custom
TPID of 0xdadb, so any "tagged" packets received on a user port are
probably a spoofing attempt. But when the switch overall is VLAN-aware,
and some ports are standalone (therefore they use tag_8021q), the TPID
is 0x8100, and the port can receive a mix of untagged and VLAN-tagged
packets. The untagged ones will be classified to the tag_8021q pvid, and
the tagged ones to the VLAN ID from the packet header. Yes, it is true
that since commit 4fbc08bd3665 ("net: dsa: sja1105: deny 8021q uppers on
ports") we no longer support this mixed mode, but that is a temporary
limitation which will eventually be lifted. It would be nice to not
introduce one more restriction via DRPSOTAG, which would make the
standalone ports of a VLAN-aware switch drop genuinely VLAN-tagged
packets.
Also, the DRPSOTAG bit is not available on the first generation of
switches (SJA1105E, SJA1105T). So since one of the key features of this
driver is compatibility across switch generations, this makes it an even
less desirable approach.
The breakthrough comes from commit bef0746cf4cc ("net: dsa: sja1105:
make sure untagged packets are dropped on ingress ports with no pvid"),
where it became obvious that untagged packets are not dropped even if
the ingress port is not in the VMEMB_PORT vector of that port's pvid.
However, VLAN-tagged packets are subject to VLAN ingress
checking/dropping. This means that instead of using the catch-all
DRPSOTAG bit introduced in SJA1105P, we can drop tagged packets on a
per-VLAN basis, and this is already compatible with SJA1105E/T.
This patch adds an "allowed_ingress" argument to sja1105_vlan_add(), and
we call it with "false" for tag_8021q VLANs on user ports. The tag_8021q
VLANs still need to be allowed, of course, on ingress to DSA ports and
CPU ports.
We also need to refine the drop_untagged check in sja1105_commit_pvid to
make it not freak out about this new configuration. Currently it will
try to keep the configuration consistent between untagged and pvid-tagged
packets, so if the pvid of a port is 1 but VLAN 1 is not in VMEMB_PORT,
packets tagged with VID 1 will behave the same as untagged packets, and
be dropped. This behavior is what we want for ports under a VLAN-aware
bridge, but for the ports with a tag_8021q pvid, we want untagged
packets to be accepted, but packets tagged with a header recognized by
the switch as a tag_8021q VLAN to be dropped. So only restrict the
drop_untagged check to apply to the bridge_pvid, not to the tag_8021q_pvid.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:00 +03:00
|
|
|
rc = sja1105_vlan_add(priv, port, vlan->vid, flags, true);
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
if (rc)
|
2021-01-09 02:01:53 +02:00
|
|
|
return rc;
|
net: dsa: sja1105: save/restore VLANs using a delta commit method
Managing the VLAN table that is present in hardware will become very
difficult once we add a third operating state
(best_effort_vlan_filtering). That is because correct cleanup (not too
little, not too much) becomes virtually impossible, when VLANs can be
added from the bridge layer, from dsa_8021q for basic tagging, for
cross-chip bridging, as well as retagging rules for sub-VLANs and
cross-chip sub-VLANs. So we need to rethink VLAN interaction with the
switch in a more scalable way.
In preparation for that, use the priv->expect_dsa_8021q boolean to
classify any VLAN request received through .port_vlan_add or
.port_vlan_del towards either one of 2 internal lists: bridge VLANs and
dsa_8021q VLANs.
Then, implement a central sja1105_build_vlan_table method that creates a
VLAN configuration from scratch based on the 2 lists of VLANs kept by
the driver, and based on the VLAN awareness state. Currently, if we are
VLAN-unaware, install the dsa_8021q VLANs, otherwise the bridge VLANs.
Then, implement a delta commit procedure that identifies which VLANs
from this new configuration are actually different from the config
previously committed to hardware. We apply the delta through the dynamic
configuration interface (we don't reset the switch). The result is that
the hardware should see the exact sequence of operations as before this
patch.
This also helps remove the "br" argument passed to
dsa_8021q_crosschip_bridge_join, which it was only using to figure out
whether it should commit the configuration back to us or not, based on
the VLAN awareness state of the bridge. We can simplify that, by always
allowing those VLANs inside of our dsa_8021q_vlans list, and committing
those to hardware when necessary.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-12 20:20:29 +03:00
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
if (vlan->flags & BRIDGE_VLAN_INFO_PVID)
|
|
|
|
priv->bridge_pvid[port] = vlan->vid;
|
net: dsa: sja1105: save/restore VLANs using a delta commit method
Managing the VLAN table that is present in hardware will become very
difficult once we add a third operating state
(best_effort_vlan_filtering). That is because correct cleanup (not too
little, not too much) becomes virtually impossible, when VLANs can be
added from the bridge layer, from dsa_8021q for basic tagging, for
cross-chip bridging, as well as retagging rules for sub-VLANs and
cross-chip sub-VLANs. So we need to rethink VLAN interaction with the
switch in a more scalable way.
In preparation for that, use the priv->expect_dsa_8021q boolean to
classify any VLAN request received through .port_vlan_add or
.port_vlan_del towards either one of 2 internal lists: bridge VLANs and
dsa_8021q VLANs.
Then, implement a central sja1105_build_vlan_table method that creates a
VLAN configuration from scratch based on the 2 lists of VLANs kept by
the driver, and based on the VLAN awareness state. Currently, if we are
VLAN-unaware, install the dsa_8021q VLANs, otherwise the bridge VLANs.
Then, implement a delta commit procedure that identifies which VLANs
from this new configuration are actually different from the config
previously committed to hardware. We apply the delta through the dynamic
configuration interface (we don't reset the switch). The result is that
the hardware should see the exact sequence of operations as before this
patch.
This also helps remove the "br" argument passed to
dsa_8021q_crosschip_bridge_join, which it was only using to figure out
whether it should commit the configuration back to us or not, based on
the VLAN awareness state of the bridge. We can simplify that, by always
allowing those VLANs inside of our dsa_8021q_vlans list, and committing
those to hardware when necessary.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-12 20:20:29 +03:00
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
return sja1105_commit_pvid(ds, port);
|
2019-05-02 23:23:34 +03:00
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
static int sja1105_bridge_vlan_del(struct dsa_switch *ds, int port,
|
|
|
|
const struct switchdev_obj_port_vlan *vlan)
|
2019-05-02 23:23:34 +03:00
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
net: dsa: sja1105: make sure untagged packets are dropped on ingress ports with no pvid
Surprisingly, this configuration:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
still has the sja1105 switch sending untagged packets to the CPU (and
failing to decode them, since dsa_find_designated_bridge_port_by_vid
searches by VID 1 and rightfully finds no bridge VLAN 1 on a port).
Dumping the switch configuration, the VLANs are managed properly:
- the pvid of swp2 is 1 in the MAC Configuration Table, but
- only the CPU port is in the port membership of VLANID 1 in the VLAN
Lookup Table
When the ingress packets are tagged with VID 1, they are properly
dropped. But when they are untagged, they are able to reach the CPU
port. Also, when the pvid in the MAC Configuration Table is changed to
e.g. 55 (an unused VLAN), the untagged packets are also dropped.
So it looks like:
- the switch bypasses ingress VLAN membership checks for untagged traffic
- the reason why the untagged traffic is dropped when I make the pvid 55
is due to the lack of valid destination ports in VLAN 55, rather than
an ingress membership violation
- the ingress VLAN membership checks are only done for VLAN-tagged traffic
Interesting. It looks like there is an explicit bit to drop untagged
traffic, so we should probably be using that to preserve user expectations.
Note that only VLAN-aware ports should drop untagged packets due to no
pvid - when VLAN-unaware, the software bridge doesn't do this even if
there is no pvid on any bridge port and on the bridge itself. So the new
sja1105_drop_untagged() function cannot simply be called with "false"
from sja1105_bridge_vlan_add() and with "true" from sja1105_bridge_vlan_del.
Instead, we need to also consider the VLAN awareness state. That means
we need to hook the "drop untagged" setting in all the same places where
the "commit pvid" logic is, and it needs to factor in all the state when
flipping the "drop untagged" bit: is our current pvid in the VLAN Lookup
Table, and is the current port in that VLAN's port membership list?
VLAN-unaware ports will never drop untagged frames because these checks
always succeed by construction, and the tag_8021q VLANs cannot be changed
by the user.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-29 00:54:28 +03:00
|
|
|
int rc;
|
net: dsa: sja1105: save/restore VLANs using a delta commit method
Managing the VLAN table that is present in hardware will become very
difficult once we add a third operating state
(best_effort_vlan_filtering). That is because correct cleanup (not too
little, not too much) becomes virtually impossible, when VLANs can be
added from the bridge layer, from dsa_8021q for basic tagging, for
cross-chip bridging, as well as retagging rules for sub-VLANs and
cross-chip sub-VLANs. So we need to rethink VLAN interaction with the
switch in a more scalable way.
In preparation for that, use the priv->expect_dsa_8021q boolean to
classify any VLAN request received through .port_vlan_add or
.port_vlan_del towards either one of 2 internal lists: bridge VLANs and
dsa_8021q VLANs.
Then, implement a central sja1105_build_vlan_table method that creates a
VLAN configuration from scratch based on the 2 lists of VLANs kept by
the driver, and based on the VLAN awareness state. Currently, if we are
VLAN-unaware, install the dsa_8021q VLANs, otherwise the bridge VLANs.
Then, implement a delta commit procedure that identifies which VLANs
from this new configuration are actually different from the config
previously committed to hardware. We apply the delta through the dynamic
configuration interface (we don't reset the switch). The result is that
the hardware should see the exact sequence of operations as before this
patch.
This also helps remove the "br" argument passed to
dsa_8021q_crosschip_bridge_join, which it was only using to figure out
whether it should commit the configuration back to us or not, based on
the VLAN awareness state of the bridge. We can simplify that, by always
allowing those VLANs inside of our dsa_8021q_vlans list, and committing
those to hardware when necessary.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-12 20:20:29 +03:00
|
|
|
|
net: dsa: sja1105: make sure untagged packets are dropped on ingress ports with no pvid
Surprisingly, this configuration:
ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1
still has the sja1105 switch sending untagged packets to the CPU (and
failing to decode them, since dsa_find_designated_bridge_port_by_vid
searches by VID 1 and rightfully finds no bridge VLAN 1 on a port).
Dumping the switch configuration, the VLANs are managed properly:
- the pvid of swp2 is 1 in the MAC Configuration Table, but
- only the CPU port is in the port membership of VLANID 1 in the VLAN
Lookup Table
When the ingress packets are tagged with VID 1, they are properly
dropped. But when they are untagged, they are able to reach the CPU
port. Also, when the pvid in the MAC Configuration Table is changed to
e.g. 55 (an unused VLAN), the untagged packets are also dropped.
So it looks like:
- the switch bypasses ingress VLAN membership checks for untagged traffic
- the reason why the untagged traffic is dropped when I make the pvid 55
is due to the lack of valid destination ports in VLAN 55, rather than
an ingress membership violation
- the ingress VLAN membership checks are only done for VLAN-tagged traffic
Interesting. It looks like there is an explicit bit to drop untagged
traffic, so we should probably be using that to preserve user expectations.
Note that only VLAN-aware ports should drop untagged packets due to no
pvid - when VLAN-unaware, the software bridge doesn't do this even if
there is no pvid on any bridge port and on the bridge itself. So the new
sja1105_drop_untagged() function cannot simply be called with "false"
from sja1105_bridge_vlan_add() and with "true" from sja1105_bridge_vlan_del.
Instead, we need to also consider the VLAN awareness state. That means
we need to hook the "drop untagged" setting in all the same places where
the "commit pvid" logic is, and it needs to factor in all the state when
flipping the "drop untagged" bit: is our current pvid in the VLAN Lookup
Table, and is the current port in that VLAN's port membership list?
VLAN-unaware ports will never drop untagged frames because these checks
always succeed by construction, and the tag_8021q VLANs cannot be changed
by the user.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-29 00:54:28 +03:00
|
|
|
rc = sja1105_vlan_del(priv, port, vlan->vid);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
/* In case the pvid was deleted, make sure that untagged packets will
|
|
|
|
* be dropped.
|
|
|
|
*/
|
|
|
|
return sja1105_commit_pvid(ds, port);
|
2019-05-02 23:23:34 +03:00
|
|
|
}
|
|
|
|
|
net: dsa: tag_8021q: add a context structure
While working on another tag_8021q driver implementation, some things
became apparent:
- It is not mandatory for a DSA driver to offload the tag_8021q VLANs by
using the VLAN table per se. For example, it can add custom TCAM rules
that simply encapsulate RX traffic, and redirect & decapsulate rules
for TX traffic. For such a driver, it makes no sense to receive the
tag_8021q configuration through the same callback as it receives the
VLAN configuration from the bridge and the 8021q modules.
- Currently, sja1105 (the only tag_8021q user) sets a
priv->expect_dsa_8021q variable to distinguish between the bridge
calling, and tag_8021q calling. That can be improved, to say the
least.
- The crosschip bridging operations are, in fact, stateful already. The
list of crosschip_links must be kept by the caller and passed to the
relevant tag_8021q functions.
So it would be nice if the tag_8021q configuration was more
self-contained. This patch attempts to do that.
Create a struct dsa_8021q_context which encapsulates a struct
dsa_switch, and has 2 function pointers for adding and deleting a VLAN.
These will replace the previous channel to the driver, which was through
the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops.
Also put the list of crosschip_links into this dsa_8021q_context.
Drivers that don't support cross-chip bridging can simply omit to
initialize this list, as long as they don't call any cross-chip function.
The sja1105_vlan_add and sja1105_vlan_del functions are refactored into
a smaller sja1105_vlan_add_one, which now has 2 entry points:
- sja1105_vlan_add, from struct dsa_switch_ops
- sja1105_dsa_8021q_vlan_add, from the tag_8021q ops
But even this change is fairly trivial. It just reflects the fact that
for sja1105, the VLANs from these 2 channels end up in the same hardware
table. However that is not necessarily true in the general sense (and
that's the reason for making this change).
The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The
dsa_8021q_context structure needs to be propagated because adding a VLAN
is now done through the ops function pointers inside of it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-10 19:48:56 +03:00
|
|
|
static int sja1105_dsa_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid,
|
|
|
|
u16 flags)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
net: dsa: sja1105: prevent tag_8021q VLANs from being received on user ports
Currently it is possible for an attacker to craft packets with a fake
DSA tag and send them to us, and our user ports will accept them and
preserve that VLAN when transmitting towards the CPU. Then the tagger
will be misled into thinking that the packets came on a different port
than they really came on.
Up until recently there wasn't a good option to prevent this from
happening. In SJA1105P and later, the MAC Configuration Table introduced
two options called:
- DRPSITAG: Drop Single Inner Tagged Frames
- DRPSOTAG: Drop Single Outer Tagged Frames
Because the sja1105 driver classifies all VLANs as "outer VLANs" (S-Tags),
it would be in principle possible to enable the DRPSOTAG bit on ports
using tag_8021q, and drop on ingress all packets which have a VLAN tag.
When the switch is VLAN-unaware, this works, because it uses a custom
TPID of 0xdadb, so any "tagged" packets received on a user port are
probably a spoofing attempt. But when the switch overall is VLAN-aware,
and some ports are standalone (therefore they use tag_8021q), the TPID
is 0x8100, and the port can receive a mix of untagged and VLAN-tagged
packets. The untagged ones will be classified to the tag_8021q pvid, and
the tagged ones to the VLAN ID from the packet header. Yes, it is true
that since commit 4fbc08bd3665 ("net: dsa: sja1105: deny 8021q uppers on
ports") we no longer support this mixed mode, but that is a temporary
limitation which will eventually be lifted. It would be nice to not
introduce one more restriction via DRPSOTAG, which would make the
standalone ports of a VLAN-aware switch drop genuinely VLAN-tagged
packets.
Also, the DRPSOTAG bit is not available on the first generation of
switches (SJA1105E, SJA1105T). So since one of the key features of this
driver is compatibility across switch generations, this makes it an even
less desirable approach.
The breakthrough comes from commit bef0746cf4cc ("net: dsa: sja1105:
make sure untagged packets are dropped on ingress ports with no pvid"),
where it became obvious that untagged packets are not dropped even if
the ingress port is not in the VMEMB_PORT vector of that port's pvid.
However, VLAN-tagged packets are subject to VLAN ingress
checking/dropping. This means that instead of using the catch-all
DRPSOTAG bit introduced in SJA1105P, we can drop tagged packets on a
per-VLAN basis, and this is already compatible with SJA1105E/T.
This patch adds an "allowed_ingress" argument to sja1105_vlan_add(), and
we call it with "false" for tag_8021q VLANs on user ports. The tag_8021q
VLANs still need to be allowed, of course, on ingress to DSA ports and
CPU ports.
We also need to refine the drop_untagged check in sja1105_commit_pvid to
make it not freak out about this new configuration. Currently it will
try to keep the configuration consistent between untagged and pvid-tagged
packets, so if the pvid of a port is 1 but VLAN 1 is not in VMEMB_PORT,
packets tagged with VID 1 will behave the same as untagged packets, and
be dropped. This behavior is what we want for ports under a VLAN-aware
bridge, but for the ports with a tag_8021q pvid, we want untagged
packets to be accepted, but packets tagged with a header recognized by
the switch as a tag_8021q VLAN to be dropped. So only restrict the
drop_untagged check to apply to the bridge_pvid, not to the tag_8021q_pvid.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:00 +03:00
|
|
|
bool allowed_ingress = true;
|
net: dsa: tag_8021q: add a context structure
While working on another tag_8021q driver implementation, some things
became apparent:
- It is not mandatory for a DSA driver to offload the tag_8021q VLANs by
using the VLAN table per se. For example, it can add custom TCAM rules
that simply encapsulate RX traffic, and redirect & decapsulate rules
for TX traffic. For such a driver, it makes no sense to receive the
tag_8021q configuration through the same callback as it receives the
VLAN configuration from the bridge and the 8021q modules.
- Currently, sja1105 (the only tag_8021q user) sets a
priv->expect_dsa_8021q variable to distinguish between the bridge
calling, and tag_8021q calling. That can be improved, to say the
least.
- The crosschip bridging operations are, in fact, stateful already. The
list of crosschip_links must be kept by the caller and passed to the
relevant tag_8021q functions.
So it would be nice if the tag_8021q configuration was more
self-contained. This patch attempts to do that.
Create a struct dsa_8021q_context which encapsulates a struct
dsa_switch, and has 2 function pointers for adding and deleting a VLAN.
These will replace the previous channel to the driver, which was through
the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops.
Also put the list of crosschip_links into this dsa_8021q_context.
Drivers that don't support cross-chip bridging can simply omit to
initialize this list, as long as they don't call any cross-chip function.
The sja1105_vlan_add and sja1105_vlan_del functions are refactored into
a smaller sja1105_vlan_add_one, which now has 2 entry points:
- sja1105_vlan_add, from struct dsa_switch_ops
- sja1105_dsa_8021q_vlan_add, from the tag_8021q ops
But even this change is fairly trivial. It just reflects the fact that
for sja1105, the VLANs from these 2 channels end up in the same hardware
table. However that is not necessarily true in the general sense (and
that's the reason for making this change).
The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The
dsa_8021q_context structure needs to be propagated because adding a VLAN
is now done through the ops function pointers inside of it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-10 19:48:56 +03:00
|
|
|
int rc;
|
|
|
|
|
net: dsa: sja1105: prevent tag_8021q VLANs from being received on user ports
Currently it is possible for an attacker to craft packets with a fake
DSA tag and send them to us, and our user ports will accept them and
preserve that VLAN when transmitting towards the CPU. Then the tagger
will be misled into thinking that the packets came on a different port
than they really came on.
Up until recently there wasn't a good option to prevent this from
happening. In SJA1105P and later, the MAC Configuration Table introduced
two options called:
- DRPSITAG: Drop Single Inner Tagged Frames
- DRPSOTAG: Drop Single Outer Tagged Frames
Because the sja1105 driver classifies all VLANs as "outer VLANs" (S-Tags),
it would be in principle possible to enable the DRPSOTAG bit on ports
using tag_8021q, and drop on ingress all packets which have a VLAN tag.
When the switch is VLAN-unaware, this works, because it uses a custom
TPID of 0xdadb, so any "tagged" packets received on a user port are
probably a spoofing attempt. But when the switch overall is VLAN-aware,
and some ports are standalone (therefore they use tag_8021q), the TPID
is 0x8100, and the port can receive a mix of untagged and VLAN-tagged
packets. The untagged ones will be classified to the tag_8021q pvid, and
the tagged ones to the VLAN ID from the packet header. Yes, it is true
that since commit 4fbc08bd3665 ("net: dsa: sja1105: deny 8021q uppers on
ports") we no longer support this mixed mode, but that is a temporary
limitation which will eventually be lifted. It would be nice to not
introduce one more restriction via DRPSOTAG, which would make the
standalone ports of a VLAN-aware switch drop genuinely VLAN-tagged
packets.
Also, the DRPSOTAG bit is not available on the first generation of
switches (SJA1105E, SJA1105T). So since one of the key features of this
driver is compatibility across switch generations, this makes it an even
less desirable approach.
The breakthrough comes from commit bef0746cf4cc ("net: dsa: sja1105:
make sure untagged packets are dropped on ingress ports with no pvid"),
where it became obvious that untagged packets are not dropped even if
the ingress port is not in the VMEMB_PORT vector of that port's pvid.
However, VLAN-tagged packets are subject to VLAN ingress
checking/dropping. This means that instead of using the catch-all
DRPSOTAG bit introduced in SJA1105P, we can drop tagged packets on a
per-VLAN basis, and this is already compatible with SJA1105E/T.
This patch adds an "allowed_ingress" argument to sja1105_vlan_add(), and
we call it with "false" for tag_8021q VLANs on user ports. The tag_8021q
VLANs still need to be allowed, of course, on ingress to DSA ports and
CPU ports.
We also need to refine the drop_untagged check in sja1105_commit_pvid to
make it not freak out about this new configuration. Currently it will
try to keep the configuration consistent between untagged and pvid-tagged
packets, so if the pvid of a port is 1 but VLAN 1 is not in VMEMB_PORT,
packets tagged with VID 1 will behave the same as untagged packets, and
be dropped. This behavior is what we want for ports under a VLAN-aware
bridge, but for the ports with a tag_8021q pvid, we want untagged
packets to be accepted, but packets tagged with a header recognized by
the switch as a tag_8021q VLAN to be dropped. So only restrict the
drop_untagged check to apply to the bridge_pvid, not to the tag_8021q_pvid.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-24 20:15:00 +03:00
|
|
|
/* Prevent attackers from trying to inject a DSA tag from
|
|
|
|
* the outside world.
|
|
|
|
*/
|
|
|
|
if (dsa_is_user_port(ds, port))
|
|
|
|
allowed_ingress = false;
|
|
|
|
|
|
|
|
rc = sja1105_vlan_add(priv, port, vid, flags, allowed_ingress);
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
if (rc)
|
net: dsa: tag_8021q: add a context structure
While working on another tag_8021q driver implementation, some things
became apparent:
- It is not mandatory for a DSA driver to offload the tag_8021q VLANs by
using the VLAN table per se. For example, it can add custom TCAM rules
that simply encapsulate RX traffic, and redirect & decapsulate rules
for TX traffic. For such a driver, it makes no sense to receive the
tag_8021q configuration through the same callback as it receives the
VLAN configuration from the bridge and the 8021q modules.
- Currently, sja1105 (the only tag_8021q user) sets a
priv->expect_dsa_8021q variable to distinguish between the bridge
calling, and tag_8021q calling. That can be improved, to say the
least.
- The crosschip bridging operations are, in fact, stateful already. The
list of crosschip_links must be kept by the caller and passed to the
relevant tag_8021q functions.
So it would be nice if the tag_8021q configuration was more
self-contained. This patch attempts to do that.
Create a struct dsa_8021q_context which encapsulates a struct
dsa_switch, and has 2 function pointers for adding and deleting a VLAN.
These will replace the previous channel to the driver, which was through
the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops.
Also put the list of crosschip_links into this dsa_8021q_context.
Drivers that don't support cross-chip bridging can simply omit to
initialize this list, as long as they don't call any cross-chip function.
The sja1105_vlan_add and sja1105_vlan_del functions are refactored into
a smaller sja1105_vlan_add_one, which now has 2 entry points:
- sja1105_vlan_add, from struct dsa_switch_ops
- sja1105_dsa_8021q_vlan_add, from the tag_8021q ops
But even this change is fairly trivial. It just reflects the fact that
for sja1105, the VLANs from these 2 channels end up in the same hardware
table. However that is not necessarily true in the general sense (and
that's the reason for making this change).
The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The
dsa_8021q_context structure needs to be propagated because adding a VLAN
is now done through the ops function pointers inside of it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-10 19:48:56 +03:00
|
|
|
return rc;
|
|
|
|
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
if (flags & BRIDGE_VLAN_INFO_PVID)
|
|
|
|
priv->tag_8021q_pvid[port] = vid;
|
|
|
|
|
|
|
|
return sja1105_commit_pvid(ds, port);
|
net: dsa: tag_8021q: add a context structure
While working on another tag_8021q driver implementation, some things
became apparent:
- It is not mandatory for a DSA driver to offload the tag_8021q VLANs by
using the VLAN table per se. For example, it can add custom TCAM rules
that simply encapsulate RX traffic, and redirect & decapsulate rules
for TX traffic. For such a driver, it makes no sense to receive the
tag_8021q configuration through the same callback as it receives the
VLAN configuration from the bridge and the 8021q modules.
- Currently, sja1105 (the only tag_8021q user) sets a
priv->expect_dsa_8021q variable to distinguish between the bridge
calling, and tag_8021q calling. That can be improved, to say the
least.
- The crosschip bridging operations are, in fact, stateful already. The
list of crosschip_links must be kept by the caller and passed to the
relevant tag_8021q functions.
So it would be nice if the tag_8021q configuration was more
self-contained. This patch attempts to do that.
Create a struct dsa_8021q_context which encapsulates a struct
dsa_switch, and has 2 function pointers for adding and deleting a VLAN.
These will replace the previous channel to the driver, which was through
the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops.
Also put the list of crosschip_links into this dsa_8021q_context.
Drivers that don't support cross-chip bridging can simply omit to
initialize this list, as long as they don't call any cross-chip function.
The sja1105_vlan_add and sja1105_vlan_del functions are refactored into
a smaller sja1105_vlan_add_one, which now has 2 entry points:
- sja1105_vlan_add, from struct dsa_switch_ops
- sja1105_dsa_8021q_vlan_add, from the tag_8021q ops
But even this change is fairly trivial. It just reflects the fact that
for sja1105, the VLANs from these 2 channels end up in the same hardware
table. However that is not necessarily true in the general sense (and
that's the reason for making this change).
The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The
dsa_8021q_context structure needs to be propagated because adding a VLAN
is now done through the ops function pointers inside of it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-10 19:48:56 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Remove @vid from @port by delegating to sja1105_vlan_del() with the
 * driver private data attached to @ds. The return value of
 * sja1105_vlan_del() is passed back unchanged.
 */
static int sja1105_dsa_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
{
	return sja1105_vlan_del(ds->priv, port, vid);
}
|
|
|
|
|
2021-07-26 19:55:32 +03:00
|
|
|
static int sja1105_prechangeupper(struct dsa_switch *ds, int port,
|
|
|
|
struct netdev_notifier_changeupper_info *info)
|
|
|
|
{
|
|
|
|
struct netlink_ext_ack *extack = info->info.extack;
|
|
|
|
struct net_device *upper = info->upper_dev;
|
2021-07-26 19:55:33 +03:00
|
|
|
struct dsa_switch_tree *dst = ds->dst;
|
|
|
|
struct dsa_port *dp;
|
2021-07-26 19:55:32 +03:00
|
|
|
|
|
|
|
if (is_vlan_dev(upper)) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack, "8021q uppers are not supported");
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
2021-07-26 19:55:33 +03:00
|
|
|
if (netif_is_bridge_master(upper)) {
|
|
|
|
list_for_each_entry(dp, &dst->ports, list) {
|
2021-12-06 18:57:53 +02:00
|
|
|
struct net_device *br = dsa_port_bridge_dev_get(dp);
|
|
|
|
|
|
|
|
if (br && br != upper && br_vlan_enabled(br)) {
|
2021-07-26 19:55:33 +03:00
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"Only one VLAN-aware bridge is supported");
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-26 19:55:32 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-05-05 13:19:27 +03:00
|
|
|
/* Send @skb out of @port through a one-shot management route.
 *
 * A management route matching the frame's destination MAC is written into
 * @slot, the skb is enqueued on the port's user netdev, and the route is
 * then polled until the switch clears ENFPORT or the retry budget runs out.
 *
 * Returns a negative error (and frees @skb) if the route could not be
 * written; otherwise NETDEV_TX_OK, including when polling timed out.
 */
static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot,
			     struct sk_buff *skb, bool takets)
{
	struct sja1105_private *priv = ds->priv;
	struct sja1105_mgmt_entry mgmt_route = {
		.macaddr = ether_addr_to_u64(eth_hdr(skb)->h_dest),
		.destports = BIT(port),
		.enfport = 1,
		.tsreg = 0,
		.takets = takets,
	};
	int retries_left = 10;
	int rc;

	rc = sja1105_dynamic_config_write(priv, BLK_IDX_MGMT_ROUTE,
					  slot, &mgmt_route, true);
	if (rc < 0) {
		kfree_skb(skb);
		return rc;
	}

	/* Transfer skb to the host port. */
	dsa_enqueue_skb(skb, dsa_to_port(ds, port)->user);

	/* Wait until the switch has processed the frame.
	 * UM10944: The ENFPORT flag of the respective entry is cleared when
	 * a match is found. The host can use this flag as an acknowledgment.
	 */
	do {
		rc = sja1105_dynamic_config_read(priv, BLK_IDX_MGMT_ROUTE,
						 slot, &mgmt_route);
		if (rc < 0)
			dev_err_ratelimited(priv->ds->dev,
					    "failed to poll for mgmt route\n");
		else
			cpu_relax();
	} while (mgmt_route.enfport && --retries_left);

	if (!retries_left) {
		/* Clean up the management route so that a follow-up
		 * frame may not match on it by mistake.
		 * This is only hardware supported on P/Q/R/S - on E/T it is
		 * a no-op and we are silently discarding the -EOPNOTSUPP.
		 */
		sja1105_dynamic_config_write(priv, BLK_IDX_MGMT_ROUTE,
					     slot, &mgmt_route, false);
		dev_err_ratelimited(priv->ds->dev, "xmit timed out\n");
	}

	return NETDEV_TX_OK;
}
|
|
|
|
|
net: dsa: sja1105: bring deferred xmit implementation in line with ocelot-8021q
When the ocelot-8021q driver was converted to deferred xmit as part of
commit 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during
init and teardown"), the deferred implementation was deliberately made
subtly different from what sja1105 has.
The implementation differences were based on the following observations:
- There might be a race between these two lines in tag_sja1105.c:
skb_queue_tail(&sp->xmit_queue, skb_get(skb));
kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
and the skb dequeue logic in sja1105_port_deferred_xmit(). For
example, the xmit_work might be already queued, however the work item
has just finished walking through the skb queue. Because we don't
check the return code from kthread_queue_work, we don't do anything if
the work item is already queued.
However, nobody will take that skb and send it, at least until the
next timestampable skb is sent. This creates additional (and
avoidable) TX timestamping latency.
To close that race, what the ocelot-8021q driver does is it doesn't
keep a single work item per port, and a skb timestamping queue, but
rather dynamically allocates a work item per packet.
- It is also unnecessary to have more than one kthread that does the
work. So delete the per-port kthread allocations and replace them with
a single kthread which is global to the switch.
This change brings the two implementations in line by applying those
observations to the sja1105 driver as well.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-10 01:34:40 +02:00
|
|
|
/* Map a pointer to the embedded "work" member back to the
 * struct sja1105_deferred_xmit_work that contains it.
 */
#define work_to_xmit_work(kwork) \
	container_of((kwork), struct sja1105_deferred_xmit_work, work)
|
net: dsa: Make deferred_xmit private to sja1105
There are 3 things that are wrong with the DSA deferred xmit mechanism:
1. Its introduction has made the DSA hotpath ever so slightly more
inefficient for everybody, since DSA_SKB_CB(skb)->deferred_xmit needs
to be initialized to false for every transmitted frame, in order to
figure out whether the driver requested deferral or not (a very rare
occasion, rare even for the only driver that does use this mechanism:
sja1105). That was necessary to avoid kfree_skb from freeing the skb.
2. Because L2 PTP is a link-local protocol like STP, it requires
management routes and deferred xmit with this switch. But as opposed
to STP, the deferred work mechanism needs to schedule the packet
rather quickly for the TX timestamp to be collected in time and sent
to user space. But there is no provision for controlling the
scheduling priority of this deferred xmit workqueue. Too bad this is
a rather specific requirement for a feature that nobody else uses
(more below).
3. Perhaps most importantly, it makes the DSA core adhere a bit too
much to the NXP company-wide policy "Innovate Where It Doesn't
Matter". The sja1105 is probably the only DSA switch that requires
some frames sent from the CPU to be routed to the slave port via an
out-of-band configuration (register write) rather than in-band (DSA
tag). And there are indeed very good reasons to not want to do that:
if that out-of-band register is at the other end of a slow bus such
as SPI, then you limit that Ethernet flow's throughput to effectively
the throughput of the SPI bus. So hardware vendors should definitely
not be encouraged to design this way. We do _not_ want more
widespread use of this mechanism.
Luckily we have a solution for each of the 3 issues:
For 1, we can just remove that variable in the skb->cb and counteract
the effect of kfree_skb with skb_get, much to the same effect. The
advantage, of course, being that anybody who doesn't use deferred xmit
doesn't need to do any extra operation in the hotpath.
For 2, we can create a kernel thread for each port's deferred xmit work.
If the user switch ports are named swp0, swp1, swp2, the kernel threads
will be named swp0_xmit, swp1_xmit, swp2_xmit (there appears to be a 15
character length limit on kernel thread names). With this, the user can
change the scheduling priority with chrt $(pidof swp2_xmit).
For 3, we can actually move the entire implementation to the sja1105
driver.
So this patch deletes the generic implementation from the DSA core and
adds a new one, more adequate to the requirements of PTP TX
timestamping, in sja1105_main.c.
Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-04 02:37:10 +02:00
|
|
|
|
2019-05-05 13:19:27 +03:00
|
|
|
/* Deferred work is unfortunately necessary because setting up the management
|
|
|
|
* route cannot be done from atomic context (SPI transfer takes a sleepable
|
|
|
|
* lock on the bus)
|
|
|
|
*/
|
net: dsa: Make deferred_xmit private to sja1105
There are 3 things that are wrong with the DSA deferred xmit mechanism:
1. Its introduction has made the DSA hotpath ever so slightly more
inefficient for everybody, since DSA_SKB_CB(skb)->deferred_xmit needs
to be initialized to false for every transmitted frame, in order to
figure out whether the driver requested deferral or not (a very rare
occasion, rare even for the only driver that does use this mechanism:
sja1105). That was necessary to avoid kfree_skb from freeing the skb.
2. Because L2 PTP is a link-local protocol like STP, it requires
management routes and deferred xmit with this switch. But as opposed
to STP, the deferred work mechanism needs to schedule the packet
rather quickly for the TX timestamp to be collected in time and sent
to user space. But there is no provision for controlling the
scheduling priority of this deferred xmit workqueue. Too bad this is
a rather specific requirement for a feature that nobody else uses
(more below).
3. Perhaps most importantly, it makes the DSA core adhere a bit too
much to the NXP company-wide policy "Innovate Where It Doesn't
Matter". The sja1105 is probably the only DSA switch that requires
some frames sent from the CPU to be routed to the slave port via an
out-of-band configuration (register write) rather than in-band (DSA
tag). And there are indeed very good reasons to not want to do that:
if that out-of-band register is at the other end of a slow bus such
as SPI, then you limit that Ethernet flow's throughput to effectively
the throughput of the SPI bus. So hardware vendors should definitely
not be encouraged to design this way. We do _not_ want more
widespread use of this mechanism.
Luckily we have a solution for each of the 3 issues:
For 1, we can just remove that variable in the skb->cb and counteract
the effect of kfree_skb with skb_get, much to the same effect. The
advantage, of course, being that anybody who doesn't use deferred xmit
doesn't need to do any extra operation in the hotpath.
For 2, we can create a kernel thread for each port's deferred xmit work.
If the user switch ports are named swp0, swp1, swp2, the kernel threads
will be named swp0_xmit, swp1_xmit, swp2_xmit (there appears to be a 15
character length limit on kernel thread names). With this, the user can
change the scheduling priority with chrt $(pidof swp2_xmit).
For 3, we can actually move the entire implementation to the sja1105
driver.
So this patch deletes the generic implementation from the DSA core and
adds a new one, more adequate to the requirements of PTP TX
timestamping, in sja1105_main.c.
Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-04 02:37:10 +02:00
|
|
|
static void sja1105_port_deferred_xmit(struct kthread_work *work)
|
2019-05-05 13:19:27 +03:00
|
|
|
{
|
net: dsa: sja1105: bring deferred xmit implementation in line with ocelot-8021q
When the ocelot-8021q driver was converted to deferred xmit as part of
commit 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during
init and teardown"), the deferred implementation was deliberately made
subtly different from what sja1105 has.
The implementation differences lied on the following observations:
- There might be a race between these two lines in tag_sja1105.c:
skb_queue_tail(&sp->xmit_queue, skb_get(skb));
kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
and the skb dequeue logic in sja1105_port_deferred_xmit(). For
example, the xmit_work might be already queued, however the work item
has just finished walking through the skb queue. Because we don't
check the return code from kthread_queue_work, we don't do anything if
the work item is already queued.
However, nobody will take that skb and send it, at least until the
next timestampable skb is sent. This creates additional (and
avoidable) TX timestamping latency.
To close that race, what the ocelot-8021q driver does is it doesn't
keep a single work item per port, and a skb timestamping queue, but
rather dynamically allocates a work item per packet.
- It is also unnecessary to have more than one kthread that does the
work. So delete the per-port kthread allocations and replace them with
a single kthread which is global to the switch.
This change brings the two implementations in line by applying those
observations to the sja1105 driver as well.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-10 01:34:40 +02:00
|
|
|
struct sja1105_deferred_xmit_work *xmit_work = work_to_xmit_work(work);
|
|
|
|
struct sk_buff *clone, *skb = xmit_work->skb;
|
|
|
|
struct dsa_switch *ds = xmit_work->dp->ds;
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
int port = xmit_work->dp->index;
|
2019-05-05 13:19:27 +03:00
|
|
|
|
net: dsa: sja1105: bring deferred xmit implementation in line with ocelot-8021q
When the ocelot-8021q driver was converted to deferred xmit as part of
commit 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during
init and teardown"), the deferred implementation was deliberately made
subtly different from what sja1105 has.
The implementation differences lied on the following observations:
- There might be a race between these two lines in tag_sja1105.c:
skb_queue_tail(&sp->xmit_queue, skb_get(skb));
kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
and the skb dequeue logic in sja1105_port_deferred_xmit(). For
example, the xmit_work might be already queued, however the work item
has just finished walking through the skb queue. Because we don't
check the return code from kthread_queue_work, we don't do anything if
the work item is already queued.
However, nobody will take that skb and send it, at least until the
next timestampable skb is sent. This creates additional (and
avoidable) TX timestamping latency.
To close that race, what the ocelot-8021q driver does is it doesn't
keep a single work item per port, and a skb timestamping queue, but
rather dynamically allocates a work item per packet.
- It is also unnecessary to have more than one kthread that does the
work. So delete the per-port kthread allocations and replace them with
a single kthread which is global to the switch.
This change brings the two implementations in line by applying those
observations to the sja1105 driver as well.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-10 01:34:40 +02:00
|
|
|
clone = SJA1105_SKB_CB(skb)->clone;
|
2019-06-08 15:04:35 +03:00
|
|
|
|
net: dsa: sja1105: bring deferred xmit implementation in line with ocelot-8021q
When the ocelot-8021q driver was converted to deferred xmit as part of
commit 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during
init and teardown"), the deferred implementation was deliberately made
subtly different from what sja1105 has.
The implementation differences lied on the following observations:
- There might be a race between these two lines in tag_sja1105.c:
skb_queue_tail(&sp->xmit_queue, skb_get(skb));
kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
and the skb dequeue logic in sja1105_port_deferred_xmit(). For
example, the xmit_work might be already queued, however the work item
has just finished walking through the skb queue. Because we don't
check the return code from kthread_queue_work, we don't do anything if
the work item is already queued.
However, nobody will take that skb and send it, at least until the
next timestampable skb is sent. This creates additional (and
avoidable) TX timestamping latency.
To close that race, what the ocelot-8021q driver does is it doesn't
keep a single work item per port, and a skb timestamping queue, but
rather dynamically allocates a work item per packet.
- It is also unnecessary to have more than one kthread that does the
work. So delete the per-port kthread allocations and replace them with
a single kthread which is global to the switch.
This change brings the two implementations in line by applying those
observations to the sja1105 driver as well.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-10 01:34:40 +02:00
|
|
|
mutex_lock(&priv->mgmt_lock);
|
2019-06-08 15:04:35 +03:00
|
|
|
|
net: dsa: sja1105: bring deferred xmit implementation in line with ocelot-8021q
When the ocelot-8021q driver was converted to deferred xmit as part of
commit 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during
init and teardown"), the deferred implementation was deliberately made
subtly different from what sja1105 has.
The implementation differences lied on the following observations:
- There might be a race between these two lines in tag_sja1105.c:
skb_queue_tail(&sp->xmit_queue, skb_get(skb));
kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
and the skb dequeue logic in sja1105_port_deferred_xmit(). For
example, the xmit_work might be already queued, however the work item
has just finished walking through the skb queue. Because we don't
check the return code from kthread_queue_work, we don't do anything if
the work item is already queued.
However, nobody will take that skb and send it, at least until the
next timestampable skb is sent. This creates additional (and
avoidable) TX timestamping latency.
To close that race, what the ocelot-8021q driver does is it doesn't
keep a single work item per port, and a skb timestamping queue, but
rather dynamically allocates a work item per packet.
- It is also unnecessary to have more than one kthread that does the
work. So delete the per-port kthread allocations and replace them with
a single kthread which is global to the switch.
This change brings the two implementations in line by applying those
observations to the sja1105 driver as well.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-10 01:34:40 +02:00
|
|
|
sja1105_mgmt_xmit(ds, port, 0, skb, !!clone);
|
2019-06-08 15:04:35 +03:00
|
|
|
|
net: dsa: sja1105: bring deferred xmit implementation in line with ocelot-8021q
When the ocelot-8021q driver was converted to deferred xmit as part of
commit 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during
init and teardown"), the deferred implementation was deliberately made
subtly different from what sja1105 has.
The implementation differences lied on the following observations:
- There might be a race between these two lines in tag_sja1105.c:
skb_queue_tail(&sp->xmit_queue, skb_get(skb));
kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
and the skb dequeue logic in sja1105_port_deferred_xmit(). For
example, the xmit_work might be already queued, however the work item
has just finished walking through the skb queue. Because we don't
check the return code from kthread_queue_work, we don't do anything if
the work item is already queued.
However, nobody will take that skb and send it, at least until the
next timestampable skb is sent. This creates additional (and
avoidable) TX timestamping latency.
To close that race, what the ocelot-8021q driver does is it doesn't
keep a single work item per port, and a skb timestamping queue, but
rather dynamically allocates a work item per packet.
- It is also unnecessary to have more than one kthread that does the
work. So delete the per-port kthread allocations and replace them with
a single kthread which is global to the switch.
This change brings the two implementations in line by applying those
observations to the sja1105 driver as well.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-10 01:34:40 +02:00
|
|
|
/* The clone, if there, was made by dsa_skb_tx_timestamp */
|
|
|
|
if (clone)
|
|
|
|
sja1105_ptp_txtstamp_skb(ds, port, clone);
|
2019-06-08 15:04:35 +03:00
|
|
|
|
net: dsa: sja1105: bring deferred xmit implementation in line with ocelot-8021q
When the ocelot-8021q driver was converted to deferred xmit as part of
commit 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during
init and teardown"), the deferred implementation was deliberately made
subtly different from what sja1105 has.
The implementation differences lied on the following observations:
- There might be a race between these two lines in tag_sja1105.c:
skb_queue_tail(&sp->xmit_queue, skb_get(skb));
kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
and the skb dequeue logic in sja1105_port_deferred_xmit(). For
example, the xmit_work might be already queued, however the work item
has just finished walking through the skb queue. Because we don't
check the return code from kthread_queue_work, we don't do anything if
the work item is already queued.
However, nobody will take that skb and send it, at least until the
next timestampable skb is sent. This creates additional (and
avoidable) TX timestamping latency.
To close that race, what the ocelot-8021q driver does is it doesn't
keep a single work item per port, and a skb timestamping queue, but
rather dynamically allocates a work item per packet.
- It is also unnecessary to have more than one kthread that does the
work. So delete the per-port kthread allocations and replace them with
a single kthread which is global to the switch.
This change brings the two implementations in line by applying those
observations to the sja1105 driver as well.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-10 01:34:40 +02:00
|
|
|
mutex_unlock(&priv->mgmt_lock);
|
|
|
|
|
|
|
|
kfree(xmit_work);
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
2021-12-10 01:34:44 +02:00
|
|
|
static int sja1105_connect_tag_protocol(struct dsa_switch *ds,
|
|
|
|
enum dsa_tag_protocol proto)
|
|
|
|
{
|
2021-12-14 03:45:35 +02:00
|
|
|
struct sja1105_private *priv = ds->priv;
|
2021-12-10 01:34:44 +02:00
|
|
|
struct sja1105_tagger_data *tagger_data;
|
|
|
|
|
2021-12-14 03:45:35 +02:00
|
|
|
if (proto != priv->info->tag_proto)
|
2021-12-10 01:34:44 +02:00
|
|
|
return -EPROTONOSUPPORT;
|
2021-12-14 03:45:35 +02:00
|
|
|
|
|
|
|
tagger_data = sja1105_tagger_data(ds);
|
|
|
|
tagger_data->xmit_work_fn = sja1105_port_deferred_xmit;
|
|
|
|
tagger_data->meta_tstamp_handler = sja1110_process_meta_tstamp;
|
|
|
|
|
|
|
|
return 0;
|
2021-12-10 01:34:44 +02:00
|
|
|
}
|
|
|
|
|
2019-05-02 23:23:36 +03:00
|
|
|
/* The MAXAGE setting belongs to the L2 Forwarding Parameters table,
|
|
|
|
* which cannot be reconfigured at runtime. So a switch reset is required.
|
|
|
|
*/
|
|
|
|
static int sja1105_set_ageing_time(struct dsa_switch *ds,
|
|
|
|
unsigned int ageing_time)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_lookup_params_entry *l2_lookup_params;
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
struct sja1105_table *table;
|
|
|
|
unsigned int maxage;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS];
|
|
|
|
l2_lookup_params = table->entries;
|
|
|
|
|
|
|
|
maxage = SJA1105_AGEING_TIME_MS(ageing_time);
|
|
|
|
|
|
|
|
if (l2_lookup_params->maxage == maxage)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
l2_lookup_params->maxage = maxage;
|
|
|
|
|
2019-11-12 23:22:00 +02:00
|
|
|
return sja1105_static_config_reload(priv, SJA1105_AGEING_TIME);
|
2019-05-02 23:23:36 +03:00
|
|
|
}
|
|
|
|
|
2020-03-27 21:55:45 +02:00
|
|
|
static int sja1105_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_policing_entry *policing;
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
|
|
|
|
new_mtu += VLAN_ETH_HLEN + ETH_FCS_LEN;
|
|
|
|
|
2021-08-04 16:54:34 +03:00
|
|
|
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
|
2020-03-27 21:55:45 +02:00
|
|
|
new_mtu += VLAN_HLEN;
|
|
|
|
|
|
|
|
policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
|
|
|
|
|
2020-03-29 14:52:01 +03:00
|
|
|
if (policing[port].maxlen == new_mtu)
|
2020-03-27 21:55:45 +02:00
|
|
|
return 0;
|
|
|
|
|
2020-03-29 14:52:01 +03:00
|
|
|
policing[port].maxlen = new_mtu;
|
2020-03-27 21:55:45 +02:00
|
|
|
|
|
|
|
return sja1105_static_config_reload(priv, SJA1105_BEST_EFFORT_POLICING);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_get_max_mtu(struct dsa_switch *ds, int port)
|
|
|
|
{
|
|
|
|
return 2043 - VLAN_ETH_HLEN - ETH_FCS_LEN;
|
|
|
|
}
|
|
|
|
|
2019-09-15 05:00:02 +03:00
|
|
|
static int sja1105_port_setup_tc(struct dsa_switch *ds, int port,
|
|
|
|
enum tc_setup_type type,
|
|
|
|
void *type_data)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case TC_SETUP_QDISC_TAPRIO:
|
|
|
|
return sja1105_setup_tc_taprio(ds, port, type_data);
|
2020-05-28 03:27:58 +03:00
|
|
|
case TC_SETUP_QDISC_CBS:
|
|
|
|
return sja1105_setup_tc_cbs(ds, port, type_data);
|
2019-09-15 05:00:02 +03:00
|
|
|
default:
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-04 03:33:47 +03:00
|
|
|
/* We have a single mirror (@to) port, but can configure ingress and egress
|
|
|
|
* mirroring on all other (@from) ports.
|
|
|
|
* We need to allow mirroring rules only as long as the @to port is always the
|
|
|
|
* same, and we need to unset the @to port from mirr_port only when there is no
|
|
|
|
* mirroring rule that references it.
|
|
|
|
*/
|
|
|
|
static int sja1105_mirror_apply(struct sja1105_private *priv, int from, int to,
|
|
|
|
bool ingress, bool enabled)
|
|
|
|
{
|
|
|
|
struct sja1105_general_params_entry *general_params;
|
|
|
|
struct sja1105_mac_config_entry *mac;
|
2021-05-24 16:14:13 +03:00
|
|
|
struct dsa_switch *ds = priv->ds;
|
2019-10-04 03:33:47 +03:00
|
|
|
struct sja1105_table *table;
|
|
|
|
bool already_enabled;
|
|
|
|
u64 new_mirr_port;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
|
|
|
|
general_params = table->entries;
|
|
|
|
|
|
|
|
mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
|
|
|
|
|
2021-05-24 16:14:13 +03:00
|
|
|
already_enabled = (general_params->mirr_port != ds->num_ports);
|
2019-10-04 03:33:47 +03:00
|
|
|
if (already_enabled && enabled && general_params->mirr_port != to) {
|
|
|
|
dev_err(priv->ds->dev,
|
|
|
|
"Delete mirroring rules towards port %llu first\n",
|
|
|
|
general_params->mirr_port);
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
|
|
|
new_mirr_port = to;
|
|
|
|
if (!enabled) {
|
|
|
|
bool keep = false;
|
|
|
|
int port;
|
|
|
|
|
|
|
|
/* Anybody still referencing mirr_port? */
|
2021-05-24 16:14:13 +03:00
|
|
|
for (port = 0; port < ds->num_ports; port++) {
|
2019-10-04 03:33:47 +03:00
|
|
|
if (mac[port].ing_mirr || mac[port].egr_mirr) {
|
|
|
|
keep = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Unset already_enabled for next time */
|
|
|
|
if (!keep)
|
2021-05-24 16:14:13 +03:00
|
|
|
new_mirr_port = ds->num_ports;
|
2019-10-04 03:33:47 +03:00
|
|
|
}
|
|
|
|
if (new_mirr_port != general_params->mirr_port) {
|
|
|
|
general_params->mirr_port = new_mirr_port;
|
|
|
|
|
|
|
|
rc = sja1105_dynamic_config_write(priv, BLK_IDX_GENERAL_PARAMS,
|
|
|
|
0, general_params, true);
|
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ingress)
|
|
|
|
mac[from].ing_mirr = enabled;
|
|
|
|
else
|
|
|
|
mac[from].egr_mirr = enabled;
|
|
|
|
|
|
|
|
return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, from,
|
|
|
|
&mac[from], true);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_mirror_add(struct dsa_switch *ds, int port,
|
|
|
|
struct dsa_mall_mirror_tc_entry *mirror,
|
2022-03-16 22:41:43 +02:00
|
|
|
bool ingress, struct netlink_ext_ack *extack)
|
2019-10-04 03:33:47 +03:00
|
|
|
{
|
|
|
|
return sja1105_mirror_apply(ds->priv, port, mirror->to_local_port,
|
|
|
|
ingress, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sja1105_mirror_del(struct dsa_switch *ds, int port,
|
|
|
|
struct dsa_mall_mirror_tc_entry *mirror)
|
|
|
|
{
|
|
|
|
sja1105_mirror_apply(ds->priv, port, mirror->to_local_port,
|
|
|
|
mirror->ingress, false);
|
|
|
|
}
|
|
|
|
|
2020-03-29 14:52:01 +03:00
|
|
|
static int sja1105_port_policer_add(struct dsa_switch *ds, int port,
|
|
|
|
struct dsa_mall_policer_tc_entry *policer)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_policing_entry *policing;
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
|
|
|
|
policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
|
|
|
|
|
|
|
|
/* In hardware, every 8 microseconds the credit level is incremented by
|
|
|
|
* the value of RATE bytes divided by 64, up to a maximum of SMAX
|
|
|
|
* bytes.
|
|
|
|
*/
|
|
|
|
policing[port].rate = div_u64(512 * policer->rate_bytes_per_sec,
|
|
|
|
1000000);
|
2020-06-29 14:54:16 +08:00
|
|
|
policing[port].smax = policer->burst;
|
2020-03-29 14:52:01 +03:00
|
|
|
|
|
|
|
return sja1105_static_config_reload(priv, SJA1105_BEST_EFFORT_POLICING);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sja1105_port_policer_del(struct dsa_switch *ds, int port)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_policing_entry *policing;
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
|
|
|
|
policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
|
|
|
|
|
|
|
|
policing[port].rate = SJA1105_RATE_MBPS(1000);
|
|
|
|
policing[port].smax = 65535;
|
|
|
|
|
|
|
|
sja1105_static_config_reload(priv, SJA1105_BEST_EFFORT_POLICING);
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
static int sja1105_port_set_learning(struct sja1105_private *priv, int port,
|
|
|
|
bool enabled)
|
|
|
|
{
|
|
|
|
struct sja1105_mac_config_entry *mac;
|
|
|
|
|
|
|
|
mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
|
|
|
|
|
2021-02-16 13:41:18 +02:00
|
|
|
mac[port].dyn_learn = enabled;
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
|
2021-08-08 17:35:26 +03:00
|
|
|
return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
|
|
|
|
&mac[port], true);
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to,
|
|
|
|
struct switchdev_brport_flags flags)
|
|
|
|
{
|
2021-02-16 13:41:19 +02:00
|
|
|
if (flags.mask & BR_FLOOD) {
|
|
|
|
if (flags.val & BR_FLOOD)
|
|
|
|
priv->ucast_egress_floods |= BIT(to);
|
|
|
|
else
|
2021-03-04 12:56:54 +02:00
|
|
|
priv->ucast_egress_floods &= ~BIT(to);
|
2021-02-16 13:41:19 +02:00
|
|
|
}
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
|
2021-02-16 13:41:19 +02:00
|
|
|
if (flags.mask & BR_BCAST_FLOOD) {
|
|
|
|
if (flags.val & BR_BCAST_FLOOD)
|
|
|
|
priv->bcast_egress_floods |= BIT(to);
|
|
|
|
else
|
2021-03-04 12:56:54 +02:00
|
|
|
priv->bcast_egress_floods &= ~BIT(to);
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
}
|
|
|
|
|
2021-02-16 13:41:19 +02:00
|
|
|
return sja1105_manage_flood_domains(priv);
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_port_mcast_flood(struct sja1105_private *priv, int to,
|
|
|
|
struct switchdev_brport_flags flags,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct sja1105_l2_lookup_entry *l2_lookup;
|
|
|
|
struct sja1105_table *table;
|
2023-09-08 16:33:51 +03:00
|
|
|
int match, rc;
|
|
|
|
|
|
|
|
mutex_lock(&priv->fdb_lock);
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
|
|
|
|
table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP];
|
|
|
|
l2_lookup = table->entries;
|
|
|
|
|
|
|
|
for (match = 0; match < table->entry_count; match++)
|
|
|
|
if (l2_lookup[match].macaddr == SJA1105_UNKNOWN_MULTICAST &&
|
|
|
|
l2_lookup[match].mask_macaddr == SJA1105_UNKNOWN_MULTICAST)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (match == table->entry_count) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"Could not find FDB entry for unknown multicast");
|
2023-09-08 16:33:51 +03:00
|
|
|
rc = -ENOSPC;
|
|
|
|
goto out;
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (flags.val & BR_MCAST_FLOOD)
|
|
|
|
l2_lookup[match].destports |= BIT(to);
|
|
|
|
else
|
|
|
|
l2_lookup[match].destports &= ~BIT(to);
|
|
|
|
|
2023-09-08 16:33:51 +03:00
|
|
|
rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
|
|
|
|
l2_lookup[match].index,
|
|
|
|
&l2_lookup[match], true);
|
|
|
|
out:
|
|
|
|
mutex_unlock(&priv->fdb_lock);
|
|
|
|
|
|
|
|
return rc;
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_port_pre_bridge_flags(struct dsa_switch *ds, int port,
|
|
|
|
struct switchdev_brport_flags flags,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
|
|
|
|
if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
|
|
|
|
BR_BCAST_FLOOD))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (flags.mask & (BR_FLOOD | BR_MCAST_FLOOD) &&
|
|
|
|
!priv->info->can_limit_mcast_flood) {
|
|
|
|
bool multicast = !!(flags.val & BR_MCAST_FLOOD);
|
|
|
|
bool unicast = !!(flags.val & BR_FLOOD);
|
|
|
|
|
|
|
|
if (unicast != multicast) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"This chip cannot configure multicast flooding independently of unicast");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_port_bridge_flags(struct dsa_switch *ds, int port,
|
|
|
|
struct switchdev_brport_flags flags,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
if (flags.mask & BR_LEARNING) {
|
|
|
|
bool learn_ena = !!(flags.val & BR_LEARNING);
|
|
|
|
|
|
|
|
rc = sja1105_port_set_learning(priv, port, learn_ena);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flags.mask & (BR_FLOOD | BR_BCAST_FLOOD)) {
|
|
|
|
rc = sja1105_port_ucast_bcast_flood(priv, port, flags);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* For chips that can't offload BR_MCAST_FLOOD independently, there
|
|
|
|
* is nothing to do here, we ensured the configuration is in sync by
|
|
|
|
* offloading BR_FLOOD.
|
|
|
|
*/
|
|
|
|
if (flags.mask & BR_MCAST_FLOOD && priv->info->can_limit_mcast_flood) {
|
|
|
|
rc = sja1105_port_mcast_flood(priv, port, flags,
|
|
|
|
extack);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: reorganize probe, remove, setup and teardown ordering
The sja1105 driver's initialization and teardown sequence is a chaotic
mess that has gathered a lot of cruft over time. It works because there
is no strict dependency between the functions, but it could be improved.
The basic principle that teardown should be the exact reverse of setup
is obviously not held. We have initialization steps (sja1105_tas_setup,
sja1105_flower_setup) in the probe method that are torn down in the DSA
.teardown method instead of driver unbind time.
We also have code after the dsa_register_switch() call, which implicitly
means after the .setup() method has finished, which is pretty unusual.
Also, sja1105_teardown() has calls set up in a different order than the
error path of sja1105_setup(): see the reversed ordering between
sja1105_ptp_clock_unregister and sja1105_mdiobus_unregister.
Also, sja1105_static_config_load() is called towards the end of
sja1105_setup(), but sja1105_static_config_free() is also towards the
end of the error path and teardown path. The static_config_load() call
should be earlier.
Also, making and breaking the connections between struct sja1105_port
and struct dsa_port could be refactored into dedicated functions, makes
the code easier to follow.
We move some code from the DSA .setup() method into the probe method,
like the device tree parsing, and we move some code from the probe
method into the DSA .setup() method to be symmetric with its placement
in the DSA .teardown() method, which is nice because the unbind function
has a single call to dsa_unregister_switch(). Example of the latter type
of code movement are the connections between ports mentioned above, they
are now in the .setup() method.
Finally, due to fact that the kthread_init_worker() call is no longer
in sja1105_probe() - located towards the bottom of the file - but in
sja1105_setup() - located much higher - there is an inverse ordering
with the worker function declaration, sja1105_port_deferred_xmit. To
avoid that, the entire sja1105_setup() and sja1105_teardown() functions
are moved towards the bottom of the file.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 15:00:35 +03:00
|
|
|
/* The programming model for the SJA1105 switch is "all-at-once" via static
|
|
|
|
* configuration tables. Some of these can be dynamically modified at runtime,
|
|
|
|
* but not the xMII mode parameters table.
|
|
|
|
* Furthermore, some PHYs may not have crystals for generating their clocks
|
|
|
|
* (e.g. RMII). Instead, their 50MHz clock is supplied via the SJA1105 port's
|
|
|
|
* ref_clk pin. So port clocking needs to be initialized early, before
|
|
|
|
* connecting to PHYs is attempted, otherwise they won't respond through MDIO.
|
|
|
|
* Setting correct PHY link speed does not matter now.
|
2023-10-23 11:17:28 -07:00
|
|
|
* But dsa_user_phy_setup is called later than sja1105_setup, so the PHY
|
net: dsa: sja1105: reorganize probe, remove, setup and teardown ordering
The sja1105 driver's initialization and teardown sequence is a chaotic
mess that has gathered a lot of cruft over time. It works because there
is no strict dependency between the functions, but it could be improved.
The basic principle that teardown should be the exact reverse of setup
is obviously not held. We have initialization steps (sja1105_tas_setup,
sja1105_flower_setup) in the probe method that are torn down in the DSA
.teardown method instead of driver unbind time.
We also have code after the dsa_register_switch() call, which implicitly
means after the .setup() method has finished, which is pretty unusual.
Also, sja1105_teardown() has calls set up in a different order than the
error path of sja1105_setup(): see the reversed ordering between
sja1105_ptp_clock_unregister and sja1105_mdiobus_unregister.
Also, sja1105_static_config_load() is called towards the end of
sja1105_setup(), but sja1105_static_config_free() is also towards the
end of the error path and teardown path. The static_config_load() call
should be earlier.
Also, making and breaking the connections between struct sja1105_port
and struct dsa_port could be refactored into dedicated functions, makes
the code easier to follow.
We move some code from the DSA .setup() method into the probe method,
like the device tree parsing, and we move some code from the probe
method into the DSA .setup() method to be symmetric with its placement
in the DSA .teardown() method, which is nice because the unbind function
has a single call to dsa_unregister_switch(). Example of the latter type
of code movement are the connections between ports mentioned above, they
are now in the .setup() method.
Finally, due to fact that the kthread_init_worker() call is no longer
in sja1105_probe() - located towards the bottom of the file - but in
sja1105_setup() - located much higher - there is an inverse ordering
with the worker function declaration, sja1105_port_deferred_xmit. To
avoid that, the entire sja1105_setup() and sja1105_teardown() functions
are moved towards the bottom of the file.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 15:00:35 +03:00
|
|
|
* bindings are not yet parsed by DSA core. We need to parse early so that we
|
|
|
|
* can populate the xMII mode parameters table.
|
|
|
|
*/
|
|
|
|
static int sja1105_setup(struct dsa_switch *ds)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
if (priv->info->disable_microcontroller) {
|
|
|
|
rc = priv->info->disable_microcontroller(priv);
|
|
|
|
if (rc < 0) {
|
|
|
|
dev_err(ds->dev,
|
|
|
|
"Failed to disable microcontroller: %pe\n",
|
|
|
|
ERR_PTR(rc));
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create and send configuration down to device */
|
|
|
|
rc = sja1105_static_config_load(priv);
|
|
|
|
if (rc < 0) {
|
|
|
|
dev_err(ds->dev, "Failed to load static config: %d\n", rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Configure the CGU (PHY link modes and speeds) */
|
|
|
|
if (priv->info->clocking_setup) {
|
|
|
|
rc = priv->info->clocking_setup(priv);
|
|
|
|
if (rc < 0) {
|
|
|
|
dev_err(ds->dev,
|
|
|
|
"Failed to configure MII clocking: %pe\n",
|
|
|
|
ERR_PTR(rc));
|
|
|
|
goto out_static_config_free;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
sja1105_tas_setup(ds);
|
|
|
|
sja1105_flower_setup(ds);
|
|
|
|
|
|
|
|
rc = sja1105_ptp_clock_register(ds);
|
|
|
|
if (rc < 0) {
|
|
|
|
dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc);
|
|
|
|
goto out_flower_teardown;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = sja1105_mdiobus_register(ds);
|
|
|
|
if (rc < 0) {
|
|
|
|
dev_err(ds->dev, "Failed to register MDIO bus: %pe\n",
|
|
|
|
ERR_PTR(rc));
|
|
|
|
goto out_ptp_clock_unregister;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = sja1105_devlink_setup(ds);
|
|
|
|
if (rc < 0)
|
|
|
|
goto out_mdiobus_unregister;
|
|
|
|
|
|
|
|
rtnl_lock();
|
|
|
|
rc = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q));
|
|
|
|
rtnl_unlock();
|
|
|
|
if (rc)
|
|
|
|
goto out_devlink_teardown;
|
|
|
|
|
|
|
|
/* On SJA1105, VLAN filtering per se is always enabled in hardware.
|
|
|
|
* The only thing we can do to disable it is lie about what the 802.1Q
|
|
|
|
* EtherType is.
|
|
|
|
* So it will still try to apply VLAN filtering, but all ingress
|
|
|
|
* traffic (except frames received with EtherType of ETH_P_SJA1105)
|
|
|
|
* will be internally tagged with a distorted VLAN header where the
|
|
|
|
* TPID is ETH_P_SJA1105, and the VLAN ID is the port pvid.
|
|
|
|
*/
|
|
|
|
ds->vlan_filtering_is_global = true;
|
|
|
|
ds->untag_bridge_pvid = true;
|
2022-02-25 11:22:24 +02:00
|
|
|
ds->fdb_isolation = true;
|
net: dsa: sja1105: reorganize probe, remove, setup and teardown ordering
The sja1105 driver's initialization and teardown sequence is a chaotic
mess that has gathered a lot of cruft over time. It works because there
is no strict dependency between the functions, but it could be improved.
The basic principle that teardown should be the exact reverse of setup
is obviously not held. We have initialization steps (sja1105_tas_setup,
sja1105_flower_setup) in the probe method that are torn down in the DSA
.teardown method instead of driver unbind time.
We also have code after the dsa_register_switch() call, which implicitly
means after the .setup() method has finished, which is pretty unusual.
Also, sja1105_teardown() has calls set up in a different order than the
error path of sja1105_setup(): see the reversed ordering between
sja1105_ptp_clock_unregister and sja1105_mdiobus_unregister.
Also, sja1105_static_config_load() is called towards the end of
sja1105_setup(), but sja1105_static_config_free() is also towards the
end of the error path and teardown path. The static_config_load() call
should be earlier.
Also, making and breaking the connections between struct sja1105_port
and struct dsa_port could be refactored into dedicated functions, makes
the code easier to follow.
We move some code from the DSA .setup() method into the probe method,
like the device tree parsing, and we move some code from the probe
method into the DSA .setup() method to be symmetric with its placement
in the DSA .teardown() method, which is nice because the unbind function
has a single call to dsa_unregister_switch(). Example of the latter type
of code movement are the connections between ports mentioned above, they
are now in the .setup() method.
Finally, due to fact that the kthread_init_worker() call is no longer
in sja1105_probe() - located towards the bottom of the file - but in
sja1105_setup() - located much higher - there is an inverse ordering
with the worker function declaration, sja1105_port_deferred_xmit. To
avoid that, the entire sja1105_setup() and sja1105_teardown() functions
are moved towards the bottom of the file.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 15:00:35 +03:00
|
|
|
/* tag_8021q has 3 bits for the VBID, and the value 0 is reserved */
|
2021-12-06 18:57:48 +02:00
|
|
|
ds->max_num_bridges = 7;
|
net: dsa: sja1105: reorganize probe, remove, setup and teardown ordering
The sja1105 driver's initialization and teardown sequence is a chaotic
mess that has gathered a lot of cruft over time. It works because there
is no strict dependency between the functions, but it could be improved.
The basic principle that teardown should be the exact reverse of setup
is obviously not held. We have initialization steps (sja1105_tas_setup,
sja1105_flower_setup) in the probe method that are torn down in the DSA
.teardown method instead of driver unbind time.
We also have code after the dsa_register_switch() call, which implicitly
means after the .setup() method has finished, which is pretty unusual.
Also, sja1105_teardown() has calls set up in a different order than the
error path of sja1105_setup(): see the reversed ordering between
sja1105_ptp_clock_unregister and sja1105_mdiobus_unregister.
Also, sja1105_static_config_load() is called towards the end of
sja1105_setup(), but sja1105_static_config_free() is also towards the
end of the error path and teardown path. The static_config_load() call
should be earlier.
Also, making and breaking the connections between struct sja1105_port
and struct dsa_port could be refactored into dedicated functions, makes
the code easier to follow.
We move some code from the DSA .setup() method into the probe method,
like the device tree parsing, and we move some code from the probe
method into the DSA .setup() method to be symmetric with its placement
in the DSA .teardown() method, which is nice because the unbind function
has a single call to dsa_unregister_switch(). Example of the latter type
of code movement are the connections between ports mentioned above, they
are now in the .setup() method.
Finally, due to fact that the kthread_init_worker() call is no longer
in sja1105_probe() - located towards the bottom of the file - but in
sja1105_setup() - located much higher - there is an inverse ordering
with the worker function declaration, sja1105_port_deferred_xmit. To
avoid that, the entire sja1105_setup() and sja1105_teardown() functions
are moved towards the bottom of the file.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 15:00:35 +03:00
|
|
|
|
|
|
|
/* Advertise the 8 egress queues */
|
|
|
|
ds->num_tx_queues = SJA1105_NUM_TC;
|
|
|
|
|
|
|
|
ds->mtu_enforcement_ingress = true;
|
|
|
|
ds->assisted_learning_on_cpu_port = true;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_devlink_teardown:
|
|
|
|
sja1105_devlink_teardown(ds);
|
|
|
|
out_mdiobus_unregister:
|
|
|
|
sja1105_mdiobus_unregister(ds);
|
|
|
|
out_ptp_clock_unregister:
|
|
|
|
sja1105_ptp_clock_unregister(ds);
|
|
|
|
out_flower_teardown:
|
|
|
|
sja1105_flower_teardown(ds);
|
|
|
|
sja1105_tas_teardown(ds);
|
|
|
|
out_static_config_free:
|
|
|
|
sja1105_static_config_free(&priv->static_config);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sja1105_teardown(struct dsa_switch *ds)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = ds->priv;
|
|
|
|
|
|
|
|
rtnl_lock();
|
|
|
|
dsa_tag_8021q_unregister(ds);
|
|
|
|
rtnl_unlock();
|
|
|
|
|
|
|
|
sja1105_devlink_teardown(ds);
|
|
|
|
sja1105_mdiobus_unregister(ds);
|
|
|
|
sja1105_ptp_clock_unregister(ds);
|
|
|
|
sja1105_flower_teardown(ds);
|
|
|
|
sja1105_tas_teardown(ds);
|
|
|
|
sja1105_static_config_free(&priv->static_config);
|
|
|
|
}
|
|
|
|
|
2024-04-12 16:15:13 +01:00
|
|
|
static const struct phylink_mac_ops sja1105_phylink_mac_ops = {
|
|
|
|
.mac_select_pcs = sja1105_mac_select_pcs,
|
|
|
|
.mac_config = sja1105_mac_config,
|
|
|
|
.mac_link_up = sja1105_mac_link_up,
|
|
|
|
.mac_link_down = sja1105_mac_link_down,
|
|
|
|
};
|
|
|
|
|
net: dsa: sja1105: break dependency between dsa_port_is_sja1105 and switch driver
It's nice to be able to test a tagging protocol with dsa_loop, but not
at the cost of losing the ability of building the tagging protocol and
switch driver as modules, because as things stand, there is a circular
dependency between the two. Tagging protocol drivers cannot depend on
switch drivers, that is a hard fact.
The reasoning behind the blamed patch was that accessing dp->priv should
first make sure that the structure behind that pointer is what we really
think it is.
Currently the "sja1105" and "sja1110" tagging protocols only operate
with the sja1105 switch driver, just like any other tagging protocol and
switch combination. The only way to mix and match them is by modifying
the code, and this applies to dsa_loop as well (by default that uses
DSA_TAG_PROTO_NONE). So while in principle there is an issue, in
practice there isn't one.
Until we extend dsa_loop to allow user space configuration, treat the
problem as a non-issue and just say that DSA ports found by tag_sja1105
are always sja1105 ports, which is in fact true. But keep the
dsa_port_is_sja1105 function so that it's easy to patch it during
testing, and rely on dead code elimination.
Fixes: 994d2cbb08ca ("net: dsa: tag_sja1105: be dsa_loop-safe")
Link: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-09-22 17:37:26 +03:00
|
|
|
static const struct dsa_switch_ops sja1105_switch_ops = {
|
2019-05-02 23:23:30 +03:00
|
|
|
.get_tag_protocol = sja1105_get_tag_protocol,
|
2021-12-10 01:34:44 +02:00
|
|
|
.connect_tag_protocol = sja1105_connect_tag_protocol,
|
2019-05-02 23:23:30 +03:00
|
|
|
.setup = sja1105_setup,
|
2019-06-08 15:04:42 +03:00
|
|
|
.teardown = sja1105_teardown,
|
2019-05-02 23:23:36 +03:00
|
|
|
.set_ageing_time = sja1105_set_ageing_time,
|
2020-03-27 21:55:45 +02:00
|
|
|
.port_change_mtu = sja1105_change_mtu,
|
|
|
|
.port_max_mtu = sja1105_get_max_mtu,
|
2022-02-25 11:56:02 +00:00
|
|
|
.phylink_get_caps = sja1105_phylink_get_caps,
|
2019-05-02 23:23:35 +03:00
|
|
|
.get_strings = sja1105_get_strings,
|
|
|
|
.get_ethtool_stats = sja1105_get_ethtool_stats,
|
|
|
|
.get_sset_count = sja1105_get_sset_count,
|
net: dsa: sja1105: Add support for the PTP clock
The design of this PHC driver is influenced by the switch's behavior
w.r.t. timestamping. It exposes two PTP counters, one free-running
(PTPTSCLK) and the other offset- and frequency-corrected in hardware
through PTPCLKVAL, PTPCLKADD and PTPCLKRATE. The MACs can sample either
of these for frame timestamps.
However, the user manual warns that taking timestamps based on the
corrected clock is less than useful, as the switch can deliver corrupted
timestamps in a variety of circumstances.
Therefore, this PHC uses the free-running PTPTSCLK together with a
timecounter/cyclecounter structure that translates it into a software
time domain. Thus, the settime/adjtime and adjfine callbacks are
hardware no-ops.
The timestamps (introduced in a further patch) will also be translated
to the correct time domain before being handed over to the userspace PTP
stack.
The introduction of a second set of PHC operations that operate on the
hardware PTPCLKVAL/PTPCLKADD/PTPCLKRATE in the future is somewhat
unavoidable, as the TTEthernet core uses the corrected PTP time domain.
However, the free-running counter + timecounter structure combination
will suffice for now, as the resulting timestamps yield a sub-50 ns
synchronization offset in steady state using linuxptp.
For this patch, in absence of frame timestamping, the operations of the
switch PHC were tested by syncing it to the system time as a local slave
clock with:
phc2sys -s CLOCK_REALTIME -c swp2 -O 0 -m -S 0.01
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-08 15:04:34 +03:00
|
|
|
.get_ts_info = sja1105_get_ts_info,
|
2019-05-02 23:23:31 +03:00
|
|
|
.port_fdb_dump = sja1105_fdb_dump,
|
|
|
|
.port_fdb_add = sja1105_fdb_add,
|
|
|
|
.port_fdb_del = sja1105_fdb_del,
|
2021-08-08 17:35:27 +03:00
|
|
|
.port_fast_age = sja1105_fast_age,
|
2019-05-02 23:23:30 +03:00
|
|
|
.port_bridge_join = sja1105_bridge_join,
|
|
|
|
.port_bridge_leave = sja1105_bridge_leave,
|
net: dsa: sja1105: offload bridge port flags to device
The chip can configure unicast flooding, broadcast flooding and learning.
Learning is per port, while flooding is per {ingress, egress} port pair
and we need to configure the same value for all possible ingress ports
towards the requested one.
While multicast flooding is not officially supported, we can hack it by
using a feature of the second generation (P/Q/R/S) devices, which is that
FDB entries are maskable, and multicast addresses always have an odd
first octet. So by putting a match-all for 00:01:00:00:00:00 addr and
00:01:00:00:00:00 mask at the end of the FDB, we make sure that it is
always checked last, and does not take precedence in front of any other
MDB. So it behaves effectively as an unknown multicast entry.
For the first generation switches, this feature is not available, so
unknown multicast will always be treated the same as unknown unicast.
So the only thing we can do is request the user to offload the settings
for these 2 flags in tandem, i.e.
ip link set swp2 type bridge_slave flood off
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
ip link set swp2 type bridge_slave flood off mcast_flood off
ip link set swp2 type bridge_slave mcast_flood on
Error: sja1105: This chip cannot configure multicast flooding independently of unicast.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 17:16:00 +02:00
|
|
|
.port_pre_bridge_flags = sja1105_port_pre_bridge_flags,
|
|
|
|
.port_bridge_flags = sja1105_port_bridge_flags,
|
2019-05-05 13:19:28 +03:00
|
|
|
.port_stp_state_set = sja1105_bridge_stp_state_set,
|
2019-05-02 23:23:34 +03:00
|
|
|
.port_vlan_filtering = sja1105_vlan_filtering,
|
net: dsa: sja1105: delete vlan delta save/restore logic
With the best_effort_vlan_filtering mode now gone, the driver does not
have 3 operating modes anymore (VLAN-unaware, VLAN-aware and best effort),
but only 2.
The idea is that we will gain support for network stack I/O through a
VLAN-aware bridge, using the data plane offload framework (imprecise RX,
imprecise TX). So the VLAN-aware use case will be more functional.
But standalone ports that are part of the same switch when some other
ports are under a VLAN-aware bridge should work too. Termination on
those should work through the tag_8021q RX VLAN and TX VLAN.
This was not possible using the old logic, because:
- in VLAN-unaware mode, only the tag_8021q VLANs were committed to hw
- in VLAN-aware mode, only the bridge VLANs were committed to hw
- in best-effort VLAN mode, both the tag_8021q and bridge VLANs were
committed to hw
The strategy for the new VLAN-aware mode is to allow the bridge and the
tag_8021q VLANs to coexist in the VLAN table at the same time.
[ yes, we need to make sure that the bridge cannot install a tag_8021q
VLAN, but ]
This means that the save/restore logic introduced by commit ec5ae61076d0
("net: dsa: sja1105: save/restore VLANs using a delta commit method")
does not serve a purpose any longer. We can delete it and restore the
old code that simply adds a VLAN to the VLAN table and calls it a day.
Note that we keep the sja1105_commit_pvid() function from those days,
but adapt it slightly. Ports that are under a VLAN-aware bridge use the
bridge's pvid, ports that are standalone or under a VLAN-unaware bridge
use the tag_8021q pvid, for local termination or VLAN-unaware forwarding.
Now, when the vlan_filtering property is toggled for the bridge, the
pvid of the ports beneath it is the only thing that's changing, we no
longer delete some VLANs and restore others.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-26 19:55:31 +03:00
|
|
|
.port_vlan_add = sja1105_bridge_vlan_add,
|
|
|
|
.port_vlan_del = sja1105_bridge_vlan_del,
|
2019-05-02 23:23:31 +03:00
|
|
|
.port_mdb_add = sja1105_mdb_add,
|
|
|
|
.port_mdb_del = sja1105_mdb_del,
|
2019-06-08 15:04:43 +03:00
|
|
|
.port_hwtstamp_get = sja1105_hwtstamp_get,
|
|
|
|
.port_hwtstamp_set = sja1105_hwtstamp_set,
|
2019-06-08 15:04:42 +03:00
|
|
|
.port_rxtstamp = sja1105_port_rxtstamp,
|
2019-06-08 15:04:35 +03:00
|
|
|
.port_txtstamp = sja1105_port_txtstamp,
|
2019-09-15 05:00:02 +03:00
|
|
|
.port_setup_tc = sja1105_port_setup_tc,
|
2019-10-04 03:33:47 +03:00
|
|
|
.port_mirror_add = sja1105_mirror_add,
|
|
|
|
.port_mirror_del = sja1105_mirror_del,
|
2020-03-29 14:52:01 +03:00
|
|
|
.port_policer_add = sja1105_port_policer_add,
|
|
|
|
.port_policer_del = sja1105_port_policer_del,
|
2020-03-29 14:52:02 +03:00
|
|
|
.cls_flower_add = sja1105_cls_flower_add,
|
|
|
|
.cls_flower_del = sja1105_cls_flower_del,
|
net: dsa: sja1105: implement tc-gate using time-triggered virtual links
Restrict the TTEthernet hardware support on this switch to operate as
closely as possible to IEEE 802.1Qci. This means that it can
perform PTP-time-based ingress admission control on streams identified
by {DMAC, VID, PCP}, which is useful when trying to ensure the
determinism of traffic scheduled via IEEE 802.1Qbv.
The oddity comes from the fact that in hardware (and in TTEthernet at
large), virtual links always need a full-blown action, including not
only the type of policing, but also the list of destination ports. So in
practice, a single tc-gate action will result in all packets getting
dropped. Additional actions (either "trap" or "redirect") need to be
specified in the same filter rule such that the conforming packets are
actually forwarded somewhere.
Apart from the VL Lookup, Policing and Forwarding tables which need to
be programmed for each flow (virtual link), the Schedule engine also
needs to be told to open/close the admission gates for each individual
virtual link. A fairly accurate (and detailed) description of how that
works is already present in sja1105_tas.c, since it is already used to
trigger the egress gates for the tc-taprio offload (IEEE 802.1Qbv). Key
point here, we remember that the schedule engine supports 8
"subschedules" (execution threads that iterate through the global
schedule in parallel, and that no 2 hardware threads must execute a
schedule entry at the same time). For tc-taprio, each egress port used
one of these 8 subschedules, leaving a total of 4 subschedules unused.
In principle we could have allocated 1 subschedule for the tc-gate
offload of each ingress port, but actually the schedules of all virtual
links installed on each ingress port would have needed to be merged
together, before they could have been programmed to hardware. So
simplify our life and just merge the entire tc-gate configuration, for
all virtual links on all ingress ports, into a single subschedule. Be
sure to check that against the usual hardware scheduling conflicts, and
program it to hardware alongside any tc-taprio subschedule that may be
present.
The following scenarios were tested:
1. Quantitative testing:
tc qdisc add dev swp2 clsact
tc filter add dev swp2 ingress flower skip_sw \
dst_mac 42:be:24:9b:76:20 \
action gate index 1 base-time 0 \
sched-entry OPEN 1200 -1 -1 \
sched-entry CLOSE 1200 -1 -1 \
action trap
ping 192.168.1.2 -f
PING 192.168.1.2 (192.168.1.2) 56(84) bytes of data.
.............................
--- 192.168.1.2 ping statistics ---
948 packets transmitted, 467 received, 50.7384% packet loss, time 9671ms
2. Qualitative testing (with a phase-aligned schedule - the clocks are
synchronized by ptp4l, not shown here):
Receiver (sja1105):
tc qdisc add dev swp2 clsact
now=$(phc_ctl /dev/ptp1 get | awk '/clock time is/ {print $5}') && \
sec=$(echo $now | awk -F. '{print $1}') && \
base_time="$(((sec + 2) * 1000000000))" && \
echo "base time ${base_time}"
tc filter add dev swp2 ingress flower skip_sw \
dst_mac 42:be:24:9b:76:20 \
action gate base-time ${base_time} \
sched-entry OPEN 60000 -1 -1 \
sched-entry CLOSE 40000 -1 -1 \
action trap
Sender (enetc):
now=$(phc_ctl /dev/ptp0 get | awk '/clock time is/ {print $5}') && \
sec=$(echo $now | awk -F. '{print $1}') && \
base_time="$(((sec + 2) * 1000000000))" && \
echo "base time ${base_time}"
tc qdisc add dev eno0 parent root taprio \
num_tc 8 \
map 0 1 2 3 4 5 6 7 \
queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
base-time ${base_time} \
sched-entry S 01 50000 \
sched-entry S 00 50000 \
flags 2
ping -A 192.168.1.1
PING 192.168.1.1 (192.168.1.1): 56 data bytes
...
^C
--- 192.168.1.1 ping statistics ---
1425 packets transmitted, 1424 packets received, 0% packet loss
round-trip min/avg/max = 0.322/0.361/0.990 ms
And just for comparison, with the tc-taprio schedule deleted:
ping -A 192.168.1.1
PING 192.168.1.1 (192.168.1.1): 56 data bytes
...
^C
--- 192.168.1.1 ping statistics ---
33 packets transmitted, 19 packets received, 42% packet loss
round-trip min/avg/max = 0.336/0.464/0.597 ms
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-05 22:20:56 +03:00
|
|
|
.cls_flower_stats = sja1105_cls_flower_stats,
|
2020-09-26 02:04:21 +03:00
|
|
|
.devlink_info_get = sja1105_devlink_info_get,
|
2021-07-19 20:14:49 +03:00
|
|
|
.tag_8021q_vlan_add = sja1105_dsa_8021q_vlan_add,
|
|
|
|
.tag_8021q_vlan_del = sja1105_dsa_8021q_vlan_del,
|
2021-07-26 19:55:32 +03:00
|
|
|
.port_prechangeupper = sja1105_prechangeupper,
|
2019-05-02 23:23:30 +03:00
|
|
|
};
|
|
|
|
|
net: dsa: sja1105: use detected device id instead of DT one on mismatch
Although we can detect the chip revision 100% at runtime, it is useful
to specify it in the device tree compatible string too, because
otherwise there would be no way to assess the correctness of device tree
bindings statically, without booting a board (only some switch versions
have internal RGMII delays and/or an SGMII port).
But for testing the P/Q/R/S support, what I have is a reworked board
with the SJA1105T replaced by a pin-compatible SJA1105Q, and I don't
want to keep a separate device tree blob just for this one-off board.
Since just the chip has been replaced, its RGMII delay setup is
inherently the same (meaning: delays added by the PHY on the slave
ports, and by PCB traces on the fixed-link CPU port).
For this board, I'd rather have the driver shout at me, but go ahead and
use what it found even if it doesn't match what it's been told is there.
[ 2.970826] sja1105 spi0.1: Device tree specifies chip SJA1105T but found SJA1105Q, please fix it!
[ 2.980010] sja1105 spi0.1: Probed switch chip: SJA1105Q
[ 3.005082] sja1105 spi0.1: Enabled switch tagging
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-08-03 19:48:23 +03:00
|
|
|
/* Forward declaration of the OF match table, needed because
 * sja1105_check_device_id() below iterates over it.
 * NOTE(review): the table's definition is presumably further down in this
 * file - confirm.
 */
static const struct of_device_id sja1105_dt_ids[];
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
static int sja1105_check_device_id(struct sja1105_private *priv)
|
|
|
|
{
|
|
|
|
const struct sja1105_regs *regs = priv->info->regs;
|
|
|
|
u8 prod_id[SJA1105_SIZE_DEVICE_ID] = {0};
|
|
|
|
struct device *dev = &priv->spidev->dev;
|
net: dsa: sja1105: use detected device id instead of DT one on mismatch
Although we can detect the chip revision 100% at runtime, it is useful
to specify it in the device tree compatible string too, because
otherwise there would be no way to assess the correctness of device tree
bindings statically, without booting a board (only some switch versions
have internal RGMII delays and/or an SGMII port).
But for testing the P/Q/R/S support, what I have is a reworked board
with the SJA1105T replaced by a pin-compatible SJA1105Q, and I don't
want to keep a separate device tree blob just for this one-off board.
Since just the chip has been replaced, its RGMII delay setup is
inherently the same (meaning: delays added by the PHY on the slave
ports, and by PCB traces on the fixed-link CPU port).
For this board, I'd rather have the driver shout at me, but go ahead and
use what it found even if it doesn't match what it's been told is there.
[ 2.970826] sja1105 spi0.1: Device tree specifies chip SJA1105T but found SJA1105Q, please fix it!
[ 2.980010] sja1105 spi0.1: Probed switch chip: SJA1105Q
[ 3.005082] sja1105 spi0.1: Enabled switch tagging
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-08-03 19:48:23 +03:00
|
|
|
const struct of_device_id *match;
|
2019-10-01 22:18:00 +03:00
|
|
|
u32 device_id;
|
2019-05-02 23:23:30 +03:00
|
|
|
u64 part_no;
|
|
|
|
int rc;
|
|
|
|
|
2019-11-09 13:32:22 +02:00
|
|
|
rc = sja1105_xfer_u32(priv, SPI_READ, regs->device_id, &device_id,
|
|
|
|
NULL);
|
2019-05-02 23:23:30 +03:00
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
|
2019-10-01 22:18:01 +03:00
|
|
|
rc = sja1105_xfer_buf(priv, SPI_READ, regs->prod_id, prod_id,
|
|
|
|
SJA1105_SIZE_DEVICE_ID);
|
2019-05-02 23:23:30 +03:00
|
|
|
if (rc < 0)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
sja1105_unpack(prod_id, &part_no, 19, 4, SJA1105_SIZE_DEVICE_ID);
|
|
|
|
|
2020-08-21 15:25:16 -07:00
|
|
|
for (match = sja1105_dt_ids; match->compatible[0]; match++) {
|
net: dsa: sja1105: use detected device id instead of DT one on mismatch
Although we can detect the chip revision 100% at runtime, it is useful
to specify it in the device tree compatible string too, because
otherwise there would be no way to assess the correctness of device tree
bindings statically, without booting a board (only some switch versions
have internal RGMII delays and/or an SGMII port).
But for testing the P/Q/R/S support, what I have is a reworked board
with the SJA1105T replaced by a pin-compatible SJA1105Q, and I don't
want to keep a separate device tree blob just for this one-off board.
Since just the chip has been replaced, its RGMII delay setup is
inherently the same (meaning: delays added by the PHY on the slave
ports, and by PCB traces on the fixed-link CPU port).
For this board, I'd rather have the driver shout at me, but go ahead and
use what it found even if it doesn't match what it's been told is there.
[ 2.970826] sja1105 spi0.1: Device tree specifies chip SJA1105T but found SJA1105Q, please fix it!
[ 2.980010] sja1105 spi0.1: Probed switch chip: SJA1105Q
[ 3.005082] sja1105 spi0.1: Enabled switch tagging
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-08-03 19:48:23 +03:00
|
|
|
const struct sja1105_info *info = match->data;
|
|
|
|
|
|
|
|
/* Is what's been probed in our match table at all? */
|
|
|
|
if (info->device_id != device_id || info->part_no != part_no)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* But is it what's in the device tree? */
|
|
|
|
if (priv->info->device_id != device_id ||
|
|
|
|
priv->info->part_no != part_no) {
|
|
|
|
dev_warn(dev, "Device tree specifies chip %s but found %s, please fix it!\n",
|
|
|
|
priv->info->name, info->name);
|
|
|
|
/* It isn't. No problem, pick that up. */
|
|
|
|
priv->info = info;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: use detected device id instead of DT one on mismatch
Although we can detect the chip revision 100% at runtime, it is useful
to specify it in the device tree compatible string too, because
otherwise there would be no way to assess the correctness of device tree
bindings statically, without booting a board (only some switch versions
have internal RGMII delays and/or an SGMII port).
But for testing the P/Q/R/S support, what I have is a reworked board
with the SJA1105T replaced by a pin-compatible SJA1105Q, and I don't
want to keep a separate device tree blob just for this one-off board.
Since just the chip has been replaced, its RGMII delay setup is
inherently the same (meaning: delays added by the PHY on the slave
ports, and by PCB traces on the fixed-link CPU port).
For this board, I'd rather have the driver shout at me, but go ahead and
use what it found even if it doesn't match what it's been told is there.
[ 2.970826] sja1105 spi0.1: Device tree specifies chip SJA1105T but found SJA1105Q, please fix it!
[ 2.980010] sja1105 spi0.1: Probed switch chip: SJA1105Q
[ 3.005082] sja1105 spi0.1: Enabled switch tagging
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-08-03 19:48:23 +03:00
|
|
|
dev_err(dev, "Unexpected {device ID, part number}: 0x%x 0x%llx\n",
|
|
|
|
device_id, part_no);
|
|
|
|
|
|
|
|
return -ENODEV;
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sja1105_probe(struct spi_device *spi)
|
|
|
|
{
|
|
|
|
struct device *dev = &spi->dev;
|
|
|
|
struct sja1105_private *priv;
|
2021-05-21 00:16:57 +03:00
|
|
|
size_t max_xfer, max_msg;
|
2019-05-02 23:23:30 +03:00
|
|
|
struct dsa_switch *ds;
|
net: dsa: sja1105: reorganize probe, remove, setup and teardown ordering
The sja1105 driver's initialization and teardown sequence is a chaotic
mess that has gathered a lot of cruft over time. It works because there
is no strict dependency between the functions, but it could be improved.
The basic principle that teardown should be the exact reverse of setup
is obviously not held. We have initialization steps (sja1105_tas_setup,
sja1105_flower_setup) in the probe method that are torn down in the DSA
.teardown method instead of driver unbind time.
We also have code after the dsa_register_switch() call, which implicitly
means after the .setup() method has finished, which is pretty unusual.
Also, sja1105_teardown() has calls set up in a different order than the
error path of sja1105_setup(): see the reversed ordering between
sja1105_ptp_clock_unregister and sja1105_mdiobus_unregister.
Also, sja1105_static_config_load() is called towards the end of
sja1105_setup(), but sja1105_static_config_free() is also towards the
end of the error path and teardown path. The static_config_load() call
should be earlier.
Also, making and breaking the connections between struct sja1105_port
and struct dsa_port could be refactored into dedicated functions, makes
the code easier to follow.
We move some code from the DSA .setup() method into the probe method,
like the device tree parsing, and we move some code from the probe
method into the DSA .setup() method to be symmetric with its placement
in the DSA .teardown() method, which is nice because the unbind function
has a single call to dsa_unregister_switch(). Example of the latter type
of code movement are the connections between ports mentioned above, they
are now in the .setup() method.
Finally, due to the fact that the kthread_init_worker() call is no longer
in sja1105_probe() - located towards the bottom of the file - but in
sja1105_setup() - located much higher - there is an inverse ordering
with the worker function declaration, sja1105_port_deferred_xmit. To
avoid that, the entire sja1105_setup() and sja1105_teardown() functions
are moved towards the bottom of the file.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 15:00:35 +03:00
|
|
|
int rc;
|
2019-05-02 23:23:30 +03:00
|
|
|
|
|
|
|
if (!dev->of_node) {
|
|
|
|
dev_err(dev, "No DTS bindings for SJA1105 driver\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-09-22 18:10:29 +03:00
|
|
|
rc = sja1105_hw_reset(dev, 1, 1);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
priv = devm_kzalloc(dev, sizeof(struct sja1105_private), GFP_KERNEL);
|
|
|
|
if (!priv)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
/* Populate our driver private structure (priv) based on
|
|
|
|
* the device tree node that was probed (spi)
|
|
|
|
*/
|
|
|
|
priv->spidev = spi;
|
|
|
|
spi_set_drvdata(spi, priv);
|
|
|
|
|
|
|
|
/* Configure the SPI bus */
|
|
|
|
spi->bits_per_word = 8;
|
|
|
|
rc = spi_setup(spi);
|
|
|
|
if (rc < 0) {
|
|
|
|
dev_err(dev, "Could not init SPI\n");
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2021-05-21 00:16:57 +03:00
|
|
|
/* In sja1105_xfer, we send spi_messages composed of two spi_transfers:
|
|
|
|
* a small one for the message header and another one for the current
|
|
|
|
* chunk of the packed buffer.
|
|
|
|
* Check that the restrictions imposed by the SPI controller are
|
|
|
|
* respected: the chunk buffer is smaller than the max transfer size,
|
|
|
|
* and the total length of the chunk plus its message header is smaller
|
|
|
|
* than the max message size.
|
|
|
|
* We do that during probe time since the maximum transfer size is a
|
|
|
|
* runtime invariant.
|
|
|
|
*/
|
|
|
|
max_xfer = spi_max_transfer_size(spi);
|
|
|
|
max_msg = spi_max_message_size(spi);
|
|
|
|
|
|
|
|
/* We need to send at least one 64-bit word of SPI payload per message
|
|
|
|
* in order to be able to make useful progress.
|
|
|
|
*/
|
|
|
|
if (max_msg < SJA1105_SIZE_SPI_MSG_HEADER + 8) {
|
|
|
|
dev_err(dev, "SPI master cannot send large enough buffers, aborting\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
priv->max_xfer_len = SJA1105_SIZE_SPI_MSG_MAXLEN;
|
|
|
|
if (priv->max_xfer_len > max_xfer)
|
|
|
|
priv->max_xfer_len = max_xfer;
|
|
|
|
if (priv->max_xfer_len > max_msg - SJA1105_SIZE_SPI_MSG_HEADER)
|
|
|
|
priv->max_xfer_len = max_msg - SJA1105_SIZE_SPI_MSG_HEADER;
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
priv->info = of_device_get_match_data(dev);
|
|
|
|
|
|
|
|
/* Detect hardware device */
|
|
|
|
rc = sja1105_check_device_id(priv);
|
|
|
|
if (rc < 0) {
|
|
|
|
dev_err(dev, "Device ID check failed: %d\n", rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
dev_info(dev, "Probed switch chip: %s\n", priv->info->name);
|
|
|
|
|
2019-10-21 16:51:30 -04:00
|
|
|
ds = devm_kzalloc(dev, sizeof(*ds), GFP_KERNEL);
|
2019-05-02 23:23:30 +03:00
|
|
|
if (!ds)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2019-10-21 16:51:30 -04:00
|
|
|
ds->dev = dev;
|
net: dsa: sja1105: add support for the SJA1110 switch family
The SJA1110 is basically an SJA1105 with more ports, some integrated
PHYs (100base-T1 and 100base-TX) and an embedded microcontroller which
can be disabled, and the switch core can be controlled by a host running
Linux, over SPI.
This patch contains:
- the static and dynamic config packing functions, for the tables that
are common with SJA1105
- one more static config table which is "unique" to the SJA1110
(actually it is a rehash of stuff that was placed somewhere else in
SJA1105): the PCP Remapping Table
- a reset and clock configuration procedure for the SJA1110 switch.
This resets just the switch subsystem, and gates off the clock which
powers on the embedded microcontroller.
- an RGMII delay configuration procedure for SJA1110, which is very
similar to SJA1105, but different enough for us to be unable to reuse
it (this is a pattern that repeats itself)
- some adaptations to dynamic config table entries which are no longer
programmed in the same way. For example, to delete a VLAN, you used to
write an entry through the dynamic reconfiguration interface with the
desired VLAN ID, and with the VALIDENT bit set to false. Now, the VLAN
table entries contain a TYPE_ENTRY field, which must be set to zero
(in a backwards-incompatible way) in order for the entry to be deleted,
or to some other entry for the VLAN to match "inner tagged" or "outer
tagged" packets.
- a similar thing for the static config: the xMII Mode Parameters Table
encoding for SGMII and MII (the latter just when attached to a
100base-TX PHY) just isn't what it used to be in SJA1105. They are
identical, except there is an extra "special" bit which needs to be
set. Set it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-08 12:25:36 +03:00
|
|
|
ds->num_ports = priv->info->num_ports;
|
2019-05-02 23:23:30 +03:00
|
|
|
ds->ops = &sja1105_switch_ops;
|
2024-04-12 16:15:13 +01:00
|
|
|
ds->phylink_mac_ops = &sja1105_phylink_mac_ops;
|
2019-05-02 23:23:30 +03:00
|
|
|
ds->priv = priv;
|
|
|
|
priv->ds = ds;
|
|
|
|
|
2019-10-21 16:51:28 -04:00
|
|
|
mutex_init(&priv->ptp_data.lock);
|
2021-10-24 20:17:50 +03:00
|
|
|
mutex_init(&priv->dynamic_config_lock);
|
2019-10-21 16:51:28 -04:00
|
|
|
mutex_init(&priv->mgmt_lock);
|
2023-09-08 16:33:51 +03:00
|
|
|
mutex_init(&priv->fdb_lock);
|
2021-12-10 01:34:43 +02:00
|
|
|
spin_lock_init(&priv->ts_id_lock);
|
2019-10-21 16:51:28 -04:00
|
|
|
|
net: dsa: sja1105: reorganize probe, remove, setup and teardown ordering
The sja1105 driver's initialization and teardown sequence is a chaotic
mess that has gathered a lot of cruft over time. It works because there
is no strict dependency between the functions, but it could be improved.
The basic principle that teardown should be the exact reverse of setup
is obviously not held. We have initialization steps (sja1105_tas_setup,
sja1105_flower_setup) in the probe method that are torn down in the DSA
.teardown method instead of driver unbind time.
We also have code after the dsa_register_switch() call, which implicitly
means after the .setup() method has finished, which is pretty unusual.
Also, sja1105_teardown() has calls set up in a different order than the
error path of sja1105_setup(): see the reversed ordering between
sja1105_ptp_clock_unregister and sja1105_mdiobus_unregister.
Also, sja1105_static_config_load() is called towards the end of
sja1105_setup(), but sja1105_static_config_free() is also towards the
end of the error path and teardown path. The static_config_load() call
should be earlier.
Also, making and breaking the connections between struct sja1105_port
and struct dsa_port could be refactored into dedicated functions, makes
the code easier to follow.
We move some code from the DSA .setup() method into the probe method,
like the device tree parsing, and we move some code from the probe
method into the DSA .setup() method to be symmetric with its placement
in the DSA .teardown() method, which is nice because the unbind function
has a single call to dsa_unregister_switch(). Example of the latter type
of code movement are the connections between ports mentioned above, they
are now in the .setup() method.
Finally, due to the fact that the kthread_init_worker() call is no longer
in sja1105_probe() - located towards the bottom of the file - but in
sja1105_setup() - located much higher - there is an inverse ordering
with the worker function declaration, sja1105_port_deferred_xmit. To
avoid that, the entire sja1105_setup() and sja1105_teardown() functions
are moved towards the bottom of the file.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 15:00:35 +03:00
|
|
|
rc = sja1105_parse_dt(priv);
|
|
|
|
if (rc < 0) {
|
|
|
|
dev_err(ds->dev, "Failed to parse DT: %d\n", rc);
|
|
|
|
return rc;
|
|
|
|
}
|
2019-10-21 16:51:28 -04:00
|
|
|
|
2020-05-28 03:27:58 +03:00
|
|
|
if (IS_ENABLED(CONFIG_NET_SCH_CBS)) {
|
|
|
|
priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers,
|
|
|
|
sizeof(struct sja1105_cbs_entry),
|
|
|
|
GFP_KERNEL);
|
net: dsa: sja1105: reorganize probe, remove, setup and teardown ordering
The sja1105 driver's initialization and teardown sequence is a chaotic
mess that has gathered a lot of cruft over time. It works because there
is no strict dependency between the functions, but it could be improved.
The basic principle that teardown should be the exact reverse of setup
is obviously not held. We have initialization steps (sja1105_tas_setup,
sja1105_flower_setup) in the probe method that are torn down in the DSA
.teardown method instead of driver unbind time.
We also have code after the dsa_register_switch() call, which implicitly
means after the .setup() method has finished, which is pretty unusual.
Also, sja1105_teardown() has calls set up in a different order than the
error path of sja1105_setup(): see the reversed ordering between
sja1105_ptp_clock_unregister and sja1105_mdiobus_unregister.
Also, sja1105_static_config_load() is called towards the end of
sja1105_setup(), but sja1105_static_config_free() is also towards the
end of the error path and teardown path. The static_config_load() call
should be earlier.
Also, making and breaking the connections between struct sja1105_port
and struct dsa_port could be refactored into dedicated functions, which makes
the code easier to follow.
We move some code from the DSA .setup() method into the probe method,
like the device tree parsing, and we move some code from the probe
method into the DSA .setup() method to be symmetric with its placement
in the DSA .teardown() method, which is nice because the unbind function
has a single call to dsa_unregister_switch(). Example of the latter type
of code movement are the connections between ports mentioned above, they
are now in the .setup() method.
Finally, due to the fact that the kthread_init_worker() call is no longer
in sja1105_probe() - located towards the bottom of the file - but in
sja1105_setup() - located much higher - there is an inverse ordering
with the worker function declaration, sja1105_port_deferred_xmit. To
avoid that, the entire sja1105_setup() and sja1105_teardown() functions
are moved towards the bottom of the file.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 15:00:35 +03:00
|
|
|
if (!priv->cbs)
|
|
|
|
return -ENOMEM;
|
2020-05-28 03:27:58 +03:00
|
|
|
}
|
|
|
|
|
net: dsa: sja1105: reorganize probe, remove, setup and teardown ordering
The sja1105 driver's initialization and teardown sequence is a chaotic
mess that has gathered a lot of cruft over time. It works because there
is no strict dependency between the functions, but it could be improved.
The basic principle that teardown should be the exact reverse of setup
is obviously not held. We have initialization steps (sja1105_tas_setup,
sja1105_flower_setup) in the probe method that are torn down in the DSA
.teardown method instead of driver unbind time.
We also have code after the dsa_register_switch() call, which implicitly
means after the .setup() method has finished, which is pretty unusual.
Also, sja1105_teardown() has calls set up in a different order than the
error path of sja1105_setup(): see the reversed ordering between
sja1105_ptp_clock_unregister and sja1105_mdiobus_unregister.
Also, sja1105_static_config_load() is called towards the end of
sja1105_setup(), but sja1105_static_config_free() is also towards the
end of the error path and teardown path. The static_config_load() call
should be earlier.
Also, making and breaking the connections between struct sja1105_port
and struct dsa_port could be refactored into dedicated functions, which makes
the code easier to follow.
We move some code from the DSA .setup() method into the probe method,
like the device tree parsing, and we move some code from the probe
method into the DSA .setup() method to be symmetric with its placement
in the DSA .teardown() method, which is nice because the unbind function
has a single call to dsa_unregister_switch(). Example of the latter type
of code movement are the connections between ports mentioned above, they
are now in the .setup() method.
Finally, due to the fact that the kthread_init_worker() call is no longer
in sja1105_probe() - located towards the bottom of the file - but in
sja1105_setup() - located much higher - there is an inverse ordering
with the worker function declaration, sja1105_port_deferred_xmit. To
avoid that, the entire sja1105_setup() and sja1105_teardown() functions
are moved towards the bottom of the file.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 15:00:35 +03:00
|
|
|
return dsa_register_switch(priv->ds);
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
2022-01-23 18:52:01 +01:00
|
|
|
static void sja1105_remove(struct spi_device *spi)
|
2019-05-02 23:23:30 +03:00
|
|
|
{
|
|
|
|
struct sja1105_private *priv = spi_get_drvdata(spi);
|
2021-07-19 20:14:46 +03:00
|
|
|
|
net: dsa: be compatible with masters which unregister on shutdown
Lino reports that on his system with bcmgenet as DSA master and KSZ9897
as a switch, rebooting or shutting down never works properly.
What does the bcmgenet driver have special to trigger this, that other
DSA masters do not? It has an implementation of ->shutdown which simply
calls its ->remove implementation. Otherwise said, it unregisters its
network interface on shutdown.
This message can be seen in a loop, and it hangs the reboot process there:
unregister_netdevice: waiting for eth0 to become free. Usage count = 3
So why 3?
A usage count of 1 is normal for a registered network interface, and any
virtual interface which links itself as an upper of that will increment
it via dev_hold. In the case of DSA, this is the call path:
dsa_slave_create
-> netdev_upper_dev_link
-> __netdev_upper_dev_link
-> __netdev_adjacent_dev_insert
-> dev_hold
So a DSA switch with 3 interfaces will result in a usage count elevated
by two, and netdev_wait_allrefs will wait until they have gone away.
Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and
delete themselves, but DSA cannot just vanish and go poof, at most it
can unbind itself from the switch devices, but that must happen strictly
earlier compared to when the DSA master unregisters its net_device, so
reacting on the NETDEV_UNREGISTER event is way too late.
It seems that it is a pretty established pattern to have a driver's
->shutdown hook redirect to its ->remove hook, so the same code is
executed regardless of whether the driver is unbound from the device, or
the system is just shutting down. As Florian puts it, it is quite a big
hammer for bcmgenet to unregister its net_device during shutdown, but
having a common code path with the driver unbind helps ensure it is well
tested.
So DSA, for better or for worse, has to live with that and engage in an
arms race of implementing the ->shutdown hook too, from all individual
drivers, and do something sane when paired with masters that unregister
their net_device there. The only sane thing to do, of course, is to
unlink from the master.
However, complications arise really quickly.
The pattern of redirecting ->shutdown to ->remove is not unique to
bcmgenet or even to net_device drivers. In fact, SPI controllers do it
too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers
and MDIO controllers do it too (this is something I have not researched
too deeply, but even if this is not the case today, it is certainly
plausible to happen in the future, and must be taken into consideration).
Since DSA switches might be SPI devices, I2C devices, MDIO devices, the
insane implication is that for the exact same DSA switch device, we
might have both ->shutdown and ->remove getting called.
So we need to do something with that insane environment. The pattern
I've come up with is "if this, then not that", so if either ->shutdown
or ->remove gets called, we set the device's drvdata to NULL, and in the
other hook, we check whether the drvdata is NULL and just do nothing.
This is probably not necessary for platform devices, just for devices on
buses, but I would really insist for consistency among drivers, because
when code is copy-pasted, it is not always copy-pasted from the best
sources.
So depending on whether the DSA switch's ->remove or ->shutdown will get
called first, we cannot really guarantee even for the same driver if
rebooting will result in the same code path on all platforms. But
nonetheless, we need to do something minimally reasonable on ->shutdown
too to fix the bug. Of course, the ->remove will do more (a full
teardown of the tree, with all data structures freed, and this is why
the bug was not caught for so long). The new ->shutdown method is kept
separate from dsa_unregister_switch not because we couldn't have
unregistered the switch, but simply in the interest of doing something
quick and to the point.
The big question is: does the DSA switch's ->shutdown get called earlier
than the DSA master's ->shutdown? If not, there is still a risk that we
might still trigger the WARN_ON in unregister_netdevice that says we are
attempting to unregister a net_device which has uppers. That's no good.
Although the reference to the master net_device won't physically go away
even if DSA's ->shutdown comes afterwards, remember we have a dev_hold
on it.
The answer to that question lies in this comment above device_link_add:
* A side effect of the link creation is re-ordering of dpm_list and the
* devices_kset list by moving the consumer device and all devices depending
* on it to the ends of these lists (that does not happen to devices that have
* not been registered when this function is called).
so the fact that DSA uses device_link_add towards its master is not
exactly for nothing. device_shutdown() walks devices_kset from the back,
so this is our guarantee that DSA's shutdown happens before the master's
shutdown.
Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings")
Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-09-17 16:34:33 +03:00
|
|
|
if (!priv)
|
2022-01-23 18:52:01 +01:00
|
|
|
return;
|
net: dsa: be compatible with masters which unregister on shutdown
Lino reports that on his system with bcmgenet as DSA master and KSZ9897
as a switch, rebooting or shutting down never works properly.
What does the bcmgenet driver have special to trigger this, that other
DSA masters do not? It has an implementation of ->shutdown which simply
calls its ->remove implementation. Otherwise said, it unregisters its
network interface on shutdown.
This message can be seen in a loop, and it hangs the reboot process there:
unregister_netdevice: waiting for eth0 to become free. Usage count = 3
So why 3?
A usage count of 1 is normal for a registered network interface, and any
virtual interface which links itself as an upper of that will increment
it via dev_hold. In the case of DSA, this is the call path:
dsa_slave_create
-> netdev_upper_dev_link
-> __netdev_upper_dev_link
-> __netdev_adjacent_dev_insert
-> dev_hold
So a DSA switch with 3 interfaces will result in a usage count elevated
by two, and netdev_wait_allrefs will wait until they have gone away.
Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and
delete themselves, but DSA cannot just vanish and go poof, at most it
can unbind itself from the switch devices, but that must happen strictly
earlier compared to when the DSA master unregisters its net_device, so
reacting on the NETDEV_UNREGISTER event is way too late.
It seems that it is a pretty established pattern to have a driver's
->shutdown hook redirect to its ->remove hook, so the same code is
executed regardless of whether the driver is unbound from the device, or
the system is just shutting down. As Florian puts it, it is quite a big
hammer for bcmgenet to unregister its net_device during shutdown, but
having a common code path with the driver unbind helps ensure it is well
tested.
So DSA, for better or for worse, has to live with that and engage in an
arms race of implementing the ->shutdown hook too, from all individual
drivers, and do something sane when paired with masters that unregister
their net_device there. The only sane thing to do, of course, is to
unlink from the master.
However, complications arise really quickly.
The pattern of redirecting ->shutdown to ->remove is not unique to
bcmgenet or even to net_device drivers. In fact, SPI controllers do it
too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers
and MDIO controllers do it too (this is something I have not researched
too deeply, but even if this is not the case today, it is certainly
plausible to happen in the future, and must be taken into consideration).
Since DSA switches might be SPI devices, I2C devices, MDIO devices, the
insane implication is that for the exact same DSA switch device, we
might have both ->shutdown and ->remove getting called.
So we need to do something with that insane environment. The pattern
I've come up with is "if this, then not that", so if either ->shutdown
or ->remove gets called, we set the device's drvdata to NULL, and in the
other hook, we check whether the drvdata is NULL and just do nothing.
This is probably not necessary for platform devices, just for devices on
buses, but I would really insist for consistency among drivers, because
when code is copy-pasted, it is not always copy-pasted from the best
sources.
So depending on whether the DSA switch's ->remove or ->shutdown will get
called first, we cannot really guarantee even for the same driver if
rebooting will result in the same code path on all platforms. But
nonetheless, we need to do something minimally reasonable on ->shutdown
too to fix the bug. Of course, the ->remove will do more (a full
teardown of the tree, with all data structures freed, and this is why
the bug was not caught for so long). The new ->shutdown method is kept
separate from dsa_unregister_switch not because we couldn't have
unregistered the switch, but simply in the interest of doing something
quick and to the point.
The big question is: does the DSA switch's ->shutdown get called earlier
than the DSA master's ->shutdown? If not, there is still a risk that we
might still trigger the WARN_ON in unregister_netdevice that says we are
attempting to unregister a net_device which has uppers. That's no good.
Although the reference to the master net_device won't physically go away
even if DSA's ->shutdown comes afterwards, remember we have a dev_hold
on it.
The answer to that question lies in this comment above device_link_add:
* A side effect of the link creation is re-ordering of dpm_list and the
* devices_kset list by moving the consumer device and all devices depending
* on it to the ends of these lists (that does not happen to devices that have
* not been registered when this function is called).
so the fact that DSA uses device_link_add towards its master is not
exactly for nothing. device_shutdown() walks devices_kset from the back,
so this is our guarantee that DSA's shutdown happens before the master's
shutdown.
Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings")
Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-09-17 16:34:33 +03:00
|
|
|
|
|
|
|
dsa_unregister_switch(priv->ds);
|
2019-05-02 23:23:30 +03:00
|
|
|
}
|
|
|
|
|
net: dsa: be compatible with masters which unregister on shutdown
Lino reports that on his system with bcmgenet as DSA master and KSZ9897
as a switch, rebooting or shutting down never works properly.
What does the bcmgenet driver have special to trigger this, that other
DSA masters do not? It has an implementation of ->shutdown which simply
calls its ->remove implementation. Otherwise said, it unregisters its
network interface on shutdown.
This message can be seen in a loop, and it hangs the reboot process there:
unregister_netdevice: waiting for eth0 to become free. Usage count = 3
So why 3?
A usage count of 1 is normal for a registered network interface, and any
virtual interface which links itself as an upper of that will increment
it via dev_hold. In the case of DSA, this is the call path:
dsa_slave_create
-> netdev_upper_dev_link
-> __netdev_upper_dev_link
-> __netdev_adjacent_dev_insert
-> dev_hold
So a DSA switch with 3 interfaces will result in a usage count elevated
by two, and netdev_wait_allrefs will wait until they have gone away.
Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and
delete themselves, but DSA cannot just vanish and go poof, at most it
can unbind itself from the switch devices, but that must happen strictly
earlier compared to when the DSA master unregisters its net_device, so
reacting on the NETDEV_UNREGISTER event is way too late.
It seems that it is a pretty established pattern to have a driver's
->shutdown hook redirect to its ->remove hook, so the same code is
executed regardless of whether the driver is unbound from the device, or
the system is just shutting down. As Florian puts it, it is quite a big
hammer for bcmgenet to unregister its net_device during shutdown, but
having a common code path with the driver unbind helps ensure it is well
tested.
So DSA, for better or for worse, has to live with that and engage in an
arms race of implementing the ->shutdown hook too, from all individual
drivers, and do something sane when paired with masters that unregister
their net_device there. The only sane thing to do, of course, is to
unlink from the master.
However, complications arise really quickly.
The pattern of redirecting ->shutdown to ->remove is not unique to
bcmgenet or even to net_device drivers. In fact, SPI controllers do it
too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers
and MDIO controllers do it too (this is something I have not researched
too deeply, but even if this is not the case today, it is certainly
plausible to happen in the future, and must be taken into consideration).
Since DSA switches might be SPI devices, I2C devices, MDIO devices, the
insane implication is that for the exact same DSA switch device, we
might have both ->shutdown and ->remove getting called.
So we need to do something with that insane environment. The pattern
I've come up with is "if this, then not that", so if either ->shutdown
or ->remove gets called, we set the device's drvdata to NULL, and in the
other hook, we check whether the drvdata is NULL and just do nothing.
This is probably not necessary for platform devices, just for devices on
buses, but I would really insist for consistency among drivers, because
when code is copy-pasted, it is not always copy-pasted from the best
sources.
So depending on whether the DSA switch's ->remove or ->shutdown will get
called first, we cannot really guarantee even for the same driver if
rebooting will result in the same code path on all platforms. But
nonetheless, we need to do something minimally reasonable on ->shutdown
too to fix the bug. Of course, the ->remove will do more (a full
teardown of the tree, with all data structures freed, and this is why
the bug was not caught for so long). The new ->shutdown method is kept
separate from dsa_unregister_switch not because we couldn't have
unregistered the switch, but simply in the interest of doing something
quick and to the point.
The big question is: does the DSA switch's ->shutdown get called earlier
than the DSA master's ->shutdown? If not, there is still a risk that we
might still trigger the WARN_ON in unregister_netdevice that says we are
attempting to unregister a net_device which has uppers. That's no good.
Although the reference to the master net_device won't physically go away
even if DSA's ->shutdown comes afterwards, remember we have a dev_hold
on it.
The answer to that question lies in this comment above device_link_add:
* A side effect of the link creation is re-ordering of dpm_list and the
* devices_kset list by moving the consumer device and all devices depending
* on it to the ends of these lists (that does not happen to devices that have
* not been registered when this function is called).
so the fact that DSA uses device_link_add towards its master is not
exactly for nothing. device_shutdown() walks devices_kset from the back,
so this is our guarantee that DSA's shutdown happens before the master's
shutdown.
Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings")
Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-09-17 16:34:33 +03:00
|
|
|
static void sja1105_shutdown(struct spi_device *spi)
|
|
|
|
{
|
|
|
|
struct sja1105_private *priv = spi_get_drvdata(spi);
|
|
|
|
|
|
|
|
if (!priv)
|
|
|
|
return;
|
|
|
|
|
|
|
|
dsa_switch_shutdown(priv->ds);
|
|
|
|
|
|
|
|
spi_set_drvdata(spi, NULL);
|
|
|
|
}
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
static const struct of_device_id sja1105_dt_ids[] = {
|
|
|
|
{ .compatible = "nxp,sja1105e", .data = &sja1105e_info },
|
|
|
|
{ .compatible = "nxp,sja1105t", .data = &sja1105t_info },
|
|
|
|
{ .compatible = "nxp,sja1105p", .data = &sja1105p_info },
|
|
|
|
{ .compatible = "nxp,sja1105q", .data = &sja1105q_info },
|
|
|
|
{ .compatible = "nxp,sja1105r", .data = &sja1105r_info },
|
|
|
|
{ .compatible = "nxp,sja1105s", .data = &sja1105s_info },
|
net: dsa: sja1105: add support for the SJA1110 switch family
The SJA1110 is basically an SJA1105 with more ports, some integrated
PHYs (100base-T1 and 100base-TX) and an embedded microcontroller which
can be disabled, and the switch core can be controlled by a host running
Linux, over SPI.
This patch contains:
- the static and dynamic config packing functions, for the tables that
are common with SJA1105
- one more static config table which is "unique" to the SJA1110
(actually it is a rehash of stuff that was placed somewhere else in
SJA1105): the PCP Remapping Table
- a reset and clock configuration procedure for the SJA1110 switch.
This resets just the switch subsystem, and gates off the clock which
powers on the embedded microcontroller.
- an RGMII delay configuration procedure for SJA1110, which is very
similar to SJA1105, but different enough for us to be unable to reuse
it (this is a pattern that repeats itself)
- some adaptations to dynamic config table entries which are no longer
programmed in the same way. For example, to delete a VLAN, you used to
write an entry through the dynamic reconfiguration interface with the
desired VLAN ID, and with the VALIDENT bit set to false. Now, the VLAN
table entries contain a TYPE_ENTRY field, which must be set to zero
(in a backwards-incompatible way) in order for the entry to be deleted,
or to some other entry for the VLAN to match "inner tagged" or "outer
tagged" packets.
- a similar thing for the static config: the xMII Mode Parameters Table
encoding for SGMII and MII (the latter just when attached to a
100base-TX PHY) just isn't what it used to be in SJA1105. They are
identical, except there is an extra "special" bit which needs to be
set. Set it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-08 12:25:36 +03:00
|
|
|
{ .compatible = "nxp,sja1110a", .data = &sja1110a_info },
|
|
|
|
{ .compatible = "nxp,sja1110b", .data = &sja1110b_info },
|
|
|
|
{ .compatible = "nxp,sja1110c", .data = &sja1110c_info },
|
|
|
|
{ .compatible = "nxp,sja1110d", .data = &sja1110d_info },
|
2019-05-02 23:23:30 +03:00
|
|
|
{ /* sentinel */ },
|
|
|
|
};
|
|
|
|
MODULE_DEVICE_TABLE(of, sja1105_dt_ids);
|
|
|
|
|
net: dsa: sja1105: silent spi_device_id warnings
Add spi_device_id entries to silence the following warnings:
SPI driver sja1105 has no spi_device_id for nxp,sja1105e
SPI driver sja1105 has no spi_device_id for nxp,sja1105t
SPI driver sja1105 has no spi_device_id for nxp,sja1105p
SPI driver sja1105 has no spi_device_id for nxp,sja1105q
SPI driver sja1105 has no spi_device_id for nxp,sja1105r
SPI driver sja1105 has no spi_device_id for nxp,sja1105s
SPI driver sja1105 has no spi_device_id for nxp,sja1110a
SPI driver sja1105 has no spi_device_id for nxp,sja1110b
SPI driver sja1105 has no spi_device_id for nxp,sja1110c
SPI driver sja1105 has no spi_device_id for nxp,sja1110d
Fixes: 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20220717135831.2492844-1-o.rempel@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2022-07-17 15:58:30 +02:00
|
|
|
/*
 * SPI device ID table. It lists the same chip names as the compatibles in
 * sja1105_dt_ids, without the "nxp," vendor prefix, and ends with an empty
 * entry.
 */
static const struct spi_device_id sja1105_spi_ids[] = {
	{ .name = "sja1105e" },
	{ .name = "sja1105t" },
	{ .name = "sja1105p" },
	{ .name = "sja1105q" },
	{ .name = "sja1105r" },
	{ .name = "sja1105s" },
	{ .name = "sja1110a" },
	{ .name = "sja1110b" },
	{ .name = "sja1110c" },
	{ .name = "sja1110d" },
	{ /* sentinel */ },
};
|
|
|
|
MODULE_DEVICE_TABLE(spi, sja1105_spi_ids);
|
|
|
|
|
2019-05-02 23:23:30 +03:00
|
|
|
static struct spi_driver sja1105_driver = {
|
|
|
|
.driver = {
|
|
|
|
.name = "sja1105",
|
|
|
|
.of_match_table = of_match_ptr(sja1105_dt_ids),
|
|
|
|
},
|
net: dsa: sja1105: silent spi_device_id warnings
Add spi_device_id entries to silent following warnings:
SPI driver sja1105 has no spi_device_id for nxp,sja1105e
SPI driver sja1105 has no spi_device_id for nxp,sja1105t
SPI driver sja1105 has no spi_device_id for nxp,sja1105p
SPI driver sja1105 has no spi_device_id for nxp,sja1105q
SPI driver sja1105 has no spi_device_id for nxp,sja1105r
SPI driver sja1105 has no spi_device_id for nxp,sja1105s
SPI driver sja1105 has no spi_device_id for nxp,sja1110a
SPI driver sja1105 has no spi_device_id for nxp,sja1110b
SPI driver sja1105 has no spi_device_id for nxp,sja1110c
SPI driver sja1105 has no spi_device_id for nxp,sja1110d
Fixes: 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20220717135831.2492844-1-o.rempel@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2022-07-17 15:58:30 +02:00
|
|
|
.id_table = sja1105_spi_ids,
|
2019-05-02 23:23:30 +03:00
|
|
|
.probe = sja1105_probe,
|
|
|
|
.remove = sja1105_remove,
|
net: dsa: be compatible with masters which unregister on shutdown
Lino reports that on his system with bcmgenet as DSA master and KSZ9897
as a switch, rebooting or shutting down never works properly.
What does the bcmgenet driver have special to trigger this, that other
DSA masters do not? It has an implementation of ->shutdown which simply
calls its ->remove implementation. Otherwise said, it unregisters its
network interface on shutdown.
This message can be seen in a loop, and it hangs the reboot process there:
unregister_netdevice: waiting for eth0 to become free. Usage count = 3
So why 3?
A usage count of 1 is normal for a registered network interface, and any
virtual interface which links itself as an upper of that will increment
it via dev_hold. In the case of DSA, this is the call path:
dsa_slave_create
-> netdev_upper_dev_link
-> __netdev_upper_dev_link
-> __netdev_adjacent_dev_insert
-> dev_hold
So a DSA switch with 3 interfaces will result in a usage count elevated
by two, and netdev_wait_allrefs will wait until they have gone away.
Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and
delete themselves, but DSA cannot just vanish and go poof, at most it
can unbind itself from the switch devices, but that must happen strictly
earlier compared to when the DSA master unregisters its net_device, so
reacting on the NETDEV_UNREGISTER event is way too late.
It seems that it is a pretty established pattern to have a driver's
->shutdown hook redirect to its ->remove hook, so the same code is
executed regardless of whether the driver is unbound from the device, or
the system is just shutting down. As Florian puts it, it is quite a big
hammer for bcmgenet to unregister its net_device during shutdown, but
having a common code path with the driver unbind helps ensure it is well
tested.
So DSA, for better or for worse, has to live with that and engage in an
arms race of implementing the ->shutdown hook too, from all individual
drivers, and do something sane when paired with masters that unregister
their net_device there. The only sane thing to do, of course, is to
unlink from the master.
However, complications arise really quickly.
The pattern of redirecting ->shutdown to ->remove is not unique to
bcmgenet or even to net_device drivers. In fact, SPI controllers do it
too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers
and MDIO controllers do it too (this is something I have not researched
too deeply, but even if this is not the case today, it is certainly
plausible to happen in the future, and must be taken into consideration).
Since DSA switches might be SPI devices, I2C devices, MDIO devices, the
insane implication is that for the exact same DSA switch device, we
might have both ->shutdown and ->remove getting called.
So we need to do something with that insane environment. The pattern
I've come up with is "if this, then not that", so if either ->shutdown
or ->remove gets called, we set the device's drvdata to NULL, and in the
other hook, we check whether the drvdata is NULL and just do nothing.
This is probably not necessary for platform devices, just for devices on
buses, but I would really insist for consistency among drivers, because
when code is copy-pasted, it is not always copy-pasted from the best
sources.
So depending on whether the DSA switch's ->remove or ->shutdown will get
called first, we cannot really guarantee even for the same driver if
rebooting will result in the same code path on all platforms. But
nonetheless, we need to do something minimally reasonable on ->shutdown
too to fix the bug. Of course, the ->remove will do more (a full
teardown of the tree, with all data structures freed, and this is why
the bug was not caught for so long). The new ->shutdown method is kept
separate from dsa_unregister_switch not because we couldn't have
unregistered the switch, but simply in the interest of doing something
quick and to the point.
The big question is: does the DSA switch's ->shutdown get called earlier
than the DSA master's ->shutdown? If not, there is still a risk that we
might still trigger the WARN_ON in unregister_netdevice that says we are
attempting to unregister a net_device which has uppers. That's no good.
Although the reference to the master net_device won't physically go away
even if DSA's ->shutdown comes afterwards, remember we have a dev_hold
on it.
The answer to that question lies in this comment above device_link_add:
* A side effect of the link creation is re-ordering of dpm_list and the
* devices_kset list by moving the consumer device and all devices depending
* on it to the ends of these lists (that does not happen to devices that have
* not been registered when this function is called).
so the fact that DSA uses device_link_add towards its master is not
exactly for nothing. device_shutdown() walks devices_kset from the back,
so this is our guarantee that DSA's shutdown happens before the master's
shutdown.
Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings")
Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-09-17 16:34:33 +03:00
|
|
|
.shutdown = sja1105_shutdown,
|
2019-05-02 23:23:30 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
module_spi_driver(sja1105_driver);
|
|
|
|
|
|
|
|
/* Module metadata: authors, description and license. */
MODULE_AUTHOR("Vladimir Oltean <olteanv@gmail.com>");
|
|
|
|
MODULE_AUTHOR("Georg Waibel <georg.waibel@sensor-technik.de>");
|
|
|
|
MODULE_DESCRIPTION("SJA1105 Driver");
|
|
|
|
MODULE_LICENSE("GPL v2");
|