linux/drivers/net/dsa/microchip/ksz_common.c

475 lines
11 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Microchip switch driver main logic
*
* Copyright (C) 2017-2019 Microchip Technology Inc.
*/
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/gpio/consumer.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/platform_data/microchip-ksz.h>
#include <linux/phy.h>
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
#include <linux/of_net.h>
#include <net/dsa.h>
#include <net/switchdev.h>
#include "ksz_common.h"
void ksz_update_port_member(struct ksz_device *dev, int port)
{
struct ksz_port *p = &dev->ports[port];
struct dsa_switch *ds = dev->ds;
u8 port_member = 0, cpu_port;
const struct dsa_port *dp;
int i;
if (!dsa_is_user_port(ds, port))
return;
dp = dsa_to_port(ds, port);
cpu_port = BIT(dsa_upstream_port(ds, port));
for (i = 0; i < ds->num_ports; i++) {
const struct dsa_port *other_dp = dsa_to_port(ds, i);
struct ksz_port *other_p = &dev->ports[i];
u8 val = 0;
if (!dsa_is_user_port(ds, i))
continue;
if (port == i)
continue;
if (!dsa_port_bridge_same(dp, other_dp))
continue;
if (other_p->stp_state == BR_STATE_FORWARDING &&
p->stp_state == BR_STATE_FORWARDING) {
val |= BIT(port);
port_member |= BIT(i);
}
dev->dev_ops->cfg_port_member(dev, i, val | cpu_port);
}
dev->dev_ops->cfg_port_member(dev, port, port_member | cpu_port);
}
EXPORT_SYMBOL_GPL(ksz_update_port_member);
static void port_r_cnt(struct ksz_device *dev, int port)
{
struct ksz_port_mib *mib = &dev->ports[port].mib;
u64 *dropped;
/* Some ports may not have MIB counters before SWITCH_COUNTER_NUM. */
while (mib->cnt_ptr < dev->reg_mib_cnt) {
dev->dev_ops->r_mib_cnt(dev, port, mib->cnt_ptr,
&mib->counters[mib->cnt_ptr]);
++mib->cnt_ptr;
}
/* last one in storage */
dropped = &mib->counters[dev->mib_cnt];
/* Some ports may not have MIB counters after SWITCH_COUNTER_NUM. */
while (mib->cnt_ptr < dev->mib_cnt) {
dev->dev_ops->r_mib_pkt(dev, port, mib->cnt_ptr,
dropped, &mib->counters[mib->cnt_ptr]);
++mib->cnt_ptr;
}
mib->cnt_ptr = 0;
}
static void ksz_mib_read_work(struct work_struct *work)
{
struct ksz_device *dev = container_of(work, struct ksz_device,
mib_read.work);
struct ksz_port_mib *mib;
struct ksz_port *p;
int i;
for (i = 0; i < dev->port_cnt; i++) {
if (dsa_is_unused_port(dev->ds, i))
continue;
p = &dev->ports[i];
mib = &p->mib;
mutex_lock(&mib->cnt_mutex);
/* Only read MIB counters when the port is told to do.
* If not, read only dropped counters when link is not up.
*/
if (!p->read) {
const struct dsa_port *dp = dsa_to_port(dev->ds, i);
if (!netif_carrier_ok(dp->slave))
mib->cnt_ptr = dev->reg_mib_cnt;
}
port_r_cnt(dev, i);
p->read = false;
mutex_unlock(&mib->cnt_mutex);
}
schedule_delayed_work(&dev->mib_read, dev->mib_read_interval);
}
void ksz_init_mib_timer(struct ksz_device *dev)
{
int i;
INIT_DELAYED_WORK(&dev->mib_read, ksz_mib_read_work);
for (i = 0; i < dev->port_cnt; i++)
dev->dev_ops->port_init_cnt(dev, i);
}
EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg)
{
struct ksz_device *dev = ds->priv;
u16 val = 0xffff;
dev->dev_ops->r_phy(dev, addr, reg, &val);
return val;
}
EXPORT_SYMBOL_GPL(ksz_phy_read16);
int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
{
struct ksz_device *dev = ds->priv;
dev->dev_ops->w_phy(dev, addr, reg, val);
return 0;
}
EXPORT_SYMBOL_GPL(ksz_phy_write16);
void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
phy_interface_t interface)
{
struct ksz_device *dev = ds->priv;
struct ksz_port *p = &dev->ports[port];
/* Read all MIB counters when the link is going down. */
p->read = true;
net: dsa: microchip: fix race condition Between queuing the delayed work and finishing the setup of the dsa ports, the process may sleep in request_module() (via phy_device_create()) and the queued work may be executed prior to the switch net devices being registered. In ksz_mib_read_work(), a NULL dereference will happen within netof_carrier_ok(dp->slave). Not queuing the delayed work in ksz_init_mib_timer() makes things even worse because the work will now be queued for immediate execution (instead of 2000 ms) in ksz_mac_link_down() via dsa_port_link_register_of(). Call tree: ksz9477_i2c_probe() \--ksz9477_switch_register() \--ksz_switch_register() +--dsa_register_switch() | \--dsa_switch_probe() | \--dsa_tree_setup() | \--dsa_tree_setup_switches() | +--dsa_switch_setup() | | +--ksz9477_setup() | | | \--ksz_init_mib_timer() | | | |--/* Start the timer 2 seconds later. */ | | | \--schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000)); | | \--__mdiobus_register() | | \--mdiobus_scan() | | \--get_phy_device() | | +--get_phy_id() | | \--phy_device_create() | | |--/* sleeping, ksz_mib_read_work() can be called meanwhile */ | | \--request_module() | | | \--dsa_port_setup() | +--/* Called for non-CPU ports */ | +--dsa_slave_create() | | +--/* Too late, ksz_mib_read_work() may be called beforehand */ | | \--port->slave = ... | ... | +--Called for CPU port */ | \--dsa_port_link_register_of() | \--ksz_mac_link_down() | +--/* mib_read must be initialized here */ | +--/* work is already scheduled, so it will be executed after 2000 ms */ | \--schedule_delayed_work(&dev->mib_read, 0); \-- /* here port->slave is setup properly, scheduling the delayed work should be safe */ Solution: 1. Do not queue (only initialize) delayed work in ksz_init_mib_timer(). 2. Only queue delayed work in ksz_mac_link_down() if init is completed. 3. Queue work once in ksz_switch_register(), after dsa_register_switch() has completed. Fixes: 7c6ff470aa86 ("net: dsa: microchip: add MIB counter reading support") Signed-off-by: Christian Eggers <ceggers@arri.de> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-12 10:39:42 +02:00
/* timer started */
if (dev->mib_read_interval)
schedule_delayed_work(&dev->mib_read, 0);
}
EXPORT_SYMBOL_GPL(ksz_mac_link_down);
int ksz_sset_count(struct dsa_switch *ds, int port, int sset)
{
struct ksz_device *dev = ds->priv;
if (sset != ETH_SS_STATS)
return 0;
return dev->mib_cnt;
}
EXPORT_SYMBOL_GPL(ksz_sset_count);
void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf)
{
const struct dsa_port *dp = dsa_to_port(ds, port);
struct ksz_device *dev = ds->priv;
struct ksz_port_mib *mib;
mib = &dev->ports[port].mib;
mutex_lock(&mib->cnt_mutex);
/* Only read dropped counters if no link. */
if (!netif_carrier_ok(dp->slave))
mib->cnt_ptr = dev->reg_mib_cnt;
port_r_cnt(dev, port);
memcpy(buf, mib->counters, dev->mib_cnt * sizeof(u64));
mutex_unlock(&mib->cnt_mutex);
}
EXPORT_SYMBOL_GPL(ksz_get_ethtool_stats);
int ksz_port_bridge_join(struct dsa_switch *ds, int port,
struct dsa_bridge bridge,
bool *tx_fwd_offload)
{
/* port_stp_state_set() will be called after to put the port in
* appropriate state so there is no need to do anything.
*/
return 0;
}
EXPORT_SYMBOL_GPL(ksz_port_bridge_join);
void ksz_port_bridge_leave(struct dsa_switch *ds, int port,
net: dsa: keep the bridge_dev and bridge_num as part of the same structure The main desire behind this is to provide coherent bridge information to the fast path without locking. For example, right now we set dp->bridge_dev and dp->bridge_num from separate code paths, it is theoretically possible for a packet transmission to read these two port properties consecutively and find a bridge number which does not correspond with the bridge device. Another desire is to start passing more complex bridge information to dsa_switch_ops functions. For example, with FDB isolation, it is expected that drivers will need to be passed the bridge which requested an FDB/MDB entry to be offloaded, and along with that bridge_dev, the associated bridge_num should be passed too, in case the driver might want to implement an isolation scheme based on that number. We already pass the {bridge_dev, bridge_num} pair to the TX forwarding offload switch API, however we'd like to remove that and squash it into the basic bridge join/leave API. So that means we need to pass this pair to the bridge join/leave API. During dsa_port_bridge_leave, first we unset dp->bridge_dev, then we call the driver's .port_bridge_leave with what used to be our dp->bridge_dev, but provided as an argument. When bridge_dev and bridge_num get folded into a single structure, we need to preserve this behavior in dsa_port_bridge_leave: we need a copy of what used to be in dp->bridge. Switch drivers check bridge membership by comparing dp->bridge_dev with the provided bridge_dev, but now, if we provide the struct dsa_bridge as a pointer, they cannot keep comparing dp->bridge to the provided pointer, since this only points to an on-stack copy. To make this obvious and prevent driver writers from forgetting and doing stupid things, in this new API, the struct dsa_bridge is provided as a full structure (not very large, contains an int and a pointer) instead of a pointer. An explicit comparison function needs to be used to determine bridge membership: dsa_port_offloads_bridge(). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-06 18:57:56 +02:00
struct dsa_bridge bridge)
{
/* port_stp_state_set() will be called after to put the port in
* forwarding state so there is no need to do anything.
*/
}
EXPORT_SYMBOL_GPL(ksz_port_bridge_leave);
void ksz_port_fast_age(struct dsa_switch *ds, int port)
{
struct ksz_device *dev = ds->priv;
dev->dev_ops->flush_dyn_mac_table(dev, port);
}
EXPORT_SYMBOL_GPL(ksz_port_fast_age);
int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb,
void *data)
{
struct ksz_device *dev = ds->priv;
int ret = 0;
u16 i = 0;
u16 entries = 0;
u8 timestamp = 0;
u8 fid;
u8 member;
struct alu_struct alu;
do {
alu.is_static = false;
ret = dev->dev_ops->r_dyn_mac_table(dev, i, alu.mac, &fid,
&member, &timestamp,
&entries);
if (!ret && (member & BIT(port))) {
ret = cb(alu.mac, alu.fid, alu.is_static, data);
if (ret)
break;
}
i++;
} while (i < entries);
if (i >= entries)
ret = 0;
return ret;
}
EXPORT_SYMBOL_GPL(ksz_port_fdb_dump);
int ksz_port_mdb_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb)
{
struct ksz_device *dev = ds->priv;
struct alu_struct alu;
int index;
int empty = 0;
alu.port_forward = 0;
for (index = 0; index < dev->num_statics; index++) {
if (!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) {
/* Found one already in static MAC table. */
if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
alu.fid == mdb->vid)
break;
/* Remember the first empty entry. */
} else if (!empty) {
empty = index + 1;
}
}
/* no available entry */
if (index == dev->num_statics && !empty)
return -ENOSPC;
/* add entry */
if (index == dev->num_statics) {
index = empty - 1;
memset(&alu, 0, sizeof(alu));
memcpy(alu.mac, mdb->addr, ETH_ALEN);
alu.is_static = true;
}
alu.port_forward |= BIT(port);
if (mdb->vid) {
alu.is_use_fid = true;
/* Need a way to map VID to FID. */
alu.fid = mdb->vid;
}
dev->dev_ops->w_sta_mac_table(dev, index, &alu);
return 0;
}
EXPORT_SYMBOL_GPL(ksz_port_mdb_add);
int ksz_port_mdb_del(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb)
{
struct ksz_device *dev = ds->priv;
struct alu_struct alu;
int index;
for (index = 0; index < dev->num_statics; index++) {
if (!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) {
/* Found one already in static MAC table. */
if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
alu.fid == mdb->vid)
break;
}
}
/* no available entry */
if (index == dev->num_statics)
goto exit;
/* clear port */
alu.port_forward &= ~BIT(port);
if (!alu.port_forward)
alu.is_static = false;
dev->dev_ops->w_sta_mac_table(dev, index, &alu);
exit:
return 0;
}
EXPORT_SYMBOL_GPL(ksz_port_mdb_del);
int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
{
struct ksz_device *dev = ds->priv;
if (!dsa_is_user_port(ds, port))
return 0;
/* setup slave port */
dev->dev_ops->port_setup(dev, port, false);
/* port_stp_state_set() will be called after to enable the port so
* there is no need to do anything.
*/
return 0;
}
EXPORT_SYMBOL_GPL(ksz_enable_port);
struct ksz_device *ksz_switch_alloc(struct device *base, void *priv)
{
struct dsa_switch *ds;
struct ksz_device *swdev;
ds = devm_kzalloc(base, sizeof(*ds), GFP_KERNEL);
if (!ds)
return NULL;
ds->dev = base;
ds->num_ports = DSA_MAX_PORTS;
swdev = devm_kzalloc(base, sizeof(*swdev), GFP_KERNEL);
if (!swdev)
return NULL;
ds->priv = swdev;
swdev->dev = base;
swdev->ds = ds;
swdev->priv = priv;
return swdev;
}
EXPORT_SYMBOL(ksz_switch_alloc);
int ksz_switch_register(struct ksz_device *dev,
const struct ksz_dev_ops *ops)
{
struct device_node *port, *ports;
phy_interface_t interface;
unsigned int port_num;
int ret;
if (dev->pdata)
dev->chip_id = dev->pdata->chip_id;
dev->reset_gpio = devm_gpiod_get_optional(dev->dev, "reset",
GPIOD_OUT_LOW);
if (IS_ERR(dev->reset_gpio))
return PTR_ERR(dev->reset_gpio);
if (dev->reset_gpio) {
gpiod_set_value_cansleep(dev->reset_gpio, 1);
usleep_range(10000, 12000);
gpiod_set_value_cansleep(dev->reset_gpio, 0);
msleep(100);
}
mutex_init(&dev->dev_mutex);
mutex_init(&dev->regmap_mutex);
mutex_init(&dev->alu_mutex);
mutex_init(&dev->vlan_mutex);
dev->dev_ops = ops;
if (dev->dev_ops->detect(dev))
return -EINVAL;
ret = dev->dev_ops->init(dev);
if (ret)
return ret;
/* Host port interface will be self detected, or specifically set in
* device tree.
*/
for (port_num = 0; port_num < dev->port_cnt; ++port_num)
dev->ports[port_num].interface = PHY_INTERFACE_MODE_NA;
if (dev->dev->of_node) {
ret = of_get_phy_mode(dev->dev->of_node, &interface);
if (ret == 0)
dev->compat_interface = interface;
ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports");
if (!ports)
ports = of_get_child_by_name(dev->dev->of_node, "ports");
if (ports)
for_each_available_child_of_node(ports, port) {
if (of_property_read_u32(port, "reg",
&port_num))
continue;
if (!(dev->port_mask & BIT(port_num))) {
of_node_put(port);
return -EINVAL;
}
of_get_phy_mode(port,
&dev->ports[port_num].interface);
}
dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
"microchip,synclko-125");
}
ret = dsa_register_switch(dev->ds);
if (ret) {
dev->dev_ops->exit(dev);
return ret;
}
net: dsa: microchip: fix race condition Between queuing the delayed work and finishing the setup of the dsa ports, the process may sleep in request_module() (via phy_device_create()) and the queued work may be executed prior to the switch net devices being registered. In ksz_mib_read_work(), a NULL dereference will happen within netof_carrier_ok(dp->slave). Not queuing the delayed work in ksz_init_mib_timer() makes things even worse because the work will now be queued for immediate execution (instead of 2000 ms) in ksz_mac_link_down() via dsa_port_link_register_of(). Call tree: ksz9477_i2c_probe() \--ksz9477_switch_register() \--ksz_switch_register() +--dsa_register_switch() | \--dsa_switch_probe() | \--dsa_tree_setup() | \--dsa_tree_setup_switches() | +--dsa_switch_setup() | | +--ksz9477_setup() | | | \--ksz_init_mib_timer() | | | |--/* Start the timer 2 seconds later. */ | | | \--schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000)); | | \--__mdiobus_register() | | \--mdiobus_scan() | | \--get_phy_device() | | +--get_phy_id() | | \--phy_device_create() | | |--/* sleeping, ksz_mib_read_work() can be called meanwhile */ | | \--request_module() | | | \--dsa_port_setup() | +--/* Called for non-CPU ports */ | +--dsa_slave_create() | | +--/* Too late, ksz_mib_read_work() may be called beforehand */ | | \--port->slave = ... | ... | +--Called for CPU port */ | \--dsa_port_link_register_of() | \--ksz_mac_link_down() | +--/* mib_read must be initialized here */ | +--/* work is already scheduled, so it will be executed after 2000 ms */ | \--schedule_delayed_work(&dev->mib_read, 0); \-- /* here port->slave is setup properly, scheduling the delayed work should be safe */ Solution: 1. Do not queue (only initialize) delayed work in ksz_init_mib_timer(). 2. Only queue delayed work in ksz_mac_link_down() if init is completed. 3. Queue work once in ksz_switch_register(), after dsa_register_switch() has completed. Fixes: 7c6ff470aa86 ("net: dsa: microchip: add MIB counter reading support") Signed-off-by: Christian Eggers <ceggers@arri.de> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-12 10:39:42 +02:00
/* Read MIB counters every 30 seconds to avoid overflow. */
dev->mib_read_interval = msecs_to_jiffies(30000);
/* Start the MIB timer. */
schedule_delayed_work(&dev->mib_read, 0);
return 0;
}
EXPORT_SYMBOL(ksz_switch_register);
void ksz_switch_remove(struct ksz_device *dev)
{
/* timer started */
if (dev->mib_read_interval) {
dev->mib_read_interval = 0;
cancel_delayed_work_sync(&dev->mib_read);
}
dev->dev_ops->exit(dev);
dsa_unregister_switch(dev->ds);
if (dev->reset_gpio)
gpiod_set_value_cansleep(dev->reset_gpio, 1);
}
EXPORT_SYMBOL(ksz_switch_remove);
MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>");
MODULE_DESCRIPTION("Microchip KSZ Series Switch DSA Driver");
MODULE_LICENSE("GPL");