linux/drivers/net/ethernet/intel/idpf/idpf_main.c
Marco Leogrande 9a5b021cb8 idpf: convert workqueues to unbound
When a workqueue is created with `WQ_UNBOUND`, its work items are
served by special worker-pools, whose host workers are not bound to
any specific CPU. In the default configuration (i.e. when
`queue_delayed_work` and friends do not specify which CPU to run the
work item on), `WQ_UNBOUND` allows the work item to be executed on any
CPU in the same node of the CPU it was enqueued on. While this
solution potentially sacrifices locality, it avoids contention with
other processes that might dominate the CPU time of the processor the
work item was scheduled on.

This is not just a theoretical problem: in a particular scenario, a
misconfigured process was hogging most of the time from CPU0, leaving
less than 0.5% of its CPU time to the kworker. The IDPF workqueues
that were using the kworker on CPU0 suffered large completion delays
as a result, causing performance degradation, timeouts and eventual
system crash.

Tested:

* I have also run a manual test to gauge the performance
  improvement. The test consists of an antagonist process
  (`./stress --cpu 2`) consuming as much of CPU 0 as possible. This
  process is run under `taskset 01` to bind it to CPU0, and its
  priority is changed with `chrt -pQ 9900 10000 ${pid}` and
  `renice -n -20 ${pid}` after start.

  Then, the IDPF driver is forced to prefer CPU0 by editing all calls
  to `queue_delayed_work`, `mod_delayed_work`, etc... to use CPU 0.

  Finally, `ktraces` for the workqueue events are collected.

  Without the current patch, the antagonist process can force
  arbitrary delays between `workqueue_queue_work` and
  `workqueue_execute_start`, which in my tests were as high as
  `30ms`. With the current patch applied, the work item can be
  migrated to another unloaded CPU in the same node and, keeping
  everything else equal, the maximum delay I could see was `6us`.

Fixes: 0fe45467a1 ("idpf: add create vport and netdev configuration")
Signed-off-by: Marco Leogrande <leogrande@google.com>
Signed-off-by: Manoj Vishwanathan <manojvishy@google.com>
Signed-off-by: Brian Vazquez <brianvv@google.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Pavan Kumar Linga <pavan.kumar.linga@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2025-01-24 10:03:39 -08:00

287 lines
7.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */
#include "idpf.h"
#include "idpf_devids.h"
#include "idpf_virtchnl.h"
/* Human-readable driver description, reported through MODULE_DESCRIPTION() */
#define DRV_SUMMARY "Intel(R) Infrastructure Data Path Function Linux Driver"
MODULE_DESCRIPTION(DRV_SUMMARY);
/* Import the LIBETH symbol namespace so that its exported symbols resolve */
MODULE_IMPORT_NS("LIBETH");
MODULE_LICENSE("GPL");
/**
 * idpf_remove - Device removal routine
 * @pdev: PCI device information struct
 *
 * Marks removal as in progress, waits for the virtchnl event task, calls
 * idpf_sriov_configure() with zero VFs when VFs are enabled, deinitializes
 * the virtchnl core and the default mailbox, unregisters and frees any
 * netdevs that are still around, destroys the driver workqueues and finally
 * releases every adapter-owned allocation, including the adapter itself.
 */
static void idpf_remove(struct pci_dev *pdev)
{
	struct idpf_adapter *adapter = pci_get_drvdata(pdev);
	int i;

	set_bit(IDPF_REMOVE_IN_PROG, adapter->flags);

	/* Wait until vc_event_task is done to consider if any hard reset is
	 * in progress else we may go ahead and release the resources but the
	 * thread doing the hard reset might continue the init path and
	 * end up in bad state.
	 */
	cancel_delayed_work_sync(&adapter->vc_event_task);
	if (adapter->num_vfs)
		idpf_sriov_configure(pdev, 0);

	idpf_vc_core_deinit(adapter);

	/* Be a good citizen and leave the device clean on exit */
	adapter->dev_ops.reg_ops.trigger_reset(adapter, IDPF_HR_FUNC_RESET);
	idpf_deinit_dflt_mbx(adapter);

	if (!adapter->netdevs)
		goto destroy_wqs;

	/* There are some cases where it's possible to still have netdevs
	 * registered with the stack at this point, e.g. if the driver detected
	 * a HW reset and rmmod is called before it fully recovers. Unregister
	 * any stale netdevs here.
	 */
	for (i = 0; i < adapter->max_vports; i++) {
		if (!adapter->netdevs[i])
			continue;

		if (adapter->netdevs[i]->reg_state != NETREG_UNINITIALIZED)
			unregister_netdev(adapter->netdevs[i]);
		free_netdev(adapter->netdevs[i]);
		adapter->netdevs[i] = NULL;
	}

destroy_wqs:
	destroy_workqueue(adapter->init_wq);
	destroy_workqueue(adapter->serv_wq);
	destroy_workqueue(adapter->mbx_wq);
	destroy_workqueue(adapter->stats_wq);
	destroy_workqueue(adapter->vc_event_wq);

	/* Guard the per-vport walk the same way the netdevs array is guarded
	 * above, so a missing vport_config array is never indexed.
	 * NOTE(review): presumably vport_config can be NULL while max_vports
	 * is non-zero if init did not complete - confirm against the init path.
	 */
	if (adapter->vport_config) {
		for (i = 0; i < adapter->max_vports; i++) {
			kfree(adapter->vport_config[i]);
			adapter->vport_config[i] = NULL;
		}
	}
	kfree(adapter->vport_config);
	adapter->vport_config = NULL;
	kfree(adapter->netdevs);
	adapter->netdevs = NULL;
	kfree(adapter->vcxn_mngr);
	adapter->vcxn_mngr = NULL;

	mutex_destroy(&adapter->vport_ctrl_lock);
	mutex_destroy(&adapter->vector_lock);
	mutex_destroy(&adapter->queue_lock);
	mutex_destroy(&adapter->vc_buf_lock);

	/* Clear drvdata before freeing so no stale pointer stays behind */
	pci_set_drvdata(pdev, NULL);
	kfree(adapter);
}
/**
* idpf_shutdown - PCI callback for shutting down device
* @pdev: PCI device information struct
*/
static void idpf_shutdown(struct pci_dev *pdev)
{
idpf_remove(pdev);
if (system_state == SYSTEM_POWER_OFF)
pci_set_power_state(pdev, PCI_D3hot);
}
/**
 * idpf_cfg_hw - Initialize HW struct
 * @adapter: adapter to setup hw struct for
 *
 * Records the mapped address of iomap table entry 0 in the adapter's HW
 * struct and links the HW struct back to its adapter.
 *
 * Return: 0 on success, -ENOMEM if entry 0 of the iomap table is not mapped
 */
static int idpf_cfg_hw(struct idpf_adapter *adapter)
{
	struct idpf_hw *hw = &adapter->hw;

	hw->hw_addr = pcim_iomap_table(adapter->pdev)[0];
	if (hw->hw_addr) {
		hw->back = adapter;
		return 0;
	}

	pci_err(adapter->pdev, "failed to allocate PCI iomap table\n");

	return -ENOMEM;
}
/**
* idpf_probe - Device initialization routine
* @pdev: PCI device information struct
* @ent: entry in idpf_pci_tbl
*
* Returns 0 on success, negative on failure
*/
static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
struct idpf_adapter *adapter;
int err;
adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
if (!adapter)
return -ENOMEM;
adapter->req_tx_splitq = true;
adapter->req_rx_splitq = true;
switch (ent->device) {
case IDPF_DEV_ID_PF:
idpf_dev_ops_init(adapter);
break;
case IDPF_DEV_ID_VF:
idpf_vf_dev_ops_init(adapter);
adapter->crc_enable = true;
break;
default:
err = -ENODEV;
dev_err(&pdev->dev, "Unexpected dev ID 0x%x in idpf probe\n",
ent->device);
goto err_free;
}
adapter->pdev = pdev;
err = pcim_enable_device(pdev);
if (err)
goto err_free;
err = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
if (err) {
pci_err(pdev, "pcim_iomap_regions failed %pe\n", ERR_PTR(err));
goto err_free;
}
/* set up for high or low dma */
err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (err) {
pci_err(pdev, "DMA configuration failed: %pe\n", ERR_PTR(err));
goto err_free;
}
pci_set_master(pdev);
pci_set_drvdata(pdev, adapter);
adapter->init_wq = alloc_workqueue("%s-%s-init",
WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->init_wq) {
dev_err(dev, "Failed to allocate init workqueue\n");
err = -ENOMEM;
goto err_free;
}
adapter->serv_wq = alloc_workqueue("%s-%s-service",
WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->serv_wq) {
dev_err(dev, "Failed to allocate service workqueue\n");
err = -ENOMEM;
goto err_serv_wq_alloc;
}
adapter->mbx_wq = alloc_workqueue("%s-%s-mbx",
WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->mbx_wq) {
dev_err(dev, "Failed to allocate mailbox workqueue\n");
err = -ENOMEM;
goto err_mbx_wq_alloc;
}
adapter->stats_wq = alloc_workqueue("%s-%s-stats",
WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->stats_wq) {
dev_err(dev, "Failed to allocate workqueue\n");
err = -ENOMEM;
goto err_stats_wq_alloc;
}
adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event",
WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->vc_event_wq) {
dev_err(dev, "Failed to allocate virtchnl event workqueue\n");
err = -ENOMEM;
goto err_vc_event_wq_alloc;
}
/* setup msglvl */
adapter->msg_enable = netif_msg_init(-1, IDPF_AVAIL_NETIF_M);
err = idpf_cfg_hw(adapter);
if (err) {
dev_err(dev, "Failed to configure HW structure for adapter: %d\n",
err);
goto err_cfg_hw;
}
mutex_init(&adapter->vport_ctrl_lock);
mutex_init(&adapter->vector_lock);
mutex_init(&adapter->queue_lock);
mutex_init(&adapter->vc_buf_lock);
INIT_DELAYED_WORK(&adapter->init_task, idpf_init_task);
INIT_DELAYED_WORK(&adapter->serv_task, idpf_service_task);
INIT_DELAYED_WORK(&adapter->mbx_task, idpf_mbx_task);
INIT_DELAYED_WORK(&adapter->stats_task, idpf_statistics_task);
INIT_DELAYED_WORK(&adapter->vc_event_task, idpf_vc_event_task);
adapter->dev_ops.reg_ops.reset_reg_init(adapter);
set_bit(IDPF_HR_DRV_LOAD, adapter->flags);
queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task,
msecs_to_jiffies(10 * (pdev->devfn & 0x07)));
return 0;
err_cfg_hw:
destroy_workqueue(adapter->vc_event_wq);
err_vc_event_wq_alloc:
destroy_workqueue(adapter->stats_wq);
err_stats_wq_alloc:
destroy_workqueue(adapter->mbx_wq);
err_mbx_wq_alloc:
destroy_workqueue(adapter->serv_wq);
err_serv_wq_alloc:
destroy_workqueue(adapter->init_wq);
err_free:
kfree(adapter);
return err;
}
/* idpf_pci_tbl - PCI device IDs handled by this driver (PF and VF),
 * terminated by an all-zero entry and exported via MODULE_DEVICE_TABLE()
 */
static const struct pci_device_id idpf_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, IDPF_DEV_ID_PF) },
	{ PCI_VDEVICE(INTEL, IDPF_DEV_ID_VF) },
	{ /* Sentinel */ },
};
MODULE_DEVICE_TABLE(pci, idpf_pci_tbl);
/* PCI driver descriptor: ties the ID table to the callbacks defined in this
 * file plus idpf_sriov_configure(), and registers it as the module's driver
 */
static struct pci_driver idpf_driver = {
	.name			= KBUILD_MODNAME,
	.id_table		= idpf_pci_tbl,
	.probe			= idpf_probe,
	.remove			= idpf_remove,
	.shutdown		= idpf_shutdown,
	.sriov_configure	= idpf_sriov_configure,
};
module_pci_driver(idpf_driver);