linux/drivers/crypto/intel/qat/qat_common/adf_init.c

517 lines
13 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2014 - 2020 Intel Corporation */
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/bitops.h>
#include <linux/delay.h>
#include "adf_accel_devices.h"
#include "adf_cfg.h"
#include "adf_common_drv.h"
#include "adf_dbgfs.h"
crypto: qat - add heartbeat feature Under some circumstances, firmware in the QAT devices could become unresponsive. The Heartbeat feature provides a mechanism to detect unresponsive devices. The QAT FW periodically writes to memory a set of counters that allow to detect the liveness of a device. This patch adds logic to enable the reporting of those counters, analyze them and report if a device is alive or not. In particular this adds (1) heartbeat enabling, reading and detection logic (2) reporting of heartbeat status and configuration via debugfs (3) documentation for the newly created sysfs entries (4) configuration of FW settings related to heartbeat, e.g. tick period (5) logic to convert time in ms (provided by the user) to clock ticks This patch introduces a new folder in debugfs called heartbeat with the following attributes: - status - queries_sent - queries_failed - config All attributes except config are reading only. In particular: - `status` file returns 0 when device is operational and -1 otherwise. - `queries_sent` returns the total number of heartbeat queries sent. - `queries_failed` returns the total number of heartbeat queries failed. - `config` allows to adjust the frequency at which the firmware writes counters to memory. This period is given in milliseconds and it is fixed for GEN4 devices. Signed-off-by: Damian Muszynski <damian.muszynski@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-06-30 19:03:57 +02:00
#include "adf_heartbeat.h"
#include "adf_rl.h"
#include "adf_sysfs_ras_counters.h"
crypto: qat - add support for device telemetry Expose through debugfs device telemetry data for QAT GEN4 devices. This allows to gather metrics about the performance and the utilization of a device. In particular, statistics on (1) the utilization of the PCIe channel, (2) address translation, when SVA is enabled and (3) the internal engines for crypto and data compression. If telemetry is supported by the firmware, the driver allocates a DMA region and a circular buffer. When telemetry is enabled, through the `control` attribute in debugfs, the driver sends to the firmware, via the admin interface, the `TL_START` command. This triggers the device to periodically gather telemetry data from hardware registers and write it into the DMA memory region. The device writes into the shared region every second. The driver, every 500ms, snapshots the DMA shared region into the circular buffer. This is then used to compute basic metric (min/max/average) on each counter, every time the `device_data` attribute is queried. Telemetry counters are exposed through debugfs in the folder /sys/kernel/debug/qat_<device>_<BDF>/telemetry. For details, refer to debugfs-driver-qat_telemetry in Documentation/ABI. This patch is based on earlier work done by Wojciech Ziemba. Signed-off-by: Lucas Segarra Fernandez <lucas.segarra.fernandez@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Damian Muszynski <damian.muszynski@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-12-22 11:35:07 +01:00
#include "adf_telemetry.h"
static LIST_HEAD(service_table);
static DEFINE_MUTEX(service_lock);
static void adf_service_add(struct service_hndl *service)
{
mutex_lock(&service_lock);
list_add(&service->list, &service_table);
mutex_unlock(&service_lock);
}
int adf_service_register(struct service_hndl *service)
{
memset(service->init_status, 0, sizeof(service->init_status));
memset(service->start_status, 0, sizeof(service->start_status));
adf_service_add(service);
return 0;
}
static void adf_service_remove(struct service_hndl *service)
{
mutex_lock(&service_lock);
list_del(&service->list);
mutex_unlock(&service_lock);
}
int adf_service_unregister(struct service_hndl *service)
{
int i;
for (i = 0; i < ARRAY_SIZE(service->init_status); i++) {
if (service->init_status[i] || service->start_status[i]) {
pr_err("QAT: Could not remove active service\n");
return -EFAULT;
}
}
adf_service_remove(service);
return 0;
}
/**
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
* adf_dev_init() - Init data structures and services for the given accel device
* @accel_dev: Pointer to acceleration device.
*
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
* Initialize the ring data structures and the admin comms and arbitration
* services.
*
* Return: 0 on success, error code otherwise.
*/
static int adf_dev_init(struct adf_accel_dev *accel_dev)
{
struct service_hndl *service;
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
int ret;
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
if (!hw_data) {
dev_err(&GET_DEV(accel_dev),
"Failed to init device - hw_data not set\n");
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
return -EFAULT;
}
if (!test_bit(ADF_STATUS_CONFIGURED, &accel_dev->status) &&
!accel_dev->is_vf) {
dev_err(&GET_DEV(accel_dev), "Device not configured\n");
return -EFAULT;
}
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
if (adf_init_etr_data(accel_dev)) {
dev_err(&GET_DEV(accel_dev), "Failed initialize etr\n");
return -EFAULT;
}
crypto: qat - power up 4xxx device After reset or boot, QAT 4xxx devices are inactive and require to be explicitly activated. This is done by writing the DRV_ACTIVE bit in the PM_INTERRUPT register and polling the PM_INIT_STATE to make sure that the transaction has completed properly. If this is not done, the driver will fail the initialization sequence reporting the following message: [ 22.081193] 4xxx 0000:f7:00.0: enabling device (0140 -> 0142) [ 22.720285] QAT: AE0 is inactive!! [ 22.720287] QAT: failed to get device out of reset [ 22.720288] 4xxx 0000:f7:00.0: qat_hal_clr_reset error [ 22.720290] 4xxx 0000:f7:00.0: Failed to init the AEs [ 22.720290] 4xxx 0000:f7:00.0: Failed to initialise Acceleration Engine [ 22.720789] 4xxx 0000:f7:00.0: Resetting device qat_dev0 [ 22.825099] 4xxx: probe of 0000:f7:00.0 failed with error -14 The patch also temporarily disables the power management source of interrupt, to avoid possible spurious interrupts as the power management feature is not fully supported. The device init function has been added to adf_dev_init(), and not in the probe of 4xxx to make sure that the device is re-enabled in case of reset. Note that the error code reported by hw_data->init_device() in adf_dev_init() has been shadowed for consistency with the other calls in the same function. Fixes: 8c8268166e83 ("crypto: qat - add qat_4xxx driver") Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2021-09-16 15:45:41 +01:00
if (hw_data->init_device && hw_data->init_device(accel_dev)) {
dev_err(&GET_DEV(accel_dev), "Failed to initialize device\n");
return -EFAULT;
}
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
if (hw_data->init_admin_comms && hw_data->init_admin_comms(accel_dev)) {
dev_err(&GET_DEV(accel_dev), "Failed initialize admin comms\n");
return -EFAULT;
}
if (hw_data->init_arb && hw_data->init_arb(accel_dev)) {
dev_err(&GET_DEV(accel_dev), "Failed initialize hw arbiter\n");
return -EFAULT;
}
if (hw_data->get_ring_to_svc_map)
hw_data->ring_to_svc_map = hw_data->get_ring_to_svc_map(accel_dev);
if (adf_ae_init(accel_dev)) {
dev_err(&GET_DEV(accel_dev),
"Failed to initialise Acceleration Engine\n");
return -EFAULT;
}
set_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status);
if (adf_ae_fw_load(accel_dev)) {
dev_err(&GET_DEV(accel_dev),
"Failed to load acceleration FW\n");
return -EFAULT;
}
set_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status);
if (hw_data->alloc_irq(accel_dev)) {
dev_err(&GET_DEV(accel_dev), "Failed to allocate interrupts\n");
return -EFAULT;
}
set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status);
if (hw_data->ras_ops.enable_ras_errors)
hw_data->ras_ops.enable_ras_errors(accel_dev);
hw_data->enable_ints(accel_dev);
hw_data->enable_error_correction(accel_dev);
ret = hw_data->pfvf_ops.enable_comms(accel_dev);
if (ret)
return ret;
if (!test_bit(ADF_STATUS_CONFIGURED, &accel_dev->status) &&
accel_dev->is_vf) {
if (qat_crypto_vf_dev_config(accel_dev))
return -EFAULT;
}
crypto: qat - add heartbeat feature Under some circumstances, firmware in the QAT devices could become unresponsive. The Heartbeat feature provides a mechanism to detect unresponsive devices. The QAT FW periodically writes to memory a set of counters that allow to detect the liveness of a device. This patch adds logic to enable the reporting of those counters, analyze them and report if a device is alive or not. In particular this adds (1) heartbeat enabling, reading and detection logic (2) reporting of heartbeat status and configuration via debugfs (3) documentation for the newly created sysfs entries (4) configuration of FW settings related to heartbeat, e.g. tick period (5) logic to convert time in ms (provided by the user) to clock ticks This patch introduces a new folder in debugfs called heartbeat with the following attributes: - status - queries_sent - queries_failed - config All attributes except config are reading only. In particular: - `status` file returns 0 when device is operational and -1 otherwise. - `queries_sent` returns the total number of heartbeat queries sent. - `queries_failed` returns the total number of heartbeat queries failed. - `config` allows to adjust the frequency at which the firmware writes counters to memory. This period is given in milliseconds and it is fixed for GEN4 devices. Signed-off-by: Damian Muszynski <damian.muszynski@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-06-30 19:03:57 +02:00
adf_heartbeat_init(accel_dev);
ret = adf_rl_init(accel_dev);
if (ret && ret != -EOPNOTSUPP)
return ret;
crypto: qat - add heartbeat feature Under some circumstances, firmware in the QAT devices could become unresponsive. The Heartbeat feature provides a mechanism to detect unresponsive devices. The QAT FW periodically writes to memory a set of counters that allow to detect the liveness of a device. This patch adds logic to enable the reporting of those counters, analyze them and report if a device is alive or not. In particular this adds (1) heartbeat enabling, reading and detection logic (2) reporting of heartbeat status and configuration via debugfs (3) documentation for the newly created sysfs entries (4) configuration of FW settings related to heartbeat, e.g. tick period (5) logic to convert time in ms (provided by the user) to clock ticks This patch introduces a new folder in debugfs called heartbeat with the following attributes: - status - queries_sent - queries_failed - config All attributes except config are reading only. In particular: - `status` file returns 0 when device is operational and -1 otherwise. - `queries_sent` returns the total number of heartbeat queries sent. - `queries_failed` returns the total number of heartbeat queries failed. - `config` allows to adjust the frequency at which the firmware writes counters to memory. This period is given in milliseconds and it is fixed for GEN4 devices. Signed-off-by: Damian Muszynski <damian.muszynski@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-06-30 19:03:57 +02:00
crypto: qat - add support for device telemetry Expose through debugfs device telemetry data for QAT GEN4 devices. This allows to gather metrics about the performance and the utilization of a device. In particular, statistics on (1) the utilization of the PCIe channel, (2) address translation, when SVA is enabled and (3) the internal engines for crypto and data compression. If telemetry is supported by the firmware, the driver allocates a DMA region and a circular buffer. When telemetry is enabled, through the `control` attribute in debugfs, the driver sends to the firmware, via the admin interface, the `TL_START` command. This triggers the device to periodically gather telemetry data from hardware registers and write it into the DMA memory region. The device writes into the shared region every second. The driver, every 500ms, snapshots the DMA shared region into the circular buffer. This is then used to compute basic metric (min/max/average) on each counter, every time the `device_data` attribute is queried. Telemetry counters are exposed through debugfs in the folder /sys/kernel/debug/qat_<device>_<BDF>/telemetry. For details, refer to debugfs-driver-qat_telemetry in Documentation/ABI. This patch is based on earlier work done by Wojciech Ziemba. Signed-off-by: Lucas Segarra Fernandez <lucas.segarra.fernandez@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Damian Muszynski <damian.muszynski@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-12-22 11:35:07 +01:00
ret = adf_tl_init(accel_dev);
if (ret && ret != -EOPNOTSUPP)
return ret;
/*
* Subservice initialisation is divided into two stages: init and start.
* This is to facilitate any ordering dependencies between services
* prior to starting any of the accelerators.
*/
list_for_each_entry(service, &service_table, list) {
if (service->event_hld(accel_dev, ADF_EVENT_INIT)) {
dev_err(&GET_DEV(accel_dev),
"Failed to initialise service %s\n",
service->name);
return -EFAULT;
}
set_bit(accel_dev->accel_id, service->init_status);
}
return 0;
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
}
/**
* adf_dev_start() - Start acceleration service for the given accel device
* @accel_dev: Pointer to acceleration device.
*
* Function notifies all the registered services that the acceleration device
* is ready to be used.
* To be used by QAT device specific drivers.
*
* Return: 0 on success, error code otherwise.
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
*/
static int adf_dev_start(struct adf_accel_dev *accel_dev)
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
struct service_hndl *service;
int ret;
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
set_bit(ADF_STATUS_STARTING, &accel_dev->status);
if (adf_ae_start(accel_dev)) {
dev_err(&GET_DEV(accel_dev), "AE Start Failed\n");
return -EFAULT;
}
set_bit(ADF_STATUS_AE_STARTED, &accel_dev->status);
if (hw_data->send_admin_init(accel_dev)) {
dev_err(&GET_DEV(accel_dev), "Failed to send init message\n");
return -EFAULT;
}
if (hw_data->measure_clock) {
ret = hw_data->measure_clock(accel_dev);
if (ret) {
dev_err(&GET_DEV(accel_dev), "Failed measure device clock\n");
return ret;
}
}
/* Set ssm watch dog timer */
if (hw_data->set_ssm_wdtimer)
hw_data->set_ssm_wdtimer(accel_dev);
/* Enable Power Management */
if (hw_data->enable_pm && hw_data->enable_pm(accel_dev)) {
dev_err(&GET_DEV(accel_dev), "Failed to configure Power Management\n");
return -EFAULT;
}
if (hw_data->start_timer) {
ret = hw_data->start_timer(accel_dev);
if (ret) {
dev_err(&GET_DEV(accel_dev), "Failed to start internal sync timer\n");
return ret;
}
}
crypto: qat - add heartbeat feature Under some circumstances, firmware in the QAT devices could become unresponsive. The Heartbeat feature provides a mechanism to detect unresponsive devices. The QAT FW periodically writes to memory a set of counters that allow to detect the liveness of a device. This patch adds logic to enable the reporting of those counters, analyze them and report if a device is alive or not. In particular this adds (1) heartbeat enabling, reading and detection logic (2) reporting of heartbeat status and configuration via debugfs (3) documentation for the newly created sysfs entries (4) configuration of FW settings related to heartbeat, e.g. tick period (5) logic to convert time in ms (provided by the user) to clock ticks This patch introduces a new folder in debugfs called heartbeat with the following attributes: - status - queries_sent - queries_failed - config All attributes except config are reading only. In particular: - `status` file returns 0 when device is operational and -1 otherwise. - `queries_sent` returns the total number of heartbeat queries sent. - `queries_failed` returns the total number of heartbeat queries failed. - `config` allows to adjust the frequency at which the firmware writes counters to memory. This period is given in milliseconds and it is fixed for GEN4 devices. Signed-off-by: Damian Muszynski <damian.muszynski@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-06-30 19:03:57 +02:00
adf_heartbeat_start(accel_dev);
ret = adf_rl_start(accel_dev);
if (ret && ret != -EOPNOTSUPP)
return ret;
crypto: qat - add heartbeat feature Under some circumstances, firmware in the QAT devices could become unresponsive. The Heartbeat feature provides a mechanism to detect unresponsive devices. The QAT FW periodically writes to memory a set of counters that allow to detect the liveness of a device. This patch adds logic to enable the reporting of those counters, analyze them and report if a device is alive or not. In particular this adds (1) heartbeat enabling, reading and detection logic (2) reporting of heartbeat status and configuration via debugfs (3) documentation for the newly created sysfs entries (4) configuration of FW settings related to heartbeat, e.g. tick period (5) logic to convert time in ms (provided by the user) to clock ticks This patch introduces a new folder in debugfs called heartbeat with the following attributes: - status - queries_sent - queries_failed - config All attributes except config are reading only. In particular: - `status` file returns 0 when device is operational and -1 otherwise. - `queries_sent` returns the total number of heartbeat queries sent. - `queries_failed` returns the total number of heartbeat queries failed. - `config` allows to adjust the frequency at which the firmware writes counters to memory. This period is given in milliseconds and it is fixed for GEN4 devices. Signed-off-by: Damian Muszynski <damian.muszynski@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-06-30 19:03:57 +02:00
crypto: qat - add support for device telemetry Expose through debugfs device telemetry data for QAT GEN4 devices. This allows to gather metrics about the performance and the utilization of a device. In particular, statistics on (1) the utilization of the PCIe channel, (2) address translation, when SVA is enabled and (3) the internal engines for crypto and data compression. If telemetry is supported by the firmware, the driver allocates a DMA region and a circular buffer. When telemetry is enabled, through the `control` attribute in debugfs, the driver sends to the firmware, via the admin interface, the `TL_START` command. This triggers the device to periodically gather telemetry data from hardware registers and write it into the DMA memory region. The device writes into the shared region every second. The driver, every 500ms, snapshots the DMA shared region into the circular buffer. This is then used to compute basic metric (min/max/average) on each counter, every time the `device_data` attribute is queried. Telemetry counters are exposed through debugfs in the folder /sys/kernel/debug/qat_<device>_<BDF>/telemetry. For details, refer to debugfs-driver-qat_telemetry in Documentation/ABI. This patch is based on earlier work done by Wojciech Ziemba. Signed-off-by: Lucas Segarra Fernandez <lucas.segarra.fernandez@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Damian Muszynski <damian.muszynski@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-12-22 11:35:07 +01:00
ret = adf_tl_start(accel_dev);
if (ret && ret != -EOPNOTSUPP)
return ret;
list_for_each_entry(service, &service_table, list) {
if (service->event_hld(accel_dev, ADF_EVENT_START)) {
dev_err(&GET_DEV(accel_dev),
"Failed to start service %s\n",
service->name);
return -EFAULT;
}
set_bit(accel_dev->accel_id, service->start_status);
}
clear_bit(ADF_STATUS_STARTING, &accel_dev->status);
set_bit(ADF_STATUS_STARTED, &accel_dev->status);
if (!list_empty(&accel_dev->crypto_list) &&
(qat_algs_register() || qat_asym_algs_register())) {
dev_err(&GET_DEV(accel_dev),
"Failed to register crypto algs\n");
set_bit(ADF_STATUS_STARTING, &accel_dev->status);
clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
return -EFAULT;
}
set_bit(ADF_STATUS_CRYPTO_ALGS_REGISTERED, &accel_dev->status);
if (!list_empty(&accel_dev->compression_list) && qat_comp_algs_register()) {
dev_err(&GET_DEV(accel_dev),
"Failed to register compression algs\n");
set_bit(ADF_STATUS_STARTING, &accel_dev->status);
clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
return -EFAULT;
}
set_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status);
adf_dbgfs_add(accel_dev);
adf_sysfs_start_ras(accel_dev);
return 0;
}
/**
* adf_dev_stop() - Stop acceleration service for the given accel device
* @accel_dev: Pointer to acceleration device.
*
* Function notifies all the registered services that the acceleration device
* is shuting down.
* To be used by QAT device specific drivers.
*
* Return: void
*/
static void adf_dev_stop(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct service_hndl *service;
bool wait = false;
int ret;
if (!adf_dev_started(accel_dev) &&
!test_bit(ADF_STATUS_STARTING, &accel_dev->status))
return;
crypto: qat - add support for device telemetry Expose through debugfs device telemetry data for QAT GEN4 devices. This allows to gather metrics about the performance and the utilization of a device. In particular, statistics on (1) the utilization of the PCIe channel, (2) address translation, when SVA is enabled and (3) the internal engines for crypto and data compression. If telemetry is supported by the firmware, the driver allocates a DMA region and a circular buffer. When telemetry is enabled, through the `control` attribute in debugfs, the driver sends to the firmware, via the admin interface, the `TL_START` command. This triggers the device to periodically gather telemetry data from hardware registers and write it into the DMA memory region. The device writes into the shared region every second. The driver, every 500ms, snapshots the DMA shared region into the circular buffer. This is then used to compute basic metric (min/max/average) on each counter, every time the `device_data` attribute is queried. Telemetry counters are exposed through debugfs in the folder /sys/kernel/debug/qat_<device>_<BDF>/telemetry. For details, refer to debugfs-driver-qat_telemetry in Documentation/ABI. This patch is based on earlier work done by Wojciech Ziemba. Signed-off-by: Lucas Segarra Fernandez <lucas.segarra.fernandez@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Damian Muszynski <damian.muszynski@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-12-22 11:35:07 +01:00
adf_tl_stop(accel_dev);
adf_rl_stop(accel_dev);
adf_dbgfs_rm(accel_dev);
adf_sysfs_stop_ras(accel_dev);
clear_bit(ADF_STATUS_STARTING, &accel_dev->status);
clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
if (!list_empty(&accel_dev->crypto_list) &&
test_bit(ADF_STATUS_CRYPTO_ALGS_REGISTERED, &accel_dev->status)) {
qat_algs_unregister();
qat_asym_algs_unregister();
}
clear_bit(ADF_STATUS_CRYPTO_ALGS_REGISTERED, &accel_dev->status);
if (!list_empty(&accel_dev->compression_list) &&
test_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status))
qat_comp_algs_unregister();
clear_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status);
list_for_each_entry(service, &service_table, list) {
if (!test_bit(accel_dev->accel_id, service->start_status))
continue;
ret = service->event_hld(accel_dev, ADF_EVENT_STOP);
if (!ret) {
clear_bit(accel_dev->accel_id, service->start_status);
} else if (ret == -EAGAIN) {
wait = true;
clear_bit(accel_dev->accel_id, service->start_status);
}
}
if (hw_data->stop_timer)
hw_data->stop_timer(accel_dev);
hw_data->disable_iov(accel_dev);
if (wait)
msleep(100);
if (test_bit(ADF_STATUS_AE_STARTED, &accel_dev->status)) {
if (adf_ae_stop(accel_dev))
dev_err(&GET_DEV(accel_dev), "failed to stop AE\n");
else
clear_bit(ADF_STATUS_AE_STARTED, &accel_dev->status);
}
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
}
/**
* adf_dev_shutdown() - shutdown acceleration services and data strucutures
* @accel_dev: Pointer to acceleration device
*
* Cleanup the ring data structures and the admin comms and arbitration
* services.
*/
static void adf_dev_shutdown(struct adf_accel_dev *accel_dev)
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct service_hndl *service;
if (!hw_data) {
dev_err(&GET_DEV(accel_dev),
"QAT: Failed to shutdown device - hw_data not set\n");
return;
}
if (test_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status)) {
adf_ae_fw_release(accel_dev);
clear_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status);
}
if (test_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status)) {
if (adf_ae_shutdown(accel_dev))
dev_err(&GET_DEV(accel_dev),
"Failed to shutdown Accel Engine\n");
else
clear_bit(ADF_STATUS_AE_INITIALISED,
&accel_dev->status);
}
list_for_each_entry(service, &service_table, list) {
if (!test_bit(accel_dev->accel_id, service->init_status))
continue;
if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN))
dev_err(&GET_DEV(accel_dev),
"Failed to shutdown service %s\n",
service->name);
else
clear_bit(accel_dev->accel_id, service->init_status);
}
adf_rl_exit(accel_dev);
if (hw_data->ras_ops.disable_ras_errors)
hw_data->ras_ops.disable_ras_errors(accel_dev);
crypto: qat - add heartbeat feature Under some circumstances, firmware in the QAT devices could become unresponsive. The Heartbeat feature provides a mechanism to detect unresponsive devices. The QAT FW periodically writes to memory a set of counters that allow to detect the liveness of a device. This patch adds logic to enable the reporting of those counters, analyze them and report if a device is alive or not. In particular this adds (1) heartbeat enabling, reading and detection logic (2) reporting of heartbeat status and configuration via debugfs (3) documentation for the newly created sysfs entries (4) configuration of FW settings related to heartbeat, e.g. tick period (5) logic to convert time in ms (provided by the user) to clock ticks This patch introduces a new folder in debugfs called heartbeat with the following attributes: - status - queries_sent - queries_failed - config All attributes except config are reading only. In particular: - `status` file returns 0 when device is operational and -1 otherwise. - `queries_sent` returns the total number of heartbeat queries sent. - `queries_failed` returns the total number of heartbeat queries failed. - `config` allows to adjust the frequency at which the firmware writes counters to memory. This period is given in milliseconds and it is fixed for GEN4 devices. Signed-off-by: Damian Muszynski <damian.muszynski@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-06-30 19:03:57 +02:00
adf_heartbeat_shutdown(accel_dev);
crypto: qat - add support for device telemetry Expose through debugfs device telemetry data for QAT GEN4 devices. This allows to gather metrics about the performance and the utilization of a device. In particular, statistics on (1) the utilization of the PCIe channel, (2) address translation, when SVA is enabled and (3) the internal engines for crypto and data compression. If telemetry is supported by the firmware, the driver allocates a DMA region and a circular buffer. When telemetry is enabled, through the `control` attribute in debugfs, the driver sends to the firmware, via the admin interface, the `TL_START` command. This triggers the device to periodically gather telemetry data from hardware registers and write it into the DMA memory region. The device writes into the shared region every second. The driver, every 500ms, snapshots the DMA shared region into the circular buffer. This is then used to compute basic metric (min/max/average) on each counter, every time the `device_data` attribute is queried. Telemetry counters are exposed through debugfs in the folder /sys/kernel/debug/qat_<device>_<BDF>/telemetry. For details, refer to debugfs-driver-qat_telemetry in Documentation/ABI. This patch is based on earlier work done by Wojciech Ziemba. Signed-off-by: Lucas Segarra Fernandez <lucas.segarra.fernandez@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Damian Muszynski <damian.muszynski@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-12-22 11:35:07 +01:00
adf_tl_shutdown(accel_dev);
crypto: qat - add heartbeat feature Under some circumstances, firmware in the QAT devices could become unresponsive. The Heartbeat feature provides a mechanism to detect unresponsive devices. The QAT FW periodically writes to memory a set of counters that allow to detect the liveness of a device. This patch adds logic to enable the reporting of those counters, analyze them and report if a device is alive or not. In particular this adds (1) heartbeat enabling, reading and detection logic (2) reporting of heartbeat status and configuration via debugfs (3) documentation for the newly created sysfs entries (4) configuration of FW settings related to heartbeat, e.g. tick period (5) logic to convert time in ms (provided by the user) to clock ticks This patch introduces a new folder in debugfs called heartbeat with the following attributes: - status - queries_sent - queries_failed - config All attributes except config are reading only. In particular: - `status` file returns 0 when device is operational and -1 otherwise. - `queries_sent` returns the total number of heartbeat queries sent. - `queries_failed` returns the total number of heartbeat queries failed. - `config` allows to adjust the frequency at which the firmware writes counters to memory. This period is given in milliseconds and it is fixed for GEN4 devices. Signed-off-by: Damian Muszynski <damian.muszynski@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2023-06-30 19:03:57 +02:00
if (test_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status)) {
hw_data->free_irq(accel_dev);
clear_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status);
}
/* If not restarting, delete all cfg sections except for GENERAL */
if (!test_bit(ADF_STATUS_RESTARTING, &accel_dev->status))
adf_cfg_del_all_except(accel_dev, ADF_GENERAL_SEC);
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-09 11:54:58 -08:00
if (hw_data->exit_arb)
hw_data->exit_arb(accel_dev);
if (hw_data->exit_admin_comms)
hw_data->exit_admin_comms(accel_dev);
adf_cleanup_etr_data(accel_dev);
crypto: qat - flush misc workqueue during device shutdown Repeated loading and unloading of a device specific QAT driver, for example qat_4xxx, in a tight loop can lead to a crash due to a use-after-free scenario. This occurs when a power management (PM) interrupt triggers just before the device-specific driver (e.g., qat_4xxx.ko) is unloaded, while the core driver (intel_qat.ko) remains loaded. Since the driver uses a shared workqueue (`qat_misc_wq`) across all devices and owned by intel_qat.ko, a deferred routine from the device-specific driver may still be pending in the queue. If this routine executes after the driver is unloaded, it can dereference freed memory, resulting in a page fault and kernel crash like the following: BUG: unable to handle page fault for address: ffa000002e50a01c #PF: supervisor read access in kernel mode RIP: 0010:pm_bh_handler+0x1d2/0x250 [intel_qat] Call Trace: pm_bh_handler+0x1d2/0x250 [intel_qat] process_one_work+0x171/0x340 worker_thread+0x277/0x3a0 kthread+0xf0/0x120 ret_from_fork+0x2d/0x50 To prevent this, flush the misc workqueue during device shutdown to ensure that all pending work items are completed before the driver is unloaded. Note: This approach may slightly increase shutdown latency if the workqueue contains jobs from other devices, but it ensures correctness and stability. Fixes: e5745f34113b ("crypto: qat - enable power management for QAT GEN4") Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Cc: stable@vger.kernel.org Reviewed-by: Ahsan Atta <ahsan.atta@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2025-07-11 13:27:43 +01:00
adf_misc_wq_flush();
adf_dev_restore(accel_dev);
}
int adf_dev_restarting_notify(struct adf_accel_dev *accel_dev)
{
struct service_hndl *service;
list_for_each_entry(service, &service_table, list) {
if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING))
dev_err(&GET_DEV(accel_dev),
"Failed to restart service %s.\n",
service->name);
}
return 0;
}
int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev)
{
struct service_hndl *service;
list_for_each_entry(service, &service_table, list) {
if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED))
dev_err(&GET_DEV(accel_dev),
"Failed to restart service %s.\n",
service->name);
}
return 0;
}
void adf_error_notifier(struct adf_accel_dev *accel_dev)
{
struct service_hndl *service;
list_for_each_entry(service, &service_table, list) {
if (service->event_hld(accel_dev, ADF_EVENT_FATAL_ERROR))
dev_err(&GET_DEV(accel_dev),
"Failed to send error event to %s.\n",
service->name);
}
}
int adf_dev_down(struct adf_accel_dev *accel_dev)
{
int ret = 0;
if (!accel_dev)
return -EINVAL;
mutex_lock(&accel_dev->state_lock);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
mutex_unlock(&accel_dev->state_lock);
return ret;
}
EXPORT_SYMBOL_GPL(adf_dev_down);
int adf_dev_up(struct adf_accel_dev *accel_dev, bool config)
{
int ret = 0;
if (!accel_dev)
return -EINVAL;
mutex_lock(&accel_dev->state_lock);
if (adf_dev_started(accel_dev)) {
dev_info(&GET_DEV(accel_dev), "Device qat_dev%d already up\n",
accel_dev->accel_id);
ret = -EALREADY;
goto out;
}
if (config && GET_HW_DATA(accel_dev)->dev_config) {
ret = GET_HW_DATA(accel_dev)->dev_config(accel_dev);
if (unlikely(ret))
goto out;
}
ret = adf_dev_init(accel_dev);
if (unlikely(ret))
goto out;
ret = adf_dev_start(accel_dev);
out:
mutex_unlock(&accel_dev->state_lock);
return ret;
}
EXPORT_SYMBOL_GPL(adf_dev_up);
int adf_dev_restart(struct adf_accel_dev *accel_dev)
{
int ret = 0;
if (!accel_dev)
return -EFAULT;
adf_dev_down(accel_dev);
ret = adf_dev_up(accel_dev, false);
/* if device is already up return success*/
if (ret == -EALREADY)
return 0;
return ret;
}
EXPORT_SYMBOL_GPL(adf_dev_restart);