linux/drivers/pci/hotplug/pnv_php.c

1252 lines
30 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0+
/*
 * PCI Hotplug Driver for PowerPC PowerNV platform.
 *
 * Copyright Gavin Shan, IBM Corporation 2016.
 * Copyright (C) 2025 Raptor Engineering, LLC
 * Copyright (C) 2025 Raptor Computing Systems, LLC
 */
#include <linux/bitfield.h>
#include <linux/libfdt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/pci_hotplug.h>
#include <linux/of_fdt.h>
#include <asm/opal.h>
#include <asm/pnv-pci.h>
#include <asm/ppc-pci.h>
/*
 * Driver identification strings.
 * NOTE(review): presumably consumed by MODULE_* macros outside this view.
 */
#define DRIVER_VERSION "0.1"
#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation"
#define DRIVER_DESC "PowerPC PowerNV PCI Hotplug Driver"
/*
 * Emit a warning on behalf of slot @sl: through pci_warn() on the slot's
 * pdev when that pointer is non-NULL, otherwise through dev_warn() on the
 * device of the slot's bus.
 */
#define SLOT_WARN(sl, x...) \
((sl)->pdev ? pci_warn((sl)->pdev, x) : dev_warn(&(sl)->bus->dev, x))
/*
 * Hotplug event record that embeds a work item.
 * NOTE(review): the code that fills in and consumes this struct is outside
 * the visible section; the field notes below are inferred from the names
 * and need confirming.
 */
struct pnv_php_event {
/* presumably true for a plug (add) event, false for removal - TODO confirm */
bool added;
/* slot the event refers to */
struct pnv_php_slot *php_slot;
/* work item carried by the event */
struct work_struct work;
};
/*
 * List of slots searched by pnv_php_find_slot(), which walks it with
 * pnv_php_lock held.
 */
static LIST_HEAD(pnv_php_slot_list);
static DEFINE_SPINLOCK(pnv_php_lock);

/* Forward declarations for functions defined elsewhere in this file. */
static void pnv_php_register(struct device_node *dn);
static void pnv_php_unregister_one(struct device_node *dn);
static void pnv_php_unregister(struct device_node *dn);

static void pnv_php_enable_irq(struct pnv_php_slot *php_slot);
static void pnv_php_disable_irq(struct pnv_php_slot *php_slot,
PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com
2025-07-15 16:36:07 -05:00
bool disable_device, bool disable_msi)
{
struct pci_dev *pdev = php_slot->pdev;
u16 ctrl;
if (php_slot->irq > 0) {
pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
ctrl &= ~(PCI_EXP_SLTCTL_HPIE |
PCI_EXP_SLTCTL_PDCE |
PCI_EXP_SLTCTL_DLLSCE);
pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);
free_irq(php_slot->irq, php_slot);
php_slot->irq = 0;
}
PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com
2025-07-15 16:36:07 -05:00
if (disable_device || disable_msi) {
if (pdev->msix_enabled)
pci_disable_msix(pdev);
else if (pdev->msi_enabled)
pci_disable_msi(pdev);
PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com
2025-07-15 16:36:07 -05:00
}
PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com
2025-07-15 16:36:07 -05:00
if (disable_device)
pci_disable_device(pdev);
}
static void pnv_php_free_slot(struct kref *kref)
{
struct pnv_php_slot *php_slot = container_of(kref,
struct pnv_php_slot, kref);
WARN_ON(!list_empty(&php_slot->children));
PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com
2025-07-15 16:36:07 -05:00
pnv_php_disable_irq(php_slot, false, false);
destroy_workqueue(php_slot->wq);
kfree(php_slot->name);
kfree(php_slot);
}
/*
 * Drop one reference on @php_slot; the slot is released through
 * pnv_php_free_slot() when the last reference goes away. A NULL slot is
 * accepted and ignored.
 */
static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot)
{
	if (php_slot)
		kref_put(&php_slot->kref, pnv_php_free_slot);
}
/*
 * Search @php_slot and, recursively, the slots on its children list for
 * the one whose device node is @dn.
 *
 * Returns the matching slot with an extra reference taken on it, or NULL
 * when nothing in this subtree matches.
 */
static struct pnv_php_slot *pnv_php_match(struct device_node *dn,
					  struct pnv_php_slot *php_slot)
{
	struct pnv_php_slot *child;

	if (php_slot->dn != dn) {
		/* Not this slot: descend into each child in list order */
		list_for_each_entry(child, &php_slot->children, link) {
			struct pnv_php_slot *hit = pnv_php_match(dn, child);

			if (hit)
				return hit;
		}

		return NULL;
	}

	kref_get(&php_slot->kref);
	return php_slot;
}
/*
 * Look up the slot bound to device node @dn among all slots reachable from
 * pnv_php_slot_list. The walk runs with pnv_php_lock held and interrupts
 * saved/disabled.
 *
 * Returns the slot as handed back by pnv_php_match() (i.e. with a reference
 * taken), or NULL if no slot matches.
 */
struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn)
{
	struct pnv_php_slot *found = NULL, *root;
	unsigned long flags;

	spin_lock_irqsave(&pnv_php_lock, flags);
	list_for_each_entry(root, &pnv_php_slot_list, link) {
		found = pnv_php_match(dn, root);
		if (found)
			break;
	}
	spin_unlock_irqrestore(&pnv_php_lock, flags);

	return found;
}
EXPORT_SYMBOL_GPL(pnv_php_find_slot);
/*
 * Remove the pdn of every descendant of @dn. The subtree below a child is
 * processed before the child itself, so removal happens depth-first.
 * @dn's own pdn is left alone.
 */
static void pnv_php_rmv_pdns(struct device_node *dn)
{
	struct device_node *node;

	for_each_child_of_node(dn, node) {
		/* Descendants first, then the node itself */
		pnv_php_rmv_pdns(node);
		pci_remove_device_node_info(node);
	}
}
/*
 * Detach all child nodes of the indicated device node. The function
 * handles device nodes in a depth-first manner: the subtree below a child
 * is detached before the child itself.
 *
 * We should not invoke of_node_release() as the memory for an individual
 * device node is part of a large memory block. The large block is
 * allocated from memblock (system bootup) or by kmalloc() when unflattening
 * the device tree by OF changeset. We cannot free the large block allocated
 * from memblock. In the latter case, it should be released all at once.
 */
static void pnv_php_detach_device_nodes(struct device_node *parent)
{
struct device_node *dn;
for_each_child_of_node(parent, dn) {
pnv_php_detach_device_nodes(dn);
/*
 * NOTE(review): this put is in addition to the reference handling done
 * by the iterator; presumably it drops the reference the node held
 * while attached - confirm before reordering put/detach.
 */
of_node_put(dn);
of_detach_node(dn);
}
}
/*
 * Tear down the device-tree state below a slot's device node: remove the
 * pdns, destroy the OF changeset (only when the slot holds an FDT), detach
 * the child nodes and finally free the unflattened tree and the FDT blob.
 * The statement order matters; see the comments below.
 */
static void pnv_php_rmv_devtree(struct pnv_php_slot *php_slot)
{
pnv_php_rmv_pdns(php_slot->dn);
/*
 * Decrease the refcount if the device nodes were created
 * through OF changeset before detaching them.
 */
if (php_slot->fdt)
of_changeset_destroy(&php_slot->ocs);
pnv_php_detach_device_nodes(php_slot->dn);
/*
 * Release the buffers recorded in the slot and clear the pointers so a
 * later call sees no FDT and skips both fdt-guarded sections.
 */
if (php_slot->fdt) {
kfree(php_slot->dt);
kfree(php_slot->fdt);
php_slot->dt = NULL;
php_slot->dn->child = NULL;
php_slot->fdt = NULL;
}
}
/*
 * The nodes of an OF changeset are applied in reverse order, so the child
 * lists are reversed up front; applying the changeset then yields the
 * original node order again.
 */
static void pnv_php_reverse_nodes(struct device_node *parent)
{
	struct device_node *node, *rest;

	/* Reverse every subtree before touching this level */
	for_each_child_of_node(parent, node)
		pnv_php_reverse_nodes(node);

	/*
	 * Empty the child list, then push each former child onto its
	 * front, which reverses the sibling order.
	 */
	node = parent->child;
	parent->child = NULL;
	for (; node; node = rest) {
		rest = node->sibling;
		node->sibling = parent->child;
		parent->child = node;
	}
}
/*
 * Record an "attach node" action in @ocs for every descendant of @dn,
 * parents before their children.
 *
 * Returns 0 on success. On the first failure the walk stops, the reference
 * held on the current child by the iterator is dropped, and the error is
 * returned.
 */
static int pnv_php_populate_changeset(struct of_changeset *ocs,
				      struct device_node *dn)
{
	struct device_node *node;
	int err = 0;

	for_each_child_of_node(dn, node) {
		err = of_changeset_attach_node(ocs, node);
		if (!err)
			err = pnv_php_populate_changeset(ocs, node);

		if (err) {
			/* Leaving the loop early: release the iterator's ref */
			of_node_put(node);
			break;
		}
	}

	return err;
}
/*
 * Per-node callback handed to pci_traverse_device_nodes(): create the pdn
 * for @dn on the PCI controller passed in @data.
 *
 * Returns NULL on success and ERR_PTR(-ENOMEM) when no pdn could be added.
 */
static void *pnv_php_add_one_pdn(struct device_node *dn, void *data)
{
	struct pci_controller *hose = data;

	if (pci_add_device_node_info(hose, dn))
		return NULL;

	return ERR_PTR(-ENOMEM);
}
/*
 * Run pnv_php_add_one_pdn() over the device nodes traversed from the
 * slot's node, using the PCI controller that hosts the slot's bus.
 */
static void pnv_php_add_pdns(struct pnv_php_slot *slot)
{
	pci_traverse_device_nodes(slot->dn, pnv_php_add_one_pdn,
				  pci_bus_to_host(slot->bus));
}
/*
 * Fetch the device-tree blob for a slot from firmware, unflatten it below
 * the slot's device node and attach the new nodes through an OF changeset.
 *
 * On success the exactly-sized blob and the unflattened tree are recorded
 * in php_slot->fdt / php_slot->dt and 0 is returned. On failure a negative
 * errno (or the error reported by the failing helper) is returned and the
 * buffers allocated here are released through the goto chain at the bottom.
 */
static int pnv_php_add_devtree(struct pnv_php_slot *php_slot)
{
void *fdt, *fdt1, *dt;
int ret;
/*
 * We don't know the FDT blob size. We try to get it through
 * maximal memory chunk and then copy it to another chunk that
 * fits the real size.
 */
fdt1 = kzalloc(0x10000, GFP_KERNEL);
if (!fdt1) {
ret = -ENOMEM;
goto out;
}
ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000);
if (ret) {
SLOT_WARN(php_slot, "Error %d getting FDT blob\n", ret);
goto free_fdt1;
}
/*
 * NOTE(review): the size comes from the blob header and is not checked
 * against the 0x10000 scratch buffer here - confirm firmware guarantees it.
 */
fdt = kmemdup(fdt1, fdt_totalsize(fdt1), GFP_KERNEL);
if (!fdt) {
ret = -ENOMEM;
goto free_fdt1;
}
/* Unflatten device tree blob */
dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL);
if (!dt) {
ret = -EINVAL;
SLOT_WARN(php_slot, "Cannot unflatten FDT\n");
goto free_fdt;
}
/* Initialize and apply the changeset */
of_changeset_init(&php_slot->ocs);
pnv_php_reverse_nodes(php_slot->dn);
ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn);
if (ret) {
/* Undo the reversal done just above before bailing out */
pnv_php_reverse_nodes(php_slot->dn);
SLOT_WARN(php_slot, "Error %d populating changeset\n",
ret);
goto free_dt;
}
/* The child list is cleared before the changeset is applied */
php_slot->dn->child = NULL;
ret = of_changeset_apply(&php_slot->ocs);
if (ret) {
SLOT_WARN(php_slot, "Error %d applying changeset\n", ret);
goto destroy_changeset;
}
/* Add device node firmware data */
pnv_php_add_pdns(php_slot);
php_slot->fdt = fdt;
php_slot->dt = dt;
/* Success: only the oversized scratch buffer is released; ret is 0 here */
kfree(fdt1);
goto out;
/* Error unwinding, in reverse order of acquisition */
destroy_changeset:
of_changeset_destroy(&php_slot->ocs);
free_dt:
kfree(dt);
php_slot->dn->child = NULL;
free_fdt:
kfree(fdt);
free_fdt1:
kfree(fdt1);
out:
return ret;
}
PCI: hotplug: Drop hotplug_slot_info Ever since the PCI hotplug core was introduced in 2002, drivers had to allocate and register a struct hotplug_slot_info for every slot: https://git.kernel.org/tglx/history/c/a8a2069f432c Apparently the idea was that drivers furnish the hotplug core with an up-to-date card presence status, power status, latch status and attention indicator status as well as notify the hotplug core of changes thereof. However only 4 out of 12 hotplug drivers bother to notify the hotplug core with pci_hp_change_slot_info() and the hotplug core never made any use of the information: There is just a single macro in pci_hotplug_core.c, GET_STATUS(), which uses the hotplug_slot_info if the driver lacks the corresponding callback in hotplug_slot_ops. The macro is called when the user reads the attribute via sysfs. Now, if the callback isn't defined, the attribute isn't exposed in sysfs in the first place (see e.g. has_power_file()). There are only two situations when the hotplug_slot_info would actually be accessed: * If the driver defines ->enable_slot or ->disable_slot but not ->get_power_status. * If the driver defines ->set_attention_status but not ->get_attention_status. There is no driver doing the former and just a single driver doing the latter, namely pnv_php.c. Amend it with a ->get_attention_status callback. With that, the hotplug_slot_info becomes completely unused by the PCI hotplug core. But a few drivers use it internally as a cache: cpcihp uses it to cache the latch_status and adapter_status. cpqhp uses it to cache the adapter_status. pnv_php and rpaphp use it to cache the attention_status. shpchp uses it to cache all four values. Amend these drivers to cache the information in their private slot struct. shpchp's slot struct already contains members to cache the power_status and adapter_status, so additional members are only needed for the other two values. 
In the case of cpqphp, the cached value is only accessed in a single place, so instead of caching it, read the current value from the hardware. Caution: acpiphp, cpci, cpqhp, shpchp, asus-wmi and eeepc-laptop populate the hotplug_slot_info with initial values on probe. That code is herewith removed. There is a theoretical chance that the code has side effects without which the driver fails to function, e.g. if the ACPI method to read the adapter status needs to be executed at least once on probe. That seems unlikely to me, still maintainers should review the changes carefully for this possibility. Rafael adds: "I'm not aware of any case in which it will break anything, [...] but if that happens, it may be necessary to add the execution of the control methods in question directly to the initialization part." Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> # drivers/pci/hotplug/rpa* Acked-by: Sebastian Ott <sebott@linux.ibm.com> # drivers/pci/hotplug/s390* Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86 Cc: Len Brown <lenb@kernel.org> Cc: Scott Murray <scott@spiteful.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Oliver OHalloran <oliveroh@au1.ibm.com> Cc: Gavin Shan <gwshan@linux.vnet.ibm.com> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Corentin Chary <corentin.chary@gmail.com> Cc: Darren Hart <dvhart@infradead.org>
2018-09-08 09:59:01 +02:00
static inline struct pnv_php_slot *to_pnv_php_slot(struct hotplug_slot *slot)
{
return container_of(slot, struct pnv_php_slot, slot);
}
/**
 * pnv_php_set_slot_power_state() - ask firmware to change a slot's power state
 * @slot:  hotplug slot embedded in a struct pnv_php_slot
 * @state: requested OPAL_PCI_SLOT_* state
 *
 * A positive return from pnv_pci_set_power_state() means a reply message
 * was filled in; it is validated against the slot's phandle and the
 * requested state before its status word is checked. Afterwards the slot's
 * device-tree nodes are removed (power off / offline) or added (any other
 * state).
 *
 * Return: -ENOMSG if the reply does not belong to this request, -ENODEV if
 * the reply reports a failure, a negative value passed through from
 * pnv_pci_set_power_state(), or the result of pnv_php_add_devtree() when
 * powering on.
 *
 * NOTE(review): on the power-off / offline path the value returned is
 * whatever pnv_pci_set_power_state() returned, which can be positive (the
 * "ret > 0" branch); callers appear to rely on this, so it is left as is.
 */
int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
				 uint8_t state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct opal_msg msg;
	int ret;

	ret = pnv_pci_set_power_state(php_slot->id, state, &msg);
	if (ret > 0) {
		/* Make sure the reply matches this slot and this request */
		if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle ||
		    be64_to_cpu(msg.params[2]) != state) {
			SLOT_WARN(php_slot, "Wrong msg (%lld, %lld, %lld)\n",
				  be64_to_cpu(msg.params[1]),
				  be64_to_cpu(msg.params[2]),
				  be64_to_cpu(msg.params[3]));
			return -ENOMSG;
		}
		if (be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) {
			ret = -ENODEV;
			goto error;
		}
	} else if (ret < 0) {
		goto error;
	}

	if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE)
		pnv_php_rmv_devtree(php_slot);
	else
		ret = pnv_php_add_devtree(php_slot);

	return ret;

error:
	SLOT_WARN(php_slot, "Error %d powering %s\n",
		  ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off");
	return ret;
}
EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state);
/*
 * Report the slot's power state.
 *
 * @slot:  hotplug slot embedded in a struct pnv_php_slot
 * @state: output; always written
 *
 * The state is retrieved from firmware. If that fails, a warning is logged
 * and the state falls back to OPAL_PCI_SLOT_POWER_ON. The fall-back value
 * is now actually stored in @state; previously @state was left untouched
 * on failure even though 0 was returned, so a caller passing an
 * uninitialized variable would read garbage.
 *
 * Always returns 0.
 */
static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	uint8_t power_state = OPAL_PCI_SLOT_POWER_ON;
	int ret;

	ret = pnv_pci_get_power_state(php_slot->id, &power_state);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d getting power status\n",
			  ret);
		/* Do not trust whatever the failed call may have stored */
		power_state = OPAL_PCI_SLOT_POWER_ON;
	}

	*state = power_state;

	return 0;
}
/*
 * Check the Data Link Layer Link Active bit in the Link Status register.
 *
 * Returns 1 when the bit is set, 0 when it is clear, and -ENODEV when the
 * config read reports PCIBIOS_DEVICE_NOT_FOUND or the value read looks
 * like a possible error response.
 */
static int pcie_check_link_active(struct pci_dev *pdev)
{
	u16 lnk_status;
	int err;

	err = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
	if (err == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status))
		return -ENODEV;

	return (lnk_status & PCI_EXP_LNKSTA_DLLLA) ? 1 : 0;
}
static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state)
{
PCI: hotplug: Embed hotplug_slot When the PCI hotplug core and its first user, cpqphp, were introduced in February 2002 with historic commit a8a2069f432c, cpqphp allocated a slot struct for its internal use plus a hotplug_slot struct to be registered with the hotplug core and linked the two with pointers: https://git.kernel.org/tglx/history/c/a8a2069f432c Nowadays, the predominant pattern in the tree is to embed ("subclass") such structures in one another and cast to the containing struct with container_of(). But it wasn't until July 2002 that container_of() was introduced with historic commit ec4f214232cf: https://git.kernel.org/tglx/history/c/ec4f214232cf pnv_php, introduced in 2016, did the right thing and embedded struct hotplug_slot in its internal struct pnv_php_slot, but all other drivers cargo-culted cpqphp's design and linked separate structs with pointers. Embedding structs is preferrable to linking them with pointers because it requires fewer allocations, thereby reducing overhead and simplifying error paths. Casting an embedded struct to the containing struct becomes a cheap subtraction rather than a dereference. And having fewer pointers reduces the risk of them pointing nowhere either accidentally or due to an attack. Convert all drivers to embed struct hotplug_slot in their internal slot struct. The "private" pointer in struct hotplug_slot thereby becomes unused, so drop it. Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> Acked-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> # drivers/pci/hotplug/rpa* Acked-by: Sebastian Ott <sebott@linux.ibm.com> # drivers/pci/hotplug/s390* Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86 Cc: Len Brown <lenb@kernel.org> Cc: Scott Murray <scott@spiteful.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Oliver OHalloran <oliveroh@au1.ibm.com> Cc: Gavin Shan <gwshan@linux.vnet.ibm.com> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Corentin Chary <corentin.chary@gmail.com> Cc: Darren Hart <dvhart@infradead.org>
2018-09-08 09:59:01 +02:00
struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
uint8_t presence = OPAL_PCI_SLOT_EMPTY;
int ret;
/*
* Retrieve presence status from firmware. If we can't
* get that, it will fall back to being empty.
*/
ret = pnv_pci_get_presence_state(php_slot->id, &presence);
if (ret >= 0) {
if (pci_pcie_type(php_slot->pdev) == PCI_EXP_TYPE_DOWNSTREAM &&
presence == OPAL_PCI_SLOT_EMPTY) {
/*
* Similar to pciehp_hpc, check whether the Link Active
* bit is set to account for broken downstream bridges
* that don't properly assert Presence Detect State, as
* was observed on the Microsemi Switchtec PM8533 PFX
* [11f8:8533].
*/
if (pcie_check_link_active(php_slot->pdev) > 0)
presence = OPAL_PCI_SLOT_PRESENT;
}
*state = presence;
ret = 0;
} else {
SLOT_WARN(php_slot, "Error %d getting presence\n", ret);
}
return ret;
}
/*
 * Read the indicator control bits of a slot straight from its bridge.
 *
 * @slot:  hotplug slot to query
 * @state: on success, receives the Attention Indicator Control and Power
 *         Indicator Control fields of the Slot Control register, shifted
 *         right by 6 bits
 *
 * Returns 0 on success. Slots that have no bridge device (php_slot->pdev
 * is NULL) have no Slot Control register to read; -ENODEV is returned for
 * them and *state is left untouched.
 */
static int pnv_php_get_raw_indicator_status(struct hotplug_slot *slot, u8 *state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct pci_dev *bridge = php_slot->pdev;
	u16 status = 0;

	/* Other callbacks in this file treat a missing bridge as valid */
	if (!bridge)
		return -ENODEV;

	pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, &status);
	*state = (status & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC)) >> 6;

	return 0;
}
PCI: hotplug: Drop hotplug_slot_info Ever since the PCI hotplug core was introduced in 2002, drivers had to allocate and register a struct hotplug_slot_info for every slot: https://git.kernel.org/tglx/history/c/a8a2069f432c Apparently the idea was that drivers furnish the hotplug core with an up-to-date card presence status, power status, latch status and attention indicator status as well as notify the hotplug core of changes thereof. However only 4 out of 12 hotplug drivers bother to notify the hotplug core with pci_hp_change_slot_info() and the hotplug core never made any use of the information: There is just a single macro in pci_hotplug_core.c, GET_STATUS(), which uses the hotplug_slot_info if the driver lacks the corresponding callback in hotplug_slot_ops. The macro is called when the user reads the attribute via sysfs. Now, if the callback isn't defined, the attribute isn't exposed in sysfs in the first place (see e.g. has_power_file()). There are only two situations when the hotplug_slot_info would actually be accessed: * If the driver defines ->enable_slot or ->disable_slot but not ->get_power_status. * If the driver defines ->set_attention_status but not ->get_attention_status. There is no driver doing the former and just a single driver doing the latter, namely pnv_php.c. Amend it with a ->get_attention_status callback. With that, the hotplug_slot_info becomes completely unused by the PCI hotplug core. But a few drivers use it internally as a cache: cpcihp uses it to cache the latch_status and adapter_status. cpqhp uses it to cache the adapter_status. pnv_php and rpaphp use it to cache the attention_status. shpchp uses it to cache all four values. Amend these drivers to cache the information in their private slot struct. shpchp's slot struct already contains members to cache the power_status and adapter_status, so additional members are only needed for the other two values. 
In the case of cpqphp, the cached value is only accessed in a single place, so instead of caching it, read the current value from the hardware. Caution: acpiphp, cpci, cpqhp, shpchp, asus-wmi and eeepc-laptop populate the hotplug_slot_info with initial values on probe. That code is herewith removed. There is a theoretical chance that the code has side effects without which the driver fails to function, e.g. if the ACPI method to read the adapter status needs to be executed at least once on probe. That seems unlikely to me, still maintainers should review the changes carefully for this possibility. Rafael adds: "I'm not aware of any case in which it will break anything, [...] but if that happens, it may be necessary to add the execution of the control methods in question directly to the initialization part." Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> # drivers/pci/hotplug/rpa* Acked-by: Sebastian Ott <sebott@linux.ibm.com> # drivers/pci/hotplug/s390* Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86 Cc: Len Brown <lenb@kernel.org> Cc: Scott Murray <scott@spiteful.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Oliver OHalloran <oliveroh@au1.ibm.com> Cc: Gavin Shan <gwshan@linux.vnet.ibm.com> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Corentin Chary <corentin.chary@gmail.com> Cc: Darren Hart <dvhart@infradead.org>
2018-09-08 09:59:01 +02:00
/*
 * Report the attention indicator state of a slot.
 *
 * @slot:  hotplug slot to query
 * @state: receives the attention state
 *
 * When the slot has a bridge device, the cached state is refreshed from
 * the hardware first. Slots without a bridge report whatever value is
 * currently cached in php_slot->attention_state. Always returns 0.
 */
static int pnv_php_get_attention_state(struct hotplug_slot *slot, u8 *state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);

	/* Without a bridge there is no register to read back from */
	if (php_slot->pdev)
		pnv_php_get_raw_indicator_status(slot,
						 &php_slot->attention_state);

	*state = php_slot->attention_state;

	return 0;
}
/*
 * Set the attention indicator of a slot.
 *
 * @slot:  hotplug slot to update
 * @state: requested attention state; zero turns the indicator off, any
 *         other value is placed into the Attention Indicator Control field
 *
 * The requested state is always cached in the slot. The Slot Control
 * register is only written when the slot has a bridge device.
 * Always returns 0.
 */
static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct pci_dev *bridge = php_slot->pdev;
	u16 ctrl;

	/* Remember the request even if there is no hardware to program */
	php_slot->attention_state = state;

	if (bridge) {
		ctrl = state ? FIELD_PREP(PCI_EXP_SLTCTL_AIC, state) :
			       PCI_EXP_SLTCTL_ATTN_IND_OFF;

		pcie_capability_clear_and_set_word(bridge, PCI_EXP_SLTCTL,
						   PCI_EXP_SLTCTL_AIC, ctrl);
	}

	return 0;
}
/*
 * Power a slot on, retrying through fundamental resets if that fails.
 *
 * @php_slot: driver-private slot, used for logging and for the bridge
 *            device that gets reset
 * @slot:     the embedded hotplug slot passed to the power-state helper
 *
 * Returns 0 once the slot reports power-on success, otherwise the error
 * code of the last power-on attempt.
 */
static int pnv_php_activate_slot(struct pnv_php_slot *php_slot,
				 struct hotplug_slot *slot)
{
	int ret, i;

	/*
	 * Issue initial slot activation command to firmware
	 *
	 * Firmware will power slot on, attempt to train the link, and
	 * discover any downstream devices. If this process fails, firmware
	 * will return an error code and an invalid device tree. Failure
	 * can be caused for multiple reasons, including a faulty
	 * downstream device, poor connection to the downstream device, or
	 * a previously latched PHB fence. On failure, issue fundamental
	 * reset up to three times before aborting.
	 */
	ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON);
	if (!ret)
		return 0;

	SLOT_WARN(php_slot,
		  "PCI slot activation failed with error code %d, possible frozen PHB\n",
		  ret);
	SLOT_WARN(php_slot,
		  "Attempting complete PHB reset before retrying slot activation\n");

	for (i = 0; i < 3; i++) {
		/*
		 * Slot activation failed, PHB may be fenced from a
		 * prior device failure.
		 *
		 * Use the OPAL fundamental reset call to both try a
		 * device reset and clear any potentially active PHB
		 * fence / freeze.
		 */
		SLOT_WARN(php_slot, "Try %d...\n", i + 1);
		pci_set_pcie_reset_state(php_slot->pdev, pcie_warm_reset);
		msleep(250);
		pci_set_pcie_reset_state(php_slot->pdev, pcie_deassert_reset);

		ret = pnv_php_set_slot_power_state(slot,
						   OPAL_PCI_SLOT_POWER_ON);
		if (!ret)
			break;
	}

	/* The loop only runs to completion when every retry failed */
	if (i >= 3)
		SLOT_WARN(php_slot,
			  "Failed to bring slot online, aborting!\n");

	return ret;
}
/*
 * Move a registered slot to the populated state.
 *
 * @php_slot: slot to enable
 * @rescan:   when true and an adapter is present, add the devices behind
 *            the slot and register any child slots
 *
 * Returns 0 on success (including the cases where nothing had to be done),
 * or the error reported while querying presence, querying power, or
 * activating the slot.
 */
static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan)
{
	struct hotplug_slot *slot = &php_slot->slot;
	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
	uint8_t power_status = OPAL_PCI_SLOT_POWER_ON;
	bool first_check;
	bool populate;
	int ret;

	/* Only slots still in the registered state need any work */
	if (php_slot->state != PNV_PHP_STATE_REGISTERED)
		return 0;

	/* Find out whether anything is plugged into the slot */
	ret = pnv_php_get_adapter_state(slot, &presence);
	if (ret)
		return ret;

	/* Note whether this is the first pass, then mark the check as done */
	first_check = !php_slot->power_state_check;
	php_slot->power_state_check = true;

	if (presence == OPAL_PCI_SLOT_EMPTY) {
		/*
		 * Nothing is behind the slot. On the first pass the slot is
		 * left in the registered state; otherwise PCI devices
		 * inserted afterwards won't be probed and populated.
		 */
		if (first_check)
			return 0;
	} else {
		/*
		 * If the power supply to the slot is off, we can't detect
		 * adapter presence state. That means we have to turn the
		 * slot on before going to probe slot's presence state.
		 *
		 * On the first pass the power status is not changed, to
		 * speed up boot, on the assumption that firmware supplies
		 * a consistent slot power status: an empty slot always has
		 * its power off and a non-empty slot has its power on.
		 */
		if (first_check) {
			ret = pnv_php_get_power_state(slot, &power_status);
			if (ret)
				return ret;

			if (power_status != OPAL_PCI_SLOT_POWER_ON)
				return 0;
		}

		/* Power the slot on unless it already is */
		ret = pnv_php_get_power_state(slot, &power_status);
		if (ret)
			return ret;

		if (power_status != OPAL_PCI_SLOT_POWER_ON) {
			ret = pnv_php_activate_slot(php_slot, slot);
			if (ret)
				return ret;
		}
	}

	populate = rescan && presence == OPAL_PCI_SLOT_PRESENT;

	if (populate) {
		pci_lock_rescan_remove();
		pci_hp_add_devices(php_slot->bus);
		pci_unlock_rescan_remove();
	}

	php_slot->state = PNV_PHP_STATE_POPULATED;

	/* Rescan for child hotpluggable slots */
	if (populate)
		pnv_php_register(php_slot->dn);

	return 0;
}
/*
 * Reset the secondary bus behind a slot's bridge.
 *
 * @slot:  hotplug slot to reset
 * @probe: when true, only report whether a reset is supported
 *
 * Returns -ENOTTY when the slot has no bridge device, 0 when @probe is set
 * and a bridge exists, and otherwise the result of the secondary bus reset.
 */
static int pnv_php_reset_slot(struct hotplug_slot *slot, bool probe)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct pci_dev *bridge = php_slot->pdev;
	uint16_t sts;
	int ret;

	/*
	 * The CAPI folks want pnv_php to drive OpenCAPI slots
	 * which don't have a bridge. Only claim to support
	 * reset_slot() if we have a bridge device (for now...)
	 *
	 * NOTE(review): -ENOTTY is used on the understanding that the PCI
	 * core treats it as "reset method not supported" - confirm.
	 */
	if (!bridge)
		return -ENOTTY;

	if (probe)
		return 0;

	/* mask our interrupt while resetting the bridge */
	if (php_slot->irq > 0)
		disable_irq(php_slot->irq);

	ret = pci_bridge_secondary_bus_reset(bridge);

	/*
	 * clear any state changes that happened due to the reset; this is
	 * done even if the reset reported a failure so that the interrupt
	 * is not re-enabled with stale status bits pending
	 */
	pcie_capability_read_word(bridge, PCI_EXP_SLTSTA, &sts);
	sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
	pcie_capability_write_word(bridge, PCI_EXP_SLTSTA, sts);

	if (php_slot->irq > 0)
		enable_irq(php_slot->irq);

	return ret;
}
static int pnv_php_enable_slot(struct hotplug_slot *slot)
{
PCI: hotplug: Drop hotplug_slot_info Ever since the PCI hotplug core was introduced in 2002, drivers had to allocate and register a struct hotplug_slot_info for every slot: https://git.kernel.org/tglx/history/c/a8a2069f432c Apparently the idea was that drivers furnish the hotplug core with an up-to-date card presence status, power status, latch status and attention indicator status as well as notify the hotplug core of changes thereof. However only 4 out of 12 hotplug drivers bother to notify the hotplug core with pci_hp_change_slot_info() and the hotplug core never made any use of the information: There is just a single macro in pci_hotplug_core.c, GET_STATUS(), which uses the hotplug_slot_info if the driver lacks the corresponding callback in hotplug_slot_ops. The macro is called when the user reads the attribute via sysfs. Now, if the callback isn't defined, the attribute isn't exposed in sysfs in the first place (see e.g. has_power_file()). There are only two situations when the hotplug_slot_info would actually be accessed: * If the driver defines ->enable_slot or ->disable_slot but not ->get_power_status. * If the driver defines ->set_attention_status but not ->get_attention_status. There is no driver doing the former and just a single driver doing the latter, namely pnv_php.c. Amend it with a ->get_attention_status callback. With that, the hotplug_slot_info becomes completely unused by the PCI hotplug core. But a few drivers use it internally as a cache: cpcihp uses it to cache the latch_status and adapter_status. cpqhp uses it to cache the adapter_status. pnv_php and rpaphp use it to cache the attention_status. shpchp uses it to cache all four values. Amend these drivers to cache the information in their private slot struct. shpchp's slot struct already contains members to cache the power_status and adapter_status, so additional members are only needed for the other two values. 
In the case of cpqphp, the cached value is only accessed in a single place, so instead of caching it, read the current value from the hardware. Caution: acpiphp, cpci, cpqhp, shpchp, asus-wmi and eeepc-laptop populate the hotplug_slot_info with initial values on probe. That code is herewith removed. There is a theoretical chance that the code has side effects without which the driver fails to function, e.g. if the ACPI method to read the adapter status needs to be executed at least once on probe. That seems unlikely to me, still maintainers should review the changes carefully for this possibility. Rafael adds: "I'm not aware of any case in which it will break anything, [...] but if that happens, it may be necessary to add the execution of the control methods in question directly to the initialization part." Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> # drivers/pci/hotplug/rpa* Acked-by: Sebastian Ott <sebott@linux.ibm.com> # drivers/pci/hotplug/s390* Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86 Cc: Len Brown <lenb@kernel.org> Cc: Scott Murray <scott@spiteful.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Oliver OHalloran <oliveroh@au1.ibm.com> Cc: Gavin Shan <gwshan@linux.vnet.ibm.com> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Corentin Chary <corentin.chary@gmail.com> Cc: Darren Hart <dvhart@infradead.org>
2018-09-08 09:59:01 +02:00
struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com
2025-07-15 16:36:07 -05:00
u32 prop32;
int ret;
ret = pnv_php_enable(php_slot, true);
if (ret)
return ret;
/* (Re-)enable interrupt if the slot supports surprise hotplug */
ret = of_property_read_u32(php_slot->dn, "ibm,slot-surprise-pluggable",
&prop32);
if (!ret && prop32)
pnv_php_enable_irq(php_slot);
return 0;
}
/*
 * Disable any hotplug interrupts for all slots on the provided bus, as well as
 * all downstream slots in preparation for a hot unplug.
 *
 * Child buses are visited recursively before the slots on @bus itself.
 * Always returns 0.
 */
static int pnv_php_disable_all_irqs(struct pci_bus *bus)
{
	struct pci_bus *child_bus;
	struct pci_slot *slot;

	/* First go down child buses */
	list_for_each_entry(child_bus, &bus->children, node)
		pnv_php_disable_all_irqs(child_bus);

	/* Disable IRQs for all pnv_php slots on this bus */
	list_for_each_entry(slot, &bus->slots, list) {
		/*
		 * A pci_slot with no hotplug slot attached has no pnv_php
		 * state behind it; converting a NULL pointer would hand a
		 * bogus slot to pnv_php_disable_irq().
		 */
		if (!slot->hotplug)
			continue;

		pnv_php_disable_irq(to_pnv_php_slot(slot->hotplug),
				    false, true);
	}

	return 0;
}
/*
 * Prepare for a hot unplug by turning off the hotplug interrupts of every
 * slot located below the provided bus. Slots sitting directly on @bus are
 * left alone; only its child buses are walked. Always returns 0.
 */
static int pnv_php_disable_all_downstream_irqs(struct pci_bus *bus)
{
	struct pci_bus *child;

	/* Each child bus takes care of its own slots and descendants */
	list_for_each_entry(child, &bus->children, node)
		pnv_php_disable_all_irqs(child);

	return 0;
}
static int pnv_php_disable_slot(struct hotplug_slot *slot)
{
PCI: hotplug: Embed hotplug_slot When the PCI hotplug core and its first user, cpqphp, were introduced in February 2002 with historic commit a8a2069f432c, cpqphp allocated a slot struct for its internal use plus a hotplug_slot struct to be registered with the hotplug core and linked the two with pointers: https://git.kernel.org/tglx/history/c/a8a2069f432c Nowadays, the predominant pattern in the tree is to embed ("subclass") such structures in one another and cast to the containing struct with container_of(). But it wasn't until July 2002 that container_of() was introduced with historic commit ec4f214232cf: https://git.kernel.org/tglx/history/c/ec4f214232cf pnv_php, introduced in 2016, did the right thing and embedded struct hotplug_slot in its internal struct pnv_php_slot, but all other drivers cargo-culted cpqphp's design and linked separate structs with pointers. Embedding structs is preferrable to linking them with pointers because it requires fewer allocations, thereby reducing overhead and simplifying error paths. Casting an embedded struct to the containing struct becomes a cheap subtraction rather than a dereference. And having fewer pointers reduces the risk of them pointing nowhere either accidentally or due to an attack. Convert all drivers to embed struct hotplug_slot in their internal slot struct. The "private" pointer in struct hotplug_slot thereby becomes unused, so drop it. Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> Acked-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> # drivers/pci/hotplug/rpa* Acked-by: Sebastian Ott <sebott@linux.ibm.com> # drivers/pci/hotplug/s390* Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86 Cc: Len Brown <lenb@kernel.org> Cc: Scott Murray <scott@spiteful.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Oliver OHalloran <oliveroh@au1.ibm.com> Cc: Gavin Shan <gwshan@linux.vnet.ibm.com> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Corentin Chary <corentin.chary@gmail.com> Cc: Darren Hart <dvhart@infradead.org>
2018-09-08 09:59:01 +02:00
struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
int ret;
/*
* Allow to disable a slot already in the registered state to
* cover cases where the slot couldn't be enabled and never
* reached the populated state
*/
if (php_slot->state != PNV_PHP_STATE_POPULATED &&
php_slot->state != PNV_PHP_STATE_REGISTERED)
return 0;
PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com
2025-07-15 16:36:07 -05:00
/*
* Free all IRQ resources from all child slots before remove.
* Note that we do not disable the root slot IRQ here as that
* would also deactivate the slot hot (re)plug interrupt!
*/
pnv_php_disable_all_downstream_irqs(php_slot->bus);
/* Remove all devices behind the slot */
pci_lock_rescan_remove();
pci_hp_remove_devices(php_slot->bus);
pci_unlock_rescan_remove();
/* Detach the child hotpluggable slots */
pnv_php_unregister(php_slot->dn);
/* Notify firmware and remove device nodes */
ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_OFF);
php_slot->state = PNV_PHP_STATE_REGISTERED;
return ret;
}
static const struct hotplug_slot_ops php_slot_ops = {
.get_power_status = pnv_php_get_power_state,
.get_adapter_status = pnv_php_get_adapter_state,
PCI: hotplug: Drop hotplug_slot_info Ever since the PCI hotplug core was introduced in 2002, drivers had to allocate and register a struct hotplug_slot_info for every slot: https://git.kernel.org/tglx/history/c/a8a2069f432c Apparently the idea was that drivers furnish the hotplug core with an up-to-date card presence status, power status, latch status and attention indicator status as well as notify the hotplug core of changes thereof. However only 4 out of 12 hotplug drivers bother to notify the hotplug core with pci_hp_change_slot_info() and the hotplug core never made any use of the information: There is just a single macro in pci_hotplug_core.c, GET_STATUS(), which uses the hotplug_slot_info if the driver lacks the corresponding callback in hotplug_slot_ops. The macro is called when the user reads the attribute via sysfs. Now, if the callback isn't defined, the attribute isn't exposed in sysfs in the first place (see e.g. has_power_file()). There are only two situations when the hotplug_slot_info would actually be accessed: * If the driver defines ->enable_slot or ->disable_slot but not ->get_power_status. * If the driver defines ->set_attention_status but not ->get_attention_status. There is no driver doing the former and just a single driver doing the latter, namely pnv_php.c. Amend it with a ->get_attention_status callback. With that, the hotplug_slot_info becomes completely unused by the PCI hotplug core. But a few drivers use it internally as a cache: cpcihp uses it to cache the latch_status and adapter_status. cpqhp uses it to cache the adapter_status. pnv_php and rpaphp use it to cache the attention_status. shpchp uses it to cache all four values. Amend these drivers to cache the information in their private slot struct. shpchp's slot struct already contains members to cache the power_status and adapter_status, so additional members are only needed for the other two values. 
In the case of cpqphp, the cached value is only accessed in a single place, so instead of caching it, read the current value from the hardware. Caution: acpiphp, cpci, cpqhp, shpchp, asus-wmi and eeepc-laptop populate the hotplug_slot_info with initial values on probe. That code is herewith removed. There is a theoretical chance that the code has side effects without which the driver fails to function, e.g. if the ACPI method to read the adapter status needs to be executed at least once on probe. That seems unlikely to me, still maintainers should review the changes carefully for this possibility. Rafael adds: "I'm not aware of any case in which it will break anything, [...] but if that happens, it may be necessary to add the execution of the control methods in question directly to the initialization part." Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> # drivers/pci/hotplug/rpa* Acked-by: Sebastian Ott <sebott@linux.ibm.com> # drivers/pci/hotplug/s390* Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86 Cc: Len Brown <lenb@kernel.org> Cc: Scott Murray <scott@spiteful.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Oliver OHalloran <oliveroh@au1.ibm.com> Cc: Gavin Shan <gwshan@linux.vnet.ibm.com> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Corentin Chary <corentin.chary@gmail.com> Cc: Darren Hart <dvhart@infradead.org>
2018-09-08 09:59:01 +02:00
.get_attention_status = pnv_php_get_attention_state,
.set_attention_status = pnv_php_set_attention_state,
.enable_slot = pnv_php_enable_slot,
.disable_slot = pnv_php_disable_slot,
.reset_slot = pnv_php_reset_slot,
};
PCI: hotplug: Demidlayer registration with the core When a hotplug driver calls pci_hp_register(), all steps necessary for registration are carried out in one go, including creation of a kobject and addition to sysfs. That's a problem for pciehp once it's converted to enable/disable the slot exclusively from the IRQ thread: The thread needs to be spawned after creation of the kobject (because it uses the kobject's name), but before addition to sysfs (because it will handle enable/disable requests submitted via sysfs). pci_hp_deregister() does offer a ->release callback that's invoked after deletion from sysfs and before destruction of the kobject. But because pci_hp_register() doesn't offer a counterpart, hotplug drivers' ->probe and ->remove code becomes asymmetric, which is error prone as recently discovered use-after-free bugs in pciehp's ->remove hook have shown. In a sense, this appears to be a case of the midlayer antipattern: "The core thesis of the "midlayer mistake" is that midlayers are bad and should not exist. That common functionality which it is so tempting to put in a midlayer should instead be provided as library routines which can [be] used, augmented, or ignored by each bottom level driver independently. Thus every subsystem that supports multiple implementations (or drivers) should provide a very thin top layer which calls directly into the bottom layer drivers, and a rich library of support code that eases the implementation of those drivers. This library is available to, but not forced upon, those drivers." 
-- Neil Brown (2009), https://lwn.net/Articles/336262/ The presence of midlayer traits in the PCI hotplug core might be ascribed to its age: When it was introduced in February 2002, the blessings of a library approach might not have been well known: https://git.kernel.org/tglx/history/c/a8a2069f432c For comparison, the driver core does offer split functions for creating a kobject (device_initialize()) and addition to sysfs (device_add()) as an alternative to carrying out everything at once (device_register()). This was introduced in October 2002: https://git.kernel.org/tglx/history/c/8b290eb19962 The odd ->release callback in the PCI hotplug core was added in 2003: https://git.kernel.org/tglx/history/c/69f8d663b595 Clearly, a library approach would not force every hotplug driver to implement a ->release callback, but rather allow the driver to remove the sysfs files, release its data structures and finally destroy the kobject. Alternatively, a driver may choose to remove everything with pci_hp_deregister(), then release its data structures. To this end, offer drivers pci_hp_initialize() and pci_hp_add() as a split-up version of pci_hp_register(). Likewise, offer pci_hp_del() and pci_hp_destroy() as a split-up version of pci_hp_deregister(). Eliminate the ->release callback and move its code into each driver's teardown routine. Declare pci_hp_deregister() void, in keeping with the usual kernel pattern that enablement can fail, but disablement cannot. It only returned an error if the caller passed in a NULL pointer or a slot which has never or is no longer registered or is sharing its name with another slot. Those would be bugs, so WARN about them. Few hotplug drivers actually checked the return value and those that did only printed a useless error message to dmesg. Remove that. For most drivers the conversion was straightforward since it doesn't matter whether the code in the ->release callback is executed before or after destruction of the kobject. 
But in the case of ibmphp, it was unclear to me whether setting slot_cur->ctrl and slot_cur->bus_on to NULL needs to happen before the kobject is destroyed, so I erred on the side of caution and ensured that the order stays the same. Another nontrivial case is pnv_php, I've found the list and kref logic difficult to understand, however my impression was that it is safe to delete the list element and drop the references until after the kobject is destroyed. Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86 Cc: Rafael J. Wysocki <rjw@rjwysocki.net> Cc: Len Brown <lenb@kernel.org> Cc: Scott Murray <scott@spiteful.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Gavin Shan <gwshan@linux.vnet.ibm.com> Cc: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Corentin Chary <corentin.chary@gmail.com> Cc: Darren Hart <dvhart@infradead.org> Cc: Andy Shevchenko <andy@infradead.org>
2018-07-19 17:27:43 -05:00
/*
 * Unlink @php_slot from the list it sits on (a parent's children list
 * or the global slot list) and drop one reference on the slot itself
 * and one on its parent.
 */
static void pnv_php_release(struct pnv_php_slot *php_slot)
{
	/*
	 * Sample the parent pointer up front so that @php_slot is never
	 * dereferenced after its reference has been dropped below.
	 */
	struct pnv_php_slot *parent = php_slot->parent;
	unsigned long flags;

	/* Remove from global or child list */
	spin_lock_irqsave(&pnv_php_lock, flags);
	list_del(&php_slot->link);
	spin_unlock_irqrestore(&pnv_php_lock, flags);

	/* Detach from parent */
	pnv_php_put_slot(php_slot);
	pnv_php_put_slot(parent);
}
static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
{
struct pnv_php_slot *php_slot;
struct pci_bus *bus;
const char *label;
uint64_t id;
int ret;
ret = of_property_read_string(dn, "ibm,slot-label", &label);
if (ret)
return NULL;
if (pnv_pci_get_slot_id(dn, &id))
return NULL;
bus = pci_find_bus_by_node(dn);
if (!bus)
return NULL;
php_slot = kzalloc(sizeof(*php_slot), GFP_KERNEL);
if (!php_slot)
return NULL;
php_slot->name = kstrdup(label, GFP_KERNEL);
if (!php_slot->name) {
kfree(php_slot);
return NULL;
}
PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com
2025-07-15 16:36:07 -05:00
/* Allocate workqueue for this slot's interrupt handling */
php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name);
if (!php_slot->wq) {
SLOT_WARN(php_slot, "Cannot alloc workqueue\n");
kfree(php_slot->name);
kfree(php_slot);
return NULL;
}
if (dn->child && PCI_DN(dn->child))
php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn);
else
php_slot->slot_no = -1; /* Placeholder slot */
kref_init(&php_slot->kref);
php_slot->state = PNV_PHP_STATE_INITIALIZED;
php_slot->dn = dn;
php_slot->pdev = bus->self;
php_slot->bus = bus;
php_slot->id = id;
php_slot->power_state_check = false;
php_slot->slot.ops = &php_slot_ops;
INIT_LIST_HEAD(&php_slot->children);
INIT_LIST_HEAD(&php_slot->link);
return php_slot;
}
static int pnv_php_register_slot(struct pnv_php_slot *php_slot)
{
struct pnv_php_slot *parent;
struct device_node *dn = php_slot->dn;
unsigned long flags;
int ret;
/* Check if the slot is registered or not */
parent = pnv_php_find_slot(php_slot->dn);
if (parent) {
pnv_php_put_slot(parent);
return -EEXIST;
}
/* Register PCI slot */
ret = pci_hp_register(&php_slot->slot, php_slot->bus,
php_slot->slot_no, php_slot->name);
if (ret) {
SLOT_WARN(php_slot, "Error %d registering slot\n", ret);
return ret;
}
/* Attach to the parent's child list or global list */
while ((dn = of_get_parent(dn))) {
if (!PCI_DN(dn)) {
of_node_put(dn);
break;
}
parent = pnv_php_find_slot(dn);
if (parent) {
of_node_put(dn);
break;
}
of_node_put(dn);
}
spin_lock_irqsave(&pnv_php_lock, flags);
php_slot->parent = parent;
if (parent)
list_add_tail(&php_slot->link, &parent->children);
else
list_add_tail(&php_slot->link, &pnv_php_slot_list);
spin_unlock_irqrestore(&pnv_php_lock, flags);
php_slot->state = PNV_PHP_STATE_REGISTERED;
return 0;
}
static int pnv_php_enable_msix(struct pnv_php_slot *php_slot)
{
struct pci_dev *pdev = php_slot->pdev;
struct msix_entry entry;
int nr_entries, ret;
u16 pcie_flag;
/* Get total number of MSIx entries */
nr_entries = pci_msix_vec_count(pdev);
if (nr_entries < 0)
return nr_entries;
/* Check hotplug MSIx entry is in range */
pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &pcie_flag);
entry.entry = FIELD_GET(PCI_EXP_FLAGS_IRQ, pcie_flag);
if (entry.entry >= nr_entries)
return -ERANGE;
/* Enable MSIx */
ret = pci_enable_msix_exact(pdev, &entry, 1);
if (ret) {
SLOT_WARN(php_slot, "Error %d enabling MSIx\n", ret);
return ret;
}
return entry.vector;
}
/*
 * When a device is surprise removed from a downstream bridge slot, the
 * upstream bridge port can still end up frozen due to related EEH
 * events, which in turn blocks the MSI interrupts used for slot hotplug
 * detection.
 *
 * Look at the PE of the slot's bridge device and, if it is marked
 * isolated, try up to three times to unfreeze it. Every outcome is
 * reported through SLOT_WARN().
 */
static void
pnv_php_detect_clear_suprise_removal_freeze(struct pnv_php_slot *php_slot)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(php_slot->pdev);
	struct eeh_pe *pe = edev ? edev->pe : NULL;
	int state, attempt;

	state = eeh_pe_get_state(pe);
	if (state == -ENODEV || state == -ENOENT) {
		SLOT_WARN(
			php_slot,
			"Upstream bridge PE state unknown, hotplug detect may fail\n");
		return;
	}

	/* Nothing to do unless the PE is isolated */
	if (!(pe->state & EEH_PE_ISOLATED))
		return;

	SLOT_WARN(php_slot,
		  "Upstream bridge PE %02x frozen, thawing...\n",
		  pe->addr);

	for (attempt = 0; attempt < 3; attempt++) {
		if (eeh_unfreeze_pe(pe) == 0) {
			SLOT_WARN(php_slot,
				  "PE %02x thawed successfully\n",
				  pe->addr);
			return;
		}
	}

	/* All attempts failed */
	SLOT_WARN(php_slot,
		  "Unable to thaw PE %02x, hotplug detect will fail!\n",
		  pe->addr);
}
/*
 * Work item callback for a hotplug event queued by pnv_php_interrupt().
 *
 * Enables the slot for an "added" event; otherwise disables it and then
 * checks the upstream bridge for a leftover freeze. The event structure
 * is freed before returning.
 */
static void pnv_php_event_handler(struct work_struct *work)
{
	struct pnv_php_event *evt =
		container_of(work, struct pnv_php_event, work);
	struct pnv_php_slot *php_slot = evt->php_slot;

	if (!evt->added) {
		pnv_php_disable_slot(&php_slot->slot);
		pnv_php_detect_clear_suprise_removal_freeze(php_slot);
	} else {
		pnv_php_enable_slot(&php_slot->slot);
	}

	kfree(evt);
}
/*
 * Interrupt handler for slot hotplug events; @data is the slot that was
 * passed to request_irq().
 *
 * Works out from the slot/link status (or the firmware presence state)
 * whether a device was added or removed, marks the PE of a removed
 * device isolated and frozen, and queues a pnv_php_event on the slot's
 * workqueue.
 *
 * Returns IRQ_NONE when neither a link-state nor a usable presence
 * change is flagged, IRQ_HANDLED otherwise.
 */
static irqreturn_t pnv_php_interrupt(int irq, void *data)
{
	struct pnv_php_slot *php_slot = data;
	struct pci_dev *bridge = php_slot->pdev;
	struct pnv_php_event *evt;
	u16 slot_sts, link_sts;
	bool added;

	/* Fetch the two hotplug status bits and write the set ones back */
	pcie_capability_read_word(bridge, PCI_EXP_SLTSTA, &slot_sts);
	slot_sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
	pcie_capability_write_word(bridge, PCI_EXP_SLTSTA, slot_sts);

	pci_dbg(bridge, "PCI slot [%s]: HP int! DLAct: %d, PresDet: %d\n",
		php_slot->name,
		!!(slot_sts & PCI_EXP_SLTSTA_DLLSC),
		!!(slot_sts & PCI_EXP_SLTSTA_PDC));

	if (slot_sts & PCI_EXP_SLTSTA_DLLSC) {
		/* Link state changed: "added" follows the link-active bit */
		pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &link_sts);
		added = (link_sts & PCI_EXP_LNKSTA_DLLLA) != 0;
	} else if ((slot_sts & PCI_EXP_SLTSTA_PDC) &&
		   !(php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC)) {
		/* Presence changed: ask firmware what is in the slot now */
		u8 presence;
		int rc = pnv_pci_get_presence_state(php_slot->id, &presence);

		if (rc) {
			SLOT_WARN(php_slot,
				  "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n",
				  php_slot->name, rc, slot_sts);
			return IRQ_HANDLED;
		}

		added = presence == OPAL_PCI_SLOT_PRESENT;
	} else {
		pci_dbg(bridge, "PCI slot [%s]: Spurious IRQ?\n",
			php_slot->name);
		return IRQ_NONE;
	}

	/* Freeze the removed PE to avoid unexpected error reporting */
	if (!added) {
		struct pci_dev *first_child;
		struct eeh_dev *edev;
		struct eeh_pe *pe;

		first_child = list_first_entry_or_null(&php_slot->bus->devices,
						       struct pci_dev,
						       bus_list);
		edev = first_child ? pci_dev_to_eeh_dev(first_child) : NULL;
		pe = edev ? edev->pe : NULL;
		if (pe) {
			unsigned long eeh_flags;

			eeh_serialize_lock(&eeh_flags);
			eeh_pe_mark_isolated(pe);
			eeh_serialize_unlock(eeh_flags);
			eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
		}
	}

	/*
	 * The PE is left in frozen state if the event is missed. It's
	 * fine as the PCI devices (PE) aren't functional any more.
	 */
	evt = kzalloc(sizeof(*evt), GFP_ATOMIC);
	if (!evt) {
		SLOT_WARN(php_slot,
			  "PCI slot [%s] missed hotplug event 0x%04x\n",
			  php_slot->name, slot_sts);
		return IRQ_HANDLED;
	}

	pci_info(bridge, "PCI slot [%s] %s (IRQ: %d)\n",
		 php_slot->name, added ? "added" : "removed", irq);

	/* Defer the actual enable/disable to the slot's workqueue */
	INIT_WORK(&evt->work, pnv_php_event_handler);
	evt->added = added;
	evt->php_slot = php_slot;
	queue_work(php_slot->wq, &evt->work);

	return IRQ_HANDLED;
}
static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq)
{
struct pci_dev *pdev = php_slot->pdev;
u32 broken_pdc = 0;
u16 sts, ctrl;
int ret;
/* Check PDC (Presence Detection Change) is broken or not */
ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc",
&broken_pdc);
if (!ret && broken_pdc)
php_slot->flags |= PNV_PHP_FLAG_BROKEN_PDC;
/* Clear pending interrupts */
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC)
sts |= PCI_EXP_SLTSTA_DLLSC;
else
sts |= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);
/* Request the interrupt */
ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED,
php_slot->name, php_slot);
if (ret) {
PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com
2025-07-15 16:36:07 -05:00
pnv_php_disable_irq(php_slot, true, true);
SLOT_WARN(php_slot, "Error %d enabling IRQ %d\n", ret, irq);
return;
}
/* Enable the interrupts */
pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) {
ctrl &= ~PCI_EXP_SLTCTL_PDCE;
ctrl |= (PCI_EXP_SLTCTL_HPIE |
PCI_EXP_SLTCTL_DLLSCE);
} else {
ctrl |= (PCI_EXP_SLTCTL_HPIE |
PCI_EXP_SLTCTL_PDCE |
PCI_EXP_SLTCTL_DLLSCE);
}
pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);
/* The interrupt is initialized successfully when @irq is valid */
php_slot->irq = irq;
}
static void pnv_php_enable_irq(struct pnv_php_slot *php_slot)
{
struct pci_dev *pdev = php_slot->pdev;
int irq, ret;
pci/hotplug/pnv-php: Disable surprise hotplug capability on conflicts The root port or PCIe switch downstream port might have been associated with driver other than pnv-php. The MSI or MSIx might also have been enabled by that driver (e.g. pcieport_drv). Attempt to enable MSI incurs below backtrace: PowerPC PowerNV PCI Hotplug Driver version: 0.1 ------------[ cut here ]------------ WARNING: CPU: 19 PID: 1004 at drivers/pci/msi.c:1071 \ __pci_enable_msi_range+0x84/0x4e0 NIP [c000000000665c34] __pci_enable_msi_range+0x84/0x4e0 LR [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0 Call Trace: [c000000384d67600] [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0 [c000000384d676e0] [d00000000aa31b04] pnv_php_register+0x564/0x5a0 [pnv_php] [c000000384d677c0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php] [c000000384d678a0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php] [c000000384d67980] [d00000000aa31dfc] pnv_php_init+0x60/0x98 [pnv_php] [c000000384d679f0] [c00000000000cfdc] do_one_initcall+0x6c/0x1d0 [c000000384d67ab0] [c000000000b92354] do_init_module+0x94/0x254 [c000000384d67b40] [c00000000019719c] load_module+0x258c/0x2c60 [c000000384d67d30] [c000000000197bb0] SyS_finit_module+0xf0/0x170 [c000000384d67e30] [c00000000000b184] system_call+0x38/0xe0 This fixes the issue by skipping enabling the surprise hotplug capability if the MSI or MSIx on the PCI slot's upstream port has been enabled by other driver. Cc: <stable@vger.kernel.org> # v4.9+ Fixes: 360aebd85a4c ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> Tested-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2017-02-16 10:22:33 +11:00
/*
* The MSI/MSIx interrupt might have been occupied by other
* drivers. Don't populate the surprise hotplug capability
* in that case.
*/
if (pci_dev_msi_enabled(pdev))
return;
ret = pci_enable_device(pdev);
if (ret) {
SLOT_WARN(php_slot, "Error %d enabling device\n", ret);
return;
}
pci_set_master(pdev);
/* Enable MSIx interrupt */
irq = pnv_php_enable_msix(php_slot);
if (irq > 0) {
pnv_php_init_irq(php_slot, irq);
return;
}
/*
* Use MSI if MSIx doesn't work. Fail back to legacy INTx
* if MSI doesn't work either
*/
ret = pci_enable_msi(pdev);
if (!ret || pdev->irq) {
irq = pdev->irq;
pnv_php_init_irq(php_slot, irq);
}
}
/*
 * Create, register and enable the hotplug slot described by @dn.
 *
 * Only nodes whose "ibm,slot-pluggable" and "ibm,reset-by-firmware"
 * properties are both present and non-zero are considered. The hotplug
 * interrupt is additionally set up when "ibm,slot-surprise-pluggable"
 * is present and non-zero.
 *
 * Returns 0 on success, -ENXIO when the node is not an eligible slot,
 * -ENODEV when the slot cannot be allocated, or the error from
 * pnv_php_register_slot() / pnv_php_enable().
 */
static int pnv_php_register_one(struct device_node *dn)
{
	struct pnv_php_slot *php_slot;
	u32 prop32;
	int ret;

	/* Check if it's hotpluggable slot */
	ret = of_property_read_u32(dn, "ibm,slot-pluggable", &prop32);
	if (ret || !prop32)
		return -ENXIO;

	ret = of_property_read_u32(dn, "ibm,reset-by-firmware", &prop32);
	if (ret || !prop32)
		return -ENXIO;

	php_slot = pnv_php_alloc_slot(dn);
	if (!php_slot)
		return -ENODEV;

	ret = pnv_php_register_slot(php_slot);
	if (ret) {
		/* Never registered: drop the reference from allocation */
		pnv_php_put_slot(php_slot);
		return ret;
	}

	ret = pnv_php_enable(php_slot, false);
	if (ret) {
		/*
		 * pnv_php_unregister_one() looks the slot up again and
		 * then puts it twice (once through pnv_php_release()),
		 * which already consumes the allocation reference. A
		 * further put here would be one too many, so @php_slot
		 * must not be touched after this call.
		 * NOTE(review): relies on pnv_php_find_slot() returning a
		 * counted reference, as its other callers in this file
		 * assume - confirm.
		 */
		pnv_php_unregister_one(php_slot->dn);
		return ret;
	}

	/* Enable interrupt if the slot supports surprise hotplug */
	ret = of_property_read_u32(dn, "ibm,slot-surprise-pluggable", &prop32);
	if (!ret && prop32)
		pnv_php_enable_irq(php_slot);

	return 0;
}
/*
 * Recursively register a slot for every descendant of @dn. @dn itself
 * is not registered; failures of individual nodes are ignored.
 */
static void pnv_php_register(struct device_node *dn)
{
	struct device_node *node;

	for_each_child_of_node(dn, node) {
		/* A slot has to exist before the slots underneath it */
		pnv_php_register_one(node);
		pnv_php_register(node);
	}
}
/*
 * Tear down the slot registered for @dn, if there is one: mark it
 * offline, deregister it from the PCI hotplug core, unlink it from the
 * driver's lists and drop the references held on it.
 */
static void pnv_php_unregister_one(struct device_node *dn)
{
	struct pnv_php_slot *php_slot;

	php_slot = pnv_php_find_slot(dn);
	if (!php_slot)
		return;

	php_slot->state = PNV_PHP_STATE_OFFLINE;
	pci_hp_deregister(&php_slot->slot);
	pnv_php_release(php_slot);

	/* Balance the pnv_php_find_slot() lookup above */
	pnv_php_put_slot(php_slot);
}
/*
 * Recursively unregister the slots of every descendant of @dn. @dn
 * itself is left alone.
 */
static void pnv_php_unregister(struct device_node *dn)
{
	struct device_node *node;

	for_each_child_of_node(dn, node) {
		/* Depth first: nested slots go before the one above them */
		pnv_php_unregister(node);
		pnv_php_unregister_one(node);
	}
}
/*
 * Module entry point: announce the driver, then register slots below
 * every IODA2/IODA3 PHB node and the slot on each OpenCAPI PHB node.
 * Always returns 0.
 */
static int __init pnv_php_init(void)
{
	/* PHB flavours whose descendants are scanned for slots */
	const char * const phb_compat[] = {
		"ibm,ioda2-phb",
		"ibm,ioda3-phb",
		NULL,
	};
	const char * const *compat;
	struct device_node *phb;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

	for (compat = phb_compat; *compat; compat++) {
		for_each_compatible_node(phb, NULL, *compat) {
			pnv_php_register(phb);
		}
	}

	/* slot directly under the PHB */
	for_each_compatible_node(phb, NULL, "ibm,ioda2-npu2-opencapi-phb") {
		pnv_php_register_one(phb);
	}

	return 0;
}
/*
 * Module exit point: unregister the slots below every IODA2/IODA3 PHB
 * node and the slot on each OpenCAPI PHB node, visiting the PHB kinds
 * in the same order as pnv_php_init().
 */
static void __exit pnv_php_exit(void)
{
	/* PHB flavours whose descendants carry slots */
	const char * const phb_compat[] = {
		"ibm,ioda2-phb",
		"ibm,ioda3-phb",
		NULL,
	};
	const char * const *compat;
	struct device_node *phb;

	for (compat = phb_compat; *compat; compat++) {
		for_each_compatible_node(phb, NULL, *compat) {
			pnv_php_unregister(phb);
		}
	}

	/* slot directly under the PHB */
	for_each_compatible_node(phb, NULL, "ibm,ioda2-npu2-opencapi-phb") {
		pnv_php_unregister_one(phb);
	}
}
/* Hook the driver's init/exit routines into module load and unload */
module_init(pnv_php_init);
module_exit(pnv_php_exit);

/* Module metadata; the DRIVER_* strings are defined elsewhere in this file */
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);