/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
 * Copyright (c) 2019-2020 The Linux Foundation. All rights reserved.
 * Copyright (c) 2021-2022,2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */
#ifndef _ATH11K_PCI_H
#define _ATH11K_PCI_H

#include <linux/mhi.h>

#include "core.h"

#define PCIE_SOC_GLOBAL_RESET 0x3008
#define PCIE_SOC_GLOBAL_RESET_V 1

#define WLAON_WARM_SW_ENTRY 0x1f80504
#define WLAON_SOC_RESET_CAUSE_REG 0x01f8060c

#define PCIE_Q6_COOKIE_ADDR 0x01f80500
#define PCIE_Q6_COOKIE_DATA 0xc0000000

/* register to wake the UMAC from power collapse */
#define PCIE_SCRATCH_0_SOC_PCIE_REG 0x4040

/* register used for handshake mechanism to validate UMAC is awake */
#define PCIE_SOC_WAKE_PCIE_LOCAL_REG 0x3004

#define PCIE_PCIE_PARF_LTSSM 0x1e081b0
#define PARM_LTSSM_VALUE 0x111

#define GCC_GCC_PCIE_HOT_RST 0x1e402bc
#define GCC_GCC_PCIE_HOT_RST_VAL 0x10

#define PCIE_PCIE_INT_ALL_CLEAR 0x1e08228
#define PCIE_SMLH_REQ_RST_LINK_DOWN 0x2
#define PCIE_INT_CLEAR_ALL 0xffffffff
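/* The QSERDES/PCS register offsets below are chip specific and are taken
 * from ab->hw_params.regs, so these macros rely on a local
 * struct ath11k_base *ab being in scope.
 */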
#define PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG(x) \
	(ab->hw_params.regs->pcie_qserdes_sysclk_en_sel)
#define PCIE_QSERDES_COM_SYSCLK_EN_SEL_VAL 0x10
#define PCIE_QSERDES_COM_SYSCLK_EN_SEL_MSK 0xffffffff
#define PCIE_PCS_OSC_DTCT_CONFIG1_REG(x) \
	(ab->hw_params.regs->pcie_pcs_osc_dtct_config_base)
#define PCIE_PCS_OSC_DTCT_CONFIG1_VAL 0x02
#define PCIE_PCS_OSC_DTCT_CONFIG2_REG(x) \
	(ab->hw_params.regs->pcie_pcs_osc_dtct_config_base + 0x4)
#define PCIE_PCS_OSC_DTCT_CONFIG2_VAL 0x52
#define PCIE_PCS_OSC_DTCT_CONFIG4_REG(x) \
	(ab->hw_params.regs->pcie_pcs_osc_dtct_config_base + 0xc)
#define PCIE_PCS_OSC_DTCT_CONFIG4_VAL 0xff
#define PCIE_PCS_OSC_DTCT_CONFIG_MSK 0x000000ff

#define WLAON_QFPROM_PWR_CTRL_REG 0x01f8031c
#define QFPROM_PWR_CTRL_VDD4BLOW_MASK 0x4

enum ath11k_pci_flags {
	ATH11K_PCI_ASPM_RESTORE,
};

struct ath11k_pci {
	struct pci_dev *pdev;
	struct ath11k_base *ab;
	u16 dev_id;
	char amss_path[100];
	struct mhi_controller *mhi_ctrl;
	const struct ath11k_msi_config *msi_config;
	enum mhi_callback mhi_pre_cb;
	u32 register_window;

	/* protects register_window above */
	spinlock_t window_lock;

	/* enum ath11k_pci_flags */
	unsigned long flags;
	u16 link_ctl;
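	/* Streaming DMA mask for the device. A 36 bit mask is used where the
	 * hardware supports it so that buffers above 4 GB are mapped without
	 * bouncing through SWIOTLB; the coherent DMA mask is left at 32 bits
	 * because firmware crashes if it is raised as well.
	 */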
	u64 dma_mask;
};

static inline struct ath11k_pci *ath11k_pci_priv(struct ath11k_base *ab)
{
	return (struct ath11k_pci *)ab->drv_priv;
}
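/* returns the Linux IRQ number assigned to the given MSI vector */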
int ath11k_pci_get_msi_irq(struct ath11k_base *ab, unsigned int vector);
#endif